package parser import ( "regexp" "strings" "github.com/meskio/epubgo" "gitlab.com/trantor/trantor/lib/database" ) func EpubMetadata(epub *epubgo.Epub) database.Book { book := database.Book{} for _, m := range epub.MetadataFields() { data, err := epub.Metadata(m) if err != nil { continue } switch m { case "title": book.Title = cleanStr(strings.Join(data, ", ")) case "creator": book.Authors = parseAuthr(data) case "contributor": book.Contributor = cleanStr(strings.Join(data, ", ")) case "publisher": book.Publisher = cleanStr(strings.Join(data, ", ")) case "description": book.Description = parseDescription(data) case "subject": book.Tags = ParseSubject(data) case "date": book.Date = parseDate(data) case "language": book.Lang = GuessLang(epub, data) case "identifier": attr, _ := epub.MetadataAttr(m) for i, d := range data { if attr[i]["scheme"] == "ISBN" { isbn := ISBN(d) if isbn != "" { book.Isbn = isbn } } } } } return book } func cleanStr(str string) string { str = strings.Replace(str, "'", "'", -1) exp, _ := regexp.Compile("&[^;]*;") str = exp.ReplaceAllString(str, "") exp, _ = regexp.Compile("[ ,]*$") str = exp.ReplaceAllString(str, "") return str } func parseAuthr(creator []string) []string { exp1, _ := regexp.Compile("^(.*\\( *([^\\)]*) *\\))*$") exp2, _ := regexp.Compile("^[^:]*: *(.*)$") res := make([]string, len(creator)) for i, s := range creator { auth := exp1.FindStringSubmatch(s) if auth != nil { res[i] = cleanStr(strings.Join(auth[2:], ", ")) } else { auth := exp2.FindStringSubmatch(s) if auth != nil { res[i] = cleanStr(auth[1]) } else { res[i] = cleanStr(s) } } } return res } func parseDescription(description []string) string { str := cleanStr(strings.Join(description, "\n")) str = strings.Replace(str, "

", "\n", -1) exp, _ := regexp.Compile("<[^>]*>") str = exp.ReplaceAllString(str, "") str = strings.Replace(str, "&", "&", -1) str = strings.Replace(str, "<", "<", -1) str = strings.Replace(str, ">", ">", -1) str = strings.Replace(str, "\\n", "\n", -1) return str } func ParseSubject(subject []string) []string { parsed := subject for _, sep := range []string{"/", ","} { p2 := []string{} for _, s := range parsed { p2 = append(p2, strings.Split(s, sep)...) } parsed = p2 } res := []string{} for _, s := range parsed { sub := strings.Trim(s, " ") sub = strings.ToLower(sub) if len(sub) != 0 { res = append(res, sub) } } return res } func parseDate(date []string) string { if len(date) == 0 { return "" } return strings.Replace(date[0], "Unspecified: ", "", -1) }