package parser

import (
	"regexp"
	"strings"

	"github.com/meskio/epubgo"
)

// Patterns used by the cleaning helpers below. They are compiled once when
// the package is initialised instead of on every call.
var (
	// entityRe matches an ampersand followed by anything up to the next
	// semicolon, i.e. things shaped like an HTML/XML entity.
	entityRe = regexp.MustCompile("&[^;]*;")
	// authorParenRe matches a string made only of "text ( inner )" groups and
	// captures the content of the last pair of parentheses.
	authorParenRe = regexp.MustCompile("^(.*\\( *([^\\)]*) *\\))*$")
	// authorPrefixRe captures everything after the first colon (and the
	// spaces following it).
	authorPrefixRe = regexp.MustCompile("^[^:]*: *(.*)$")
	// htmlTagRe matches anything enclosed in angle brackets.
	htmlTagRe = regexp.MustCompile("<[^>]*>")
)

// MetaData maps a metadata field name to its parsed value. Depending on the
// field the value is either a string or a []string.
type MetaData map[string]interface{}

// EpubMetadata reads every metadata field exposed by epub and returns the
// values in a normalised form:
//
//   - "creator" is stored under "author" as a []string.
//   - "language" is stored under "lang" with the result of GuessLang.
//   - "identifier" entries whose scheme attribute is "ISBN" are stored under
//     "isbn" when ISBN returns a non-empty value; the last such entry wins.
//   - "description", "subject" and "date" go through their dedicated parsers.
//   - "title", "contributor" and "publisher" are joined with ", " and cleaned.
//   - any other field is joined with ", " unchanged.
//
// Fields that cannot be read are skipped rather than reported.
func EpubMetadata(epub *epubgo.Epub) MetaData {
	metadata := MetaData{}
	for _, m := range epub.MetadataFields() {
		data, err := epub.Metadata(m)
		if err != nil {
			// Best effort: an unreadable field is simply left out.
			continue
		}
		switch m {
		case "creator":
			metadata["author"] = parseAuthr(data)
		case "description":
			metadata[m] = parseDescription(data)
		case "subject":
			metadata[m] = parseSubject(data)
		case "date":
			metadata[m] = parseDate(data)
		case "language":
			metadata["lang"] = GuessLang(epub, data)
		case "title", "contributor", "publisher":
			metadata[m] = cleanStr(strings.Join(data, ", "))
		case "identifier":
			attr, err := epub.MetadataAttr(m)
			if err != nil {
				// Without attributes the scheme is unknown, so no identifier
				// can be recognised as an ISBN.
				continue
			}
			for i, d := range data {
				if i >= len(attr) {
					// Never index past the attributes actually returned.
					break
				}
				if attr[i]["scheme"] != "ISBN" {
					continue
				}
				if isbn := ISBN(d); isbn != "" {
					metadata["isbn"] = isbn
				}
			}
		default:
			metadata[m] = strings.Join(data, ", ")
		}
	}
	return metadata
}

// cleanStr removes entity-like sequences ("&...;") from str and trims any
// trailing spaces and commas.
func cleanStr(str string) string {
	// NOTE(review): as written this replaces an apostrophe with itself, which
	// is a no-op. The search string was presumably an escaped apostrophe
	// entity that lost its escaping - confirm against the upstream source.
	str = strings.Replace(str, "'", "'", -1)
	str = entityRe.ReplaceAllString(str, "")
	return strings.TrimRight(str, " ,")
}

// parseAuthr normalises each creator entry and returns a slice of the same
// length. For every entry, the first rule that applies is used:
//
//  1. the entry consists only of "text (inner)" groups: the content of the
//     last parentheses is kept (an empty entry also falls in this case);
//  2. the entry contains a colon: the text after the first colon is kept;
//  3. otherwise the entry is kept as is.
//
// The selected text is passed through cleanStr.
func parseAuthr(creator []string) []string {
	res := make([]string, len(creator))
	for i, s := range creator {
		if m := authorParenRe.FindStringSubmatch(s); m != nil {
			// m[2] is the inner capture group, the text between parentheses.
			res[i] = cleanStr(m[2])
			continue
		}
		if m := authorPrefixRe.FindStringSubmatch(s); m != nil {
			res[i] = cleanStr(m[1])
			continue
		}
		res[i] = cleanStr(s)
	}
	return res
}

// parseDescription joins the description entries with newlines, cleans the
// result with cleanStr, collapses pairs of newlines into one, strips
// anything enclosed in angle brackets and turns the two-character sequence
// backslash-n into a real newline.
func parseDescription(description []string) string {
	str := cleanStr(strings.Join(description, "\n"))
	// NOTE(review): the original literal contained raw line breaks, which is
	// not valid inside an interpreted Go string; it is spelled here with
	// escapes. It may originally have been a markup tag that was lost -
	// confirm against the upstream source.
	str = strings.Replace(str, "\n\n", "\n", -1)
	str = htmlTagRe.ReplaceAllString(str, "")
	// NOTE(review): the next three calls replace a character with itself and
	// therefore do nothing as written. The search strings were presumably
	// the escaped entities for these characters - confirm before relying on
	// or removing them.
	str = strings.Replace(str, "&", "&", -1)
	str = strings.Replace(str, "<", "<", -1)
	str = strings.Replace(str, ">", ">", -1)
	str = strings.Replace(str, "\\n", "\n", -1)
	return str
}

// parseSubject splits every subject entry on " / " and returns all the
// resulting pieces in order. It returns nil when subject is empty.
func parseSubject(subject []string) []string {
	var res []string
	for _, s := range subject {
		res = append(res, strings.Split(s, " / ")...)
	}
	return res
}

// parseDate returns the first date entry with every occurrence of
// "Unspecified: " removed, or "" when there are no entries.
func parseDate(date []string) string {
	if len(date) == 0 {
		return ""
	}
	return strings.Replace(date[0], "Unspecified: ", "", -1)
}