// Package parser extracts book metadata (title, authors, description,
// subjects, language, ISBN, ...) from EPUB files.
package parser
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/meskio/epubgo"
|
|
"gitlab.com/trantor/trantor/lib/database"
|
|
)
|
|
|
|
func EpubMetadata(epub *epubgo.Epub) database.Book {
|
|
book := database.Book{}
|
|
for _, m := range epub.MetadataFields() {
|
|
data, err := epub.Metadata(m)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
switch m {
|
|
case "title":
|
|
book.Title = cleanStr(strings.Join(data, ", "))
|
|
case "creator":
|
|
book.Authors = parseAuthr(data)
|
|
case "contributor":
|
|
book.Contributor = cleanStr(strings.Join(data, ", "))
|
|
case "publisher":
|
|
book.Publisher = cleanStr(strings.Join(data, ", "))
|
|
case "description":
|
|
book.Description = parseDescription(data)
|
|
case "subject":
|
|
book.Tags = parseSubject(data)
|
|
case "date":
|
|
book.Date = parseDate(data)
|
|
case "language":
|
|
book.Lang = GuessLang(epub, data)
|
|
case "identifier":
|
|
attr, _ := epub.MetadataAttr(m)
|
|
for i, d := range data {
|
|
if attr[i]["scheme"] == "ISBN" {
|
|
isbn := ISBN(d)
|
|
if isbn != "" {
|
|
book.Isbn = isbn
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return book
|
|
}
|
|
|
|
// Patterns used by cleanStr, compiled once at package initialisation rather
// than on every call.
var (
	// cleanStrEntityExp matches any "&...;" sequence.
	cleanStrEntityExp = regexp.MustCompile("&[^;]*;")
	// cleanStrTrailExp matches a run of spaces and commas at the end of the
	// string.
	cleanStrTrailExp = regexp.MustCompile("[ ,]*$")
)

// cleanStr tidies a metadata string. It decodes the "&#39;" entity into an
// apostrophe, removes every remaining "&...;" sequence and strips trailing
// spaces and commas. The cleaned string is returned.
func cleanStr(str string) string {
	// The apostrophe entity must be decoded before the generic entity strip
	// below, which would otherwise delete it ("Don&#39;t" -> "Dont").
	str = strings.Replace(str, "&#39;", "'", -1)
	str = cleanStrEntityExp.ReplaceAllString(str, "")
	return cleanStrTrailExp.ReplaceAllString(str, "")
}
|
|
|
|
func parseAuthr(creator []string) []string {
|
|
exp1, _ := regexp.Compile("^(.*\\( *([^\\)]*) *\\))*$")
|
|
exp2, _ := regexp.Compile("^[^:]*: *(.*)$")
|
|
res := make([]string, len(creator))
|
|
for i, s := range creator {
|
|
auth := exp1.FindStringSubmatch(s)
|
|
if auth != nil {
|
|
res[i] = cleanStr(strings.Join(auth[2:], ", "))
|
|
} else {
|
|
auth := exp2.FindStringSubmatch(s)
|
|
if auth != nil {
|
|
res[i] = cleanStr(auth[1])
|
|
} else {
|
|
res[i] = cleanStr(s)
|
|
}
|
|
}
|
|
}
|
|
return res
|
|
}
|
|
|
|
func parseDescription(description []string) string {
|
|
str := cleanStr(strings.Join(description, "\n"))
|
|
str = strings.Replace(str, "</p>", "\n", -1)
|
|
exp, _ := regexp.Compile("<[^>]*>")
|
|
str = exp.ReplaceAllString(str, "")
|
|
str = strings.Replace(str, "&", "&", -1)
|
|
str = strings.Replace(str, "<", "<", -1)
|
|
str = strings.Replace(str, ">", ">", -1)
|
|
str = strings.Replace(str, "\\n", "\n", -1)
|
|
return str
|
|
}
|
|
|
|
// parseSubject turns the raw "subject" metadata values into a list of tags.
//
// Every value is split on "/" and then on ",". Each piece is trimmed of
// surrounding spaces and lower-cased, and empty pieces are dropped. The
// result is never nil.
func parseSubject(subject []string) []string {
	parsed := subject
	for _, sep := range []string{"/", ","} {
		split := make([]string, 0, len(parsed))
		// Split the output of the previous pass, not the raw input, so that
		// every separator takes effect.
		for _, s := range parsed {
			split = append(split, strings.Split(s, sep)...)
		}
		parsed = split
	}

	res := make([]string, 0, len(parsed))
	for _, s := range parsed {
		sub := strings.ToLower(strings.Trim(s, " "))
		if sub != "" {
			res = append(res, sub)
		}
	}
	return res
}
|
|
|
|
// parseDate returns the first "date" metadata value with every occurrence of
// "Unspecified: " removed. It returns "" when date is empty; values after the
// first are ignored.
func parseDate(date []string) string {
	result := ""
	if len(date) > 0 {
		first := date[0]
		result = strings.Replace(first, "Unspecified: ", "", -1)
	}
	return result
}
|