Move all the code to a lib folder
This commit is contained in:
parent
e963d00014
commit
9d1f1ad5c0
31 changed files with 123 additions and 98 deletions
81
lib/parser/isbn.go
Normal file
81
lib/parser/isbn.go
Normal file
|
@ -0,0 +1,81 @@
|
|||
package parser
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
func ISBN(orig string) string {
|
||||
isbn := getISBN(orig)
|
||||
|
||||
if len(isbn) != 13 && len(isbn) != 10 {
|
||||
return ""
|
||||
}
|
||||
if !validChecksum(isbn) {
|
||||
return ""
|
||||
}
|
||||
|
||||
return toISBN13(isbn)
|
||||
}
|
||||
|
||||
// getISBN pulls an ISBN candidate out of src.
//
// The input is upper-cased, everything before the first ASCII digit and
// everything after the last ASCII digit or 'X' is dropped, hyphens and spaces
// are removed, and the result is cut to at most 13 bytes. The returned string
// is not validated; it may still have the wrong length or a bad check digit.
func getISBN(src string) string {
	// Only ASCII 0-9 are ISBN digits. unicode.IsNumber also accepts
	// superscripts, fractions, Roman numerals and non-Latin digits, which
	// would survive the trimming and then corrupt the byte-based cut below.
	isDigit := func(r rune) bool {
		return r <= unicode.MaxASCII && unicode.IsDigit(r)
	}
	isNotDigit := func(r rune) bool {
		return !isDigit(r)
	}
	// 'X' is allowed on the right edge because it is the ISBN-10 check
	// character for the value 10.
	isNotDigitOrX := func(r rune) bool {
		return !isDigit(r) && r != 'X'
	}

	isbn := strings.ToUpper(src)
	isbn = strings.TrimLeftFunc(isbn, isNotDigit)
	isbn = strings.TrimRightFunc(isbn, isNotDigitOrX)
	isbn = strings.Replace(isbn, "-", "", -1)
	isbn = strings.Replace(isbn, " ", "", -1)

	// Anything beyond 13 characters is trailing noise (e.g. a suffix glued to
	// the ISBN), so keep only the leading 13.
	if len(isbn) > 13 {
		isbn = isbn[:13]
	}
	return isbn
}
|
||||
|
||||
func validChecksum(isbn string) bool {
|
||||
if len(isbn) == 10 {
|
||||
return rune(isbn[9]) == checkDigit10(isbn)
|
||||
}
|
||||
return rune(isbn[12]) == checkDigit13(isbn)
|
||||
}
|
||||
|
||||
func toISBN13(isbn string) string {
|
||||
if len(isbn) == 13 {
|
||||
return isbn
|
||||
}
|
||||
|
||||
isbn = "978" + isbn
|
||||
return isbn[:12] + string(checkDigit13(isbn))
|
||||
}
|
||||
|
||||
// checkDigit10 computes the ISBN-10 check character from the first nine
// characters of isbn. The result is '0'-'9', or 'X' when the check value
// is 10.
//
// It returns -1 when isbn has fewer than nine characters or when any of the
// first nine is not an ASCII digit. -1 can never equal a byte-indexed
// character of a string, so a comparison against it rejects the input.
func checkDigit10(isbn string) rune {
	if len(isbn) < 9 {
		return -1
	}

	acc := 0
	for i, r := range isbn[:9] {
		// Without this check a letter would be folded into the weighted sum
		// and could produce a matching check character by coincidence.
		if r < '0' || r > '9' {
			return -1
		}
		// Weights run from 10 for the first digit down to 2 for the ninth.
		acc += (10 - i) * int(r-'0')
	}

	check := (11 - acc%11) % 11
	if check == 10 {
		return 'X'
	}
	return rune(check + '0')
}
|
||||
|
||||
// checkDigit13 computes the ISBN-13 check digit ('0'-'9') from the first
// twelve characters of isbn.
//
// It returns -1 when isbn has fewer than twelve characters or when any of the
// first twelve is not an ASCII digit. -1 can never equal a byte-indexed
// character of a string, so a comparison against it rejects the input.
func checkDigit13(isbn string) rune {
	if len(isbn) < 12 {
		return -1
	}

	acc := 0
	for i, r := range isbn[:12] {
		// Without this check a letter would be folded into the weighted sum
		// and could produce a matching check digit by coincidence.
		if r < '0' || r > '9' {
			return -1
		}
		n := int(r - '0')
		// Digits at odd indexes (2nd, 4th, ...) carry weight 3, the rest 1.
		if i%2 == 1 {
			n *= 3
		}
		acc += n
	}

	check := (10 - acc%10) % 10
	return rune(check + '0')
}
|
26
lib/parser/isbn_test.go
Normal file
26
lib/parser/isbn_test.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
package parser
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestISBN(t *testing.T) {
|
||||
isbn_test := [][]string{
|
||||
[]string{"", ""},
|
||||
[]string{"978074341", ""},
|
||||
[]string{"9780743412395", ""},
|
||||
[]string{"9780743412391", "9780743412391"},
|
||||
[]string{"0-688-12189-6", "9780688121891"},
|
||||
[]string{"033026155X", "9780330261555"},
|
||||
[]string{"033026155x", "9780330261555"},
|
||||
[]string{"0307756432", "9780307756435"},
|
||||
[]string{"urn:isbn:978-3-8387-0337-4:", "9783838703374"},
|
||||
[]string{"EPUB9788865971468-113465", "9788865971468"},
|
||||
}
|
||||
|
||||
for _, isbn := range isbn_test {
|
||||
src := isbn[0]
|
||||
dst := isbn[1]
|
||||
if res := ISBN(src); res != dst {
|
||||
t.Error("ISBN parse failed: ", src, " => ", res, " (expected ", dst, ")")
|
||||
}
|
||||
}
|
||||
}
|
66
lib/parser/language.go
Normal file
66
lib/parser/language.go
Normal file
|
@ -0,0 +1,66 @@
|
|||
package parser
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
|
||||
"github.com/meskio/epubgo"
|
||||
"github.com/rainycape/cld2"
|
||||
)
|
||||
|
||||
func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
|
||||
spine, err := epub.Spine()
|
||||
if err != nil {
|
||||
return orig_langs
|
||||
}
|
||||
|
||||
var err_spine error
|
||||
err_spine = nil
|
||||
langs := []string{}
|
||||
for err_spine == nil {
|
||||
html, err := spine.Open()
|
||||
err_spine = spine.Next()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
defer html.Close()
|
||||
|
||||
buff, err := ioutil.ReadAll(html)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
langs = append(langs, cld2.Detect(string(buff)))
|
||||
}
|
||||
|
||||
lang := commonLang(langs)
|
||||
if lang != "un" && differentLang(lang, orig_langs) {
|
||||
return []string{lang}
|
||||
}
|
||||
return orig_langs
|
||||
}
|
||||
|
||||
// commonLang returns the entry that occurs most often in langs, ignoring
// "un" entries. When several entries share the highest count, the one that
// appears first in langs wins. It returns "un" when langs is empty or holds
// nothing but "un".
func commonLang(langs []string) string {
	count := make(map[string]int, len(langs))
	for _, l := range langs {
		count[l]++
	}

	lang := "un"
	maxcount := 0
	// Walk langs rather than the map: map iteration order is random, which
	// would make the winner of a tie change from run to run.
	for _, l := range langs {
		if l == "un" {
			continue
		}
		if c := count[l]; c > maxcount {
			lang, maxcount = l, c
		}
	}
	return lang
}
|
||||
|
||||
// differentLang reports whether lang differs from the language declared in
// orig_langs.
//
// Only the first entry of orig_langs is considered, reduced to its first two
// bytes in lower case (so "EN-us" compares as "en"). When orig_langs is empty
// or its first entry is shorter than two bytes, the declared language is
// taken to be "un".
func differentLang(lang string, orig_langs []string) bool {
	origLang := "un"
	// The length check must be on the first entry itself: slicing [0:2] on a
	// shorter string would panic.
	if len(orig_langs) > 0 && len(orig_langs[0]) >= 2 {
		origLang = strings.ToLower(orig_langs[0][0:2])
	}
	return origLang != lang
}
|
103
lib/parser/parser.go
Normal file
103
lib/parser/parser.go
Normal file
|
@ -0,0 +1,103 @@
|
|||
package parser
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/meskio/epubgo"
|
||||
)
|
||||
|
||||
// MetaData holds metadata extracted from an epub, keyed by field name.
// EpubMetadata fills it with string and []string values.
type MetaData map[string]interface{}
|
||||
|
||||
func EpubMetadata(epub *epubgo.Epub) MetaData {
|
||||
metadata := MetaData{}
|
||||
for _, m := range epub.MetadataFields() {
|
||||
data, err := epub.Metadata(m)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
switch m {
|
||||
case "creator":
|
||||
metadata["author"] = parseAuthr(data)
|
||||
case "description":
|
||||
metadata[m] = parseDescription(data)
|
||||
case "subject":
|
||||
metadata[m] = parseSubject(data)
|
||||
case "date":
|
||||
metadata[m] = parseDate(data)
|
||||
case "language":
|
||||
metadata["lang"] = GuessLang(epub, data)
|
||||
case "title", "contributor", "publisher":
|
||||
metadata[m] = cleanStr(strings.Join(data, ", "))
|
||||
case "identifier":
|
||||
attr, _ := epub.MetadataAttr(m)
|
||||
for i, d := range data {
|
||||
if attr[i]["scheme"] == "ISBN" {
|
||||
isbn := ISBN(d)
|
||||
if isbn != "" {
|
||||
metadata["isbn"] = isbn
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
metadata[m] = strings.Join(data, ", ")
|
||||
}
|
||||
}
|
||||
return metadata
|
||||
}
|
||||
|
||||
// htmlEntityRe matches one well-formed HTML character reference: a decimal
// ("&#39;"), hexadecimal ("&#x27;") or named ("&nbsp;") entity.
var htmlEntityRe = regexp.MustCompile("&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);")

// cleanStr tidies a metadata string: the apostrophe entity "&#39;" becomes
// "'", every other HTML entity is deleted, and trailing spaces and commas are
// trimmed.
func cleanStr(str string) string {
	// Decode the apostrophe first; the generic pass below would otherwise
	// delete it along with the other entities.
	str = strings.Replace(str, "&#39;", "'", -1)
	// Only real entities are removed. A looser pattern such as "&[^;]*;"
	// would also swallow plain text like "Tom & Jerry; friends".
	str = htmlEntityRe.ReplaceAllString(str, "")
	return strings.TrimRight(str, " ,")
}
|
||||
|
||||
func parseAuthr(creator []string) []string {
|
||||
exp1, _ := regexp.Compile("^(.*\\( *([^\\)]*) *\\))*$")
|
||||
exp2, _ := regexp.Compile("^[^:]*: *(.*)$")
|
||||
res := make([]string, len(creator))
|
||||
for i, s := range creator {
|
||||
auth := exp1.FindStringSubmatch(s)
|
||||
if auth != nil {
|
||||
res[i] = cleanStr(strings.Join(auth[2:], ", "))
|
||||
} else {
|
||||
auth := exp2.FindStringSubmatch(s)
|
||||
if auth != nil {
|
||||
res[i] = cleanStr(auth[1])
|
||||
} else {
|
||||
res[i] = cleanStr(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func parseDescription(description []string) string {
|
||||
str := cleanStr(strings.Join(description, "\n"))
|
||||
str = strings.Replace(str, "</p>", "\n", -1)
|
||||
exp, _ := regexp.Compile("<[^>]*>")
|
||||
str = exp.ReplaceAllString(str, "")
|
||||
str = strings.Replace(str, "&", "&", -1)
|
||||
str = strings.Replace(str, "<", "<", -1)
|
||||
str = strings.Replace(str, ">", ">", -1)
|
||||
str = strings.Replace(str, "\\n", "\n", -1)
|
||||
return str
|
||||
}
|
||||
|
||||
// parseSubject flattens the subject entries of an epub into a single list:
// each entry is split on " / " and the pieces are appended in order. The
// result is nil when subject is empty.
func parseSubject(subject []string) []string {
	var topics []string
	for i := range subject {
		parts := strings.Split(subject[i], " / ")
		topics = append(topics, parts...)
	}
	return topics
}
|
||||
|
||||
// parseDate returns the first date entry of an epub with every
// "Unspecified: " marker removed, or the empty string when there are no
// entries.
func parseDate(date []string) string {
	var first string
	if len(date) > 0 {
		first = date[0]
	}
	return strings.Replace(first, "Unspecified: ", "", -1)
}
|
Reference in a new issue