From 2f2ff3dd8f4f1f6413533af90fa0861fd0fc21a6 Mon Sep 17 00:00:00 2001
From: Las Zenow
Date: Sun, 8 Feb 2015 00:16:29 -0500
Subject: [PATCH] Add isbn parser

---
 parser/isbn.go      | 129 ++++++++++++++++++++++++++++++++++++++++++++
 parser/isbn_test.go |  32 +++++++++++
 parser/parser.go    |   7 ++-
 3 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 parser/isbn.go
 create mode 100644 parser/isbn_test.go

diff --git a/parser/isbn.go b/parser/isbn.go
new file mode 100644
index 0000000..de730fe
--- /dev/null
+++ b/parser/isbn.go
@@ -0,0 +1,129 @@
+package parser
+
+import "strings"
+
+// ISBN extracts an ISBN from orig and returns it as a bare ISBN-13 (13
+// digits, no separators). ISBN-10 input is converted to its 978-prefixed
+// ISBN-13 form.
+//
+// It returns "" when the candidate found in orig is not 10 or 13 characters
+// long, contains anything other than ASCII digits (plus a final 'X' check
+// character on an ISBN-10), or has a wrong check digit.
+func ISBN(orig string) string {
+	isbn := getISBN(orig)
+
+	if len(isbn) != 13 && len(isbn) != 10 {
+		return ""
+	}
+	// The checksum helpers do plain r-'0' arithmetic, so stray characters
+	// must be rejected first or they could satisfy the checksum by accident
+	// (e.g. "030775A432").
+	if !wellFormed(isbn) || !validChecksum(isbn) {
+		return ""
+	}
+
+	return toISBN13(isbn)
+}
+
+// getISBN returns the ISBN candidate embedded in src: upper-cased, stripped
+// of everything before the first digit and after the last digit or 'X', with
+// hyphens and spaces removed, and cut to at most 13 bytes. The result is not
+// validated.
+func getISBN(src string) string {
+	isbn := strings.ToUpper(src)
+	isbn = strings.TrimLeftFunc(isbn, func(r rune) bool {
+		return !isDigit(r)
+	})
+	isbn = strings.TrimRightFunc(isbn, func(r rune) bool {
+		return !isDigit(r) && r != 'X'
+	})
+	// Drop the separators in a single pass.
+	isbn = strings.Map(func(r rune) rune {
+		if r == '-' || r == ' ' {
+			return -1
+		}
+		return r
+	}, isbn)
+
+	// Anything after the 13th byte is trailing noise, as in
+	// "EPUB9788865971468-113465".
+	if len(isbn) > 13 {
+		isbn = isbn[:13]
+	}
+	return isbn
+}
+
+// isDigit reports whether r is an ASCII decimal digit. Unicode-aware
+// predicates such as unicode.IsNumber are too broad here: they also accept
+// superscripts, Roman numerals and the like, which break the r-'0'
+// arithmetic used by the checksum helpers.
+func isDigit(r rune) bool {
+	return '0' <= r && r <= '9'
+}
+
+// wellFormed reports whether isbn consists only of ASCII digits. The last
+// character of a 10-character ISBN may also be the check character 'X'.
+func wellFormed(isbn string) bool {
+	for i := 0; i < len(isbn); i++ {
+		c := isbn[i]
+		if isDigit(rune(c)) {
+			continue
+		}
+		if c != 'X' || len(isbn) != 10 || i != 9 {
+			return false
+		}
+	}
+	return true
+}
+
+// validChecksum reports whether the last character of isbn matches its
+// computed check digit. isbn must be a well-formed ISBN of length 10 or 13.
+func validChecksum(isbn string) bool {
+	if len(isbn) == 10 {
+		return rune(isbn[9]) == checkDigit10(isbn)
+	}
+	return rune(isbn[12]) == checkDigit13(isbn)
+}
+
+// toISBN13 converts a valid ISBN-10 to ISBN-13 by prefixing "978" and
+// recomputing the check digit. An ISBN-13 is returned unchanged.
+func toISBN13(isbn string) string {
+	if len(isbn) == 13 {
+		return isbn
+	}
+
+	isbn = "978" + isbn
+	return isbn[:12] + string(checkDigit13(isbn))
+}
+
+// checkDigit10 computes the ISBN-10 check character from the first nine
+// digits of isbn: the digits are weighted 10 down to 2 and the check value
+// brings the total to a multiple of 11. A value of 10 is written as 'X'.
+func checkDigit10(isbn string) rune {
+	acc := 0
+	for i, r := range isbn[:9] {
+		acc += (10 - i) * int(r-'0')
+	}
+	check := (11 - (acc % 11)) % 11
+
+	if check == 10 {
+		return 'X'
+	}
+	return rune(check + '0')
+}
+
+// checkDigit13 computes the ISBN-13 check digit from the first twelve digits
+// of isbn: the digits are weighted alternately 1 and 3 and the check value
+// brings the total to a multiple of 10.
+func checkDigit13(isbn string) rune {
+	acc := 0
+	for i, r := range isbn[:12] {
+		n := int(r - '0')
+		if i%2 == 1 {
+			n = 3 * n
+		}
+		acc += n
+	}
+	check := (10 - (acc % 10)) % 10
+	return rune(check + '0')
+}
diff --git a/parser/isbn_test.go b/parser/isbn_test.go
new file mode 100644
index 0000000..ad404d9
--- /dev/null
+++ b/parser/isbn_test.go
@@ -0,0 +1,32 @@
+package parser
+
+import "testing"
+
+func TestISBN(t *testing.T) {
+	cases := []struct {
+		src  string
+		want string
+	}{
+		{"", ""},
+		{"978074341", ""},
+		{"9780743412395", ""},
+		{"9780743412391", "9780743412391"},
+		{"0-688-12189-6", "9780688121891"},
+		{"033026155X", "9780330261555"},
+		{"033026155x", "9780330261555"},
+		{"0307756432", "9780307756435"},
+		{"urn:isbn:978-3-8387-0337-4:", "9783838703374"},
+		{"EPUB9788865971468-113465", "9788865971468"},
+		// A stray letter must not slip through the checksum arithmetic.
+		{"030775A432", ""},
+		// 'X' is only a valid check character at the end of an ISBN-10.
+		{"03X7756432", ""},
+		{"978074341239X", ""},
+	}
+
+	for _, c := range cases {
+		if got := ISBN(c.src); got != c.want {
+			t.Errorf("ISBN(%q) = %q, want %q", c.src, got, c.want)
+		}
+	}
+}
diff --git a/parser/parser.go b/parser/parser.go
index d26f6b9..63556b9 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -4,7 +4,7 @@ import (
 	"regexp"
 	"strings"
 
-	"git.gitorious.org/go-pkg/epubgo.git"
+	"github.com/meskio/epubgo"
 )
 
 type MetaData map[string]interface{}
@@ -33,7 +33,10 @@ func EpubMetadata(epub *epubgo.Epub) MetaData {
 			attr, _ := epub.MetadataAttr(m)
 			for i, d := range data {
 				if attr[i]["scheme"] == "ISBN" {
-					metadata["isbn"] = d
+					isbn := ISBN(d)
+					if isbn != "" {
+						metadata["isbn"] = isbn
+					}
 				}
 			}
 		default: