Add isbn parser

This commit is contained in:
Las Zenow 2015-02-08 00:16:29 -05:00
parent 09536bd0d8
commit 2f2ff3dd8f
3 changed files with 112 additions and 2 deletions

81
parser/isbn.go Normal file
View file

@ -0,0 +1,81 @@
package parser
import (
"strings"
"unicode"
)
// ISBN extracts an ISBN from orig and returns it in 13-character form.
//
// The candidate produced by getISBN must be exactly 10 or 13 characters long
// and carry a matching check digit; otherwise the empty string is returned.
// A 10-character ISBN is converted to its 13-character equivalent.
func ISBN(orig string) string {
	candidate := getISBN(orig)
	switch len(candidate) {
	case 10, 13:
		// Only these two lengths can be an ISBN; everything else falls
		// through to the empty result below.
		if validChecksum(candidate) {
			return toISBN13(candidate)
		}
	}
	return ""
}
// getISBN extracts an ISBN candidate from free-form text such as
// "urn:isbn:978-3-8387-0337-4:" or "0-688-12189-6".
//
// The input is upper-cased (so a trailing 'x' check digit becomes 'X'),
// everything before the first ASCII digit and after the last ASCII digit or
// 'X' is trimmed, '-' and ' ' separators are removed, and the result is cut
// to at most 13 bytes so that digits trailing a 13-digit ISBN are ignored.
//
// The returned candidate contains ASCII digits only, optionally ending in 'X'
// when it is exactly 10 characters long. If any other character is left after
// the clean-up, "" is returned: the check-digit arithmetic assumes digits, and
// a stray character can otherwise happen to satisfy the checksum. Length and
// check digit are not verified here.
func getISBN(src string) string {
	// Only ASCII digits count; other Unicode numerics (e.g. Arabic-Indic
	// digits or superscripts) are not valid ISBN characters.
	isDigit := func(r rune) bool {
		return r <= unicode.MaxASCII && unicode.IsDigit(r)
	}
	isNotDigit := func(r rune) bool {
		return !isDigit(r)
	}
	isNotDigitOrX := func(r rune) bool {
		return !isDigit(r) && r != 'X'
	}
	// A negative rune tells strings.Map to drop the character.
	dropSeparators := func(r rune) rune {
		if r == '-' || r == ' ' {
			return -1
		}
		return r
	}

	isbn := strings.ToUpper(src)
	isbn = strings.TrimLeftFunc(isbn, isNotDigit)
	isbn = strings.TrimRightFunc(isbn, isNotDigitOrX)
	isbn = strings.Map(dropSeparators, isbn)
	if len(isbn) > 13 {
		isbn = isbn[:13]
	}

	// Reject anything that is not a digit; 'X' is only meaningful as the
	// check digit (last character) of a 10-character ISBN.
	for i := 0; i < len(isbn); i++ {
		c := isbn[i]
		if c >= '0' && c <= '9' {
			continue
		}
		if c == 'X' && len(isbn) == 10 && i == 9 {
			continue
		}
		return ""
	}
	return isbn
}
// validChecksum reports whether the check digit stored in isbn matches the
// one computed from its leading digits.
//
// A 10-character string is checked as an ISBN-10 (last character against
// checkDigit10). A string of 13 or more characters is checked as an ISBN-13
// using its first 13 characters (character 13 against checkDigit13). Any
// other length yields false.
func validChecksum(isbn string) bool {
	switch n := len(isbn); {
	case n == 10:
		return rune(isbn[9]) == checkDigit10(isbn)
	case n >= 13:
		return rune(isbn[12]) == checkDigit13(isbn)
	default:
		// Too short to hold a check digit at index 12; indexing would be
		// out of range, so treat it as invalid.
		return false
	}
}
// toISBN13 returns isbn in 13-character form.
//
// A 13-character input is returned unchanged. Any other input is prefixed
// with "978" and its last character is replaced by the check digit that
// checkDigit13 computes over the first 12 characters of the prefixed string.
func toISBN13(isbn string) string {
	if len(isbn) != 13 {
		prefixed := "978" + isbn
		check := checkDigit13(prefixed)
		return prefixed[:12] + string(check)
	}
	return isbn
}
// checkDigit10 computes the ISBN-10 check character from the first nine
// characters of isbn.
//
// Each character is read as a decimal digit and weighted 10, 9, ..., 2 by its
// position; the check value is the amount needed to bring the weighted sum to
// a multiple of 11. A value of 10 is represented as 'X', any other value as
// the corresponding digit character.
func checkDigit10(isbn string) rune {
	sum := 0
	for pos, digit := range isbn[:9] {
		weight := 10 - pos
		sum += weight * int(digit-'0')
	}
	switch check := (11 - sum%11) % 11; check {
	case 10:
		return 'X'
	default:
		return rune('0' + check)
	}
}
// checkDigit13 computes the ISBN-13 check digit from the first twelve
// characters of isbn.
//
// Each character is read as a decimal digit; digits at odd positions
// (0-based) are weighted 3, the others 1. The check digit is the amount
// needed to bring the weighted sum to a multiple of 10, returned as a digit
// character.
func checkDigit13(isbn string) rune {
	total := 0
	for pos, digit := range isbn[:12] {
		value := int(digit - '0')
		if pos%2 == 0 {
			total += value
		} else {
			total += 3 * value
		}
	}
	return rune('0' + (10-total%10)%10)
}

26
parser/isbn_test.go Normal file
View file

@ -0,0 +1,26 @@
package parser
import "testing"
// TestISBN checks that ISBN extracts an ISBN from various textual forms,
// rejects malformed input with an empty result, and normalises valid
// ISBN-10 and ISBN-13 values to 13 digits.
func TestISBN(t *testing.T) {
	tests := []struct {
		src  string
		want string
	}{
		{"", ""},
		// Too short to be an ISBN.
		{"978074341", ""},
		// Right length, wrong check digit.
		{"9780743412395", ""},
		{"9780743412391", "9780743412391"},
		// ISBN-10 forms are converted to ISBN-13.
		{"0-688-12189-6", "9780688121891"},
		{"033026155X", "9780330261555"},
		{"033026155x", "9780330261555"},
		{"0307756432", "9780307756435"},
		// Surrounding text and trailing digits are ignored.
		{"urn:isbn:978-3-8387-0337-4:", "9783838703374"},
		{"EPUB9788865971468-113465", "9788865971468"},
	}
	for _, tt := range tests {
		// %q keeps empty inputs and results visible in the failure output.
		if got := ISBN(tt.src); got != tt.want {
			t.Errorf("ISBN(%q) = %q, want %q", tt.src, got, tt.want)
		}
	}
}

View file

@ -4,7 +4,7 @@ import (
"regexp"
"strings"
"git.gitorious.org/go-pkg/epubgo.git"
"github.com/meskio/epubgo"
)
type MetaData map[string]interface{}
@ -33,7 +33,10 @@ func EpubMetadata(epub *epubgo.Epub) MetaData {
attr, _ := epub.MetadataAttr(m)
for i, d := range data {
if attr[i]["scheme"] == "ISBN" {
metadata["isbn"] = d
isbn := ISBN(d)
if isbn != "" {
metadata["isbn"] = isbn
}
}
}
default: