diff --git a/README b/README
index aca998f..a82b9a3 100644
--- a/README
+++ b/README
@@ -26,14 +26,15 @@ Yo also need to install go dependences:
 # go get gopkg.in/mgo.v2 gopkg.in/mgo.v2/bson github.com/gorilla/sessions \
 	github.com/gorilla/securecookie github.com/gorilla/mux \
 	github.com/nfnt/resize github.com/cihub/seelog \
-	code.google.com/p/go.crypto/scrypt
+	code.google.com/p/go.crypto/scrypt \
+	github.com/rainycape/cld2
 
 
 == Installation ==
 === For admins ("for developers" below) ===
 
 Now you can install Trantor itself:
-# go get git.gitorious.org/trantor/trantor.git
+# go get -tags prod git.gitorious.org/trantor/trantor.git
 
 You can run trantor in /srv/www/trantor i.e. For this:
 
@@ -63,7 +64,8 @@ $ cd yournames-trantor
 You can edit config.go if you want to change the port and other configuration, by default is 8080
 
 Now you can compile Trantor:
-$ go build
+$ go build -tags prod
+(remove '-tags prod' for a faster compilation without language guessing)
 
 Now you can run it:
 $ ./yourname-trantor
diff --git a/language.go b/language.go
new file mode 100644
index 0000000..8ecc155
--- /dev/null
+++ b/language.go
@@ -0,0 +1,88 @@
+// +build prod
+
+package main
+
+import (
+	"io/ioutil"
+	"strings"
+
+	"git.gitorious.org/go-pkg/epubgo.git"
+	"github.com/rainycape/cld2"
+)
+
+// GuessLang runs cld2 language detection over every spine document of epub
+// and returns the most frequently detected language when it differs from the
+// first language declared in orig_langs. If the spine cannot be read, nothing
+// is detected ("un"), or the detection agrees with the metadata, orig_langs
+// is returned unchanged.
+func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
+	spine, err := epub.Spine()
+	if err != nil {
+		return orig_langs
+	}
+
+	var err_spine error
+	langs := []string{}
+	for err_spine == nil {
+		html, err := spine.Open()
+		// Advance before the error checks so that "continue" cannot loop
+		// forever on the same document.
+		err_spine = spine.Next()
+		if err != nil {
+			continue
+		}
+
+		buff, err := ioutil.ReadAll(html)
+		// Close right away: a defer inside the loop would keep every
+		// document open until GuessLang returns.
+		html.Close()
+		if err != nil {
+			continue
+		}
+		langs = append(langs, cld2.Detect(string(buff)))
+	}
+
+	lang := commonLang(langs)
+	if lang != "un" && differentLang(lang, orig_langs) {
+		return []string{lang}
+	}
+	return orig_langs
+}
+
+// commonLang returns the code that occurs most often in langs, ignoring the
+// "un" (unknown) marker. It returns "un" when langs holds no other code.
+// Ties are resolved in favour of the alphabetically smallest code so the
+// result does not depend on map iteration order.
+func commonLang(langs []string) string {
+	count := map[string]int{}
+	for _, l := range langs {
+		count[l]++
+	}
+
+	lang := "un"
+	maxcount := 0
+	for l, c := range count {
+		if l == "un" {
+			continue
+		}
+		if c > maxcount || (c == maxcount && l < lang) {
+			lang = l
+			maxcount = c
+		}
+	}
+	return lang
+}
+
+// differentLang reports whether lang differs from the first two letters
+// (lowercased) of the first entry of orig_langs. A missing or too short
+// first entry is treated as "un".
+func differentLang(lang string, orig_langs []string) bool {
+	orig_lang := "un"
+	// The length check must be on the first entry itself, since that is
+	// what gets sliced below.
+	if len(orig_langs) > 0 && len(orig_langs[0]) >= 2 {
+		orig_lang = strings.ToLower(orig_langs[0][0:2])
+	}
+
+	return orig_lang != lang
+}
diff --git a/language_develop.go b/language_develop.go
new file mode 100644
index 0000000..69ee859
--- /dev/null
+++ b/language_develop.go
@@ -0,0 +1,16 @@
+// +build !prod
+
+// This is a dummy implementation of GuessLang used to make the compilation faster on development
+//
+// To build trantor with the proper language guessing do:
+// $ go build -tags prod
+
+package main
+
+import (
+	"git.gitorious.org/go-pkg/epubgo.git"
+)
+
+func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
+	return orig_langs
+}
diff --git a/upload.go b/upload.go
index 743283f..9a61d30 100644
--- a/upload.go
+++ b/upload.go
@@ -124,7 +124,7 @@ func parseFile(epub *epubgo.Epub, store *storage.Store) (metadata map[string]int
 		case "date":
 			book[m] = parseDate(data)
 		case "language":
-			book["lang"] = data
+			book["lang"] = GuessLang(epub, data)
 		case "title", "contributor", "publisher":
 			book[m] = cleanStr(strings.Join(data, ", "))
 		case "identifier":
@@ -140,7 +140,7 @@ func parseFile(epub *epubgo.Epub, store *storage.Store) (metadata map[string]int
 	}
 
 	id = genId()
-	book["id"] = id //TODO
+	book["id"] = id
 	book["cover"] = GetCover(epub, id, store)
 	return book, id
 }