[WIP] migration to psql

TODO:
[ ] stats
[ ] indexes
This commit is contained in:
Las Zenow 2016-07-30 07:10:33 -04:00
parent e1bd235785
commit e72de38725
24 changed files with 648 additions and 936 deletions

View file

@ -3,23 +3,24 @@ package parser
import (
"io/ioutil"
"strings"
"unicode/utf8"
"github.com/jmhodges/gocld2"
"github.com/meskio/epubgo"
)
func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
func GuessLang(epub *epubgo.Epub, origLangs []string) string {
spine, err := epub.Spine()
if err != nil {
return orig_langs
return normalizeLangs(origLangs)
}
var err_spine error
err_spine = nil
var errSpine error
errSpine = nil
langs := []string{}
for err_spine == nil {
for errSpine == nil {
html, err := spine.Open()
err_spine = spine.Next()
errSpine = spine.Next()
if err != nil {
continue
}
@ -29,14 +30,16 @@ func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
if err != nil {
continue
}
langs = append(langs, cld2.Detect(string(buff)))
if utf8.Valid(buff) {
langs = append(langs, cld2.Detect(string(buff)))
}
}
lang := commonLang(langs)
if lang != "un" && differentLang(lang, orig_langs) {
return []string{lang}
if lang == "un" {
return normalizeLangs(origLangs)
}
return orig_langs
return lang
}
func commonLang(langs []string) string {
@ -56,11 +59,14 @@ func commonLang(langs []string) string {
return lang
}
// normalizeLangs reduces a list of declared language tags to a single
// lower-case language code.
//
// Only the first entry of langs is considered. Tags longer than three bytes
// (for example "en-US") are cut down to their first two bytes, while two- and
// three-letter codes are kept whole. When langs is empty the result is "un",
// the value GuessLang treats as "unknown".
func normalizeLangs(langs []string) string {
	if len(langs) == 0 {
		return "un"
	}

	lang := langs[0]
	if len(lang) > 3 {
		// Drop any region/script suffix, keeping the two-letter primary code.
		lang = lang[:2]
	}
	// Return the computed code; the previous version always returned "un",
	// discarding the declared language entirely.
	return strings.ToLower(lang)
}