[WIP] migration to psql
TODO: [ ] stats [ ] indexes
This commit is contained in:
parent
e1bd235785
commit
e72de38725
24 changed files with 648 additions and 936 deletions
|
@ -3,23 +3,24 @@ package parser
|
|||
import (
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/jmhodges/gocld2"
|
||||
"github.com/meskio/epubgo"
|
||||
)
|
||||
|
||||
func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
|
||||
func GuessLang(epub *epubgo.Epub, origLangs []string) string {
|
||||
spine, err := epub.Spine()
|
||||
if err != nil {
|
||||
return orig_langs
|
||||
return normalizeLangs(origLangs)
|
||||
}
|
||||
|
||||
var err_spine error
|
||||
err_spine = nil
|
||||
var errSpine error
|
||||
errSpine = nil
|
||||
langs := []string{}
|
||||
for err_spine == nil {
|
||||
for errSpine == nil {
|
||||
html, err := spine.Open()
|
||||
err_spine = spine.Next()
|
||||
errSpine = spine.Next()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
@ -29,14 +30,16 @@ func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
|
|||
if err != nil {
|
||||
continue
|
||||
}
|
||||
langs = append(langs, cld2.Detect(string(buff)))
|
||||
if utf8.Valid(buff) {
|
||||
langs = append(langs, cld2.Detect(string(buff)))
|
||||
}
|
||||
}
|
||||
|
||||
lang := commonLang(langs)
|
||||
if lang != "un" && differentLang(lang, orig_langs) {
|
||||
return []string{lang}
|
||||
if lang == "un" {
|
||||
return normalizeLangs(origLangs)
|
||||
}
|
||||
return orig_langs
|
||||
return lang
|
||||
}
|
||||
|
||||
func commonLang(langs []string) string {
|
||||
|
@ -56,11 +59,14 @@ func commonLang(langs []string) string {
|
|||
return lang
|
||||
}
|
||||
|
||||
// differentLang reports whether lang differs from the lower-cased two-byte
// prefix of the first entry of origLangs. When origLangs is empty, or its
// first entry is shorter than two bytes, the original language is taken to be
// "un", so every lang other than "un" is reported as different.
func differentLang(lang string, origLangs []string) bool {
	origLang := "un"
	// Guard on the length of the first entry rather than of the slice: the
	// slice expression below needs two bytes to stay in range, and a
	// single-entry list must still be consulted.
	if len(origLangs) > 0 && len(origLangs[0]) >= 2 {
		origLang = strings.ToLower(origLangs[0][0:2])
	}

	return origLang != lang
}

// normalizeLangs reduces a list of language tags to a single lower-case code
// taken from the first entry. An entry longer than three bytes (for example
// "en-US") is cut to its first two bytes; shorter entries are kept whole.
// It returns "un" when langs is empty.
func normalizeLangs(langs []string) string {
	if len(langs) == 0 {
		return "un"
	}

	lang := langs[0]
	if len(lang) > 3 {
		lang = lang[0:2]
	}

	// Hand back the computed code; returning the "un" literal here would
	// throw away the work above and make every call report "un".
	return strings.ToLower(lang)
}
|
||||
|
|
Reference in a new issue