package parser

import (
	"io/ioutil"
	"strings"
	"unicode/utf8"

	"github.com/jmhodges/gocld2"
	"github.com/meskio/epubgo"
)

// GuessLang returns a language code for the given book.
//
// Every document reachable through the book's spine is read and, when its
// content is valid UTF-8, handed to cld2 for language detection. The language
// detected most often wins. When the spine cannot be obtained, or when no
// document yields a language other than "un", the result is derived from
// origLangs via normalizeLangs instead.
func GuessLang(epub *epubgo.Epub, origLangs []string) string {
	spine, err := epub.Spine()
	if err != nil {
		return normalizeLangs(origLangs)
	}

	var langs []string
	var errSpine error
	for errSpine == nil {
		html, err := spine.Open()
		// Advance before inspecting the Open error so that a document that
		// fails to open is skipped rather than retried on the next pass.
		errSpine = spine.Next()
		if err != nil {
			continue
		}

		buff, err := ioutil.ReadAll(html)
		// Close right away instead of deferring: a defer inside this loop
		// would keep every spine document open until GuessLang returns.
		html.Close()
		if err != nil {
			continue
		}

		if utf8.Valid(buff) {
			langs = append(langs, cld2.Detect(string(buff)))
		}
	}

	lang := commonLang(langs)
	if lang == "un" {
		return normalizeLangs(origLangs)
	}
	return lang
}

// commonLang returns the entry that occurs most often in langs, ignoring
// "un" entries. It returns "un" when langs is empty or contains nothing but
// "un". When several languages share the highest count, the one that appears
// first in langs is returned, so the result is deterministic.
func commonLang(langs []string) string {
	count := make(map[string]int, len(langs))
	for _, l := range langs {
		count[l]++
	}

	lang := "un"
	maxcount := 0
	// Range over the slice, not the map: map iteration order is randomized,
	// which would make the winner of a tie vary from run to run.
	for _, l := range langs {
		if l == "un" {
			continue
		}
		if c := count[l]; c > maxcount {
			lang = l
			maxcount = c
		}
	}
	return lang
}

// normalizeLangs reduces the first entry of langs to a short lower-case
// language code. It returns "un" when langs is empty; entries after the
// first are ignored.
//
// Anything from the first '-' or '_' onwards is dropped, so "en-US" becomes
// "en" and "ast-ES" becomes "ast". If what remains is still longer than three
// bytes it is cut to its first two bytes, so "english" becomes "en".
func normalizeLangs(langs []string) string {
	if len(langs) == 0 {
		return "un"
	}

	lang := langs[0]
	// Keep only the primary subtag; a leading separator (index 0) is left
	// alone so the value is never reduced to an empty string here.
	if i := strings.IndexAny(lang, "-_"); i > 0 {
		lang = lang[:i]
	}
	if len(lang) > 3 {
		lang = lang[:2]
	}
	return strings.ToLower(lang)
}