package parser

import (
	"strings"

	"github.com/abadojack/whatlanggo"
	"github.com/meskio/epubgo"
	"github.com/microcosm-cc/bluemonday"
)

// GuessLang returns a normalized language code for epub.
//
// It walks the spine of the book, strips the markup from every document that
// can be opened and runs language detection on the remaining text. Among the
// documents whose detection is reliable, the code seen most often wins. If the
// spine cannot be read, or no document yields a reliable detection, the first
// entry of origLangs is normalized and returned instead ("un" when origLangs
// is empty).
func GuessLang(epub *epubgo.Epub, origLangs []string) string {
	spine, err := epub.Spine()
	if err != nil {
		return normalizeLangs(origLangs)
	}

	cleaner := bluemonday.StrictPolicy()

	var (
		langs    []string
		errSpine error
	)
	for errSpine == nil {
		html, err := spine.Open()
		// Advance before looking at err so that a document that fails to open
		// is skipped rather than retried on the next iteration.
		errSpine = spine.Next()
		if err != nil {
			continue
		}

		// The sanitizer reads the document into a buffer, so the reader is
		// closed right away instead of deferred: a defer inside this loop
		// would keep every document of the book open until GuessLang returns.
		text := cleaner.SanitizeReader(html).String()
		html.Close()

		info := whatlanggo.Detect(text)
		if info.Confidence >= whatlanggo.ReliableConfidenceThreshold {
			langs = append(langs, info.Lang.Iso6391())
		}
	}

	lang := commonLang(langs)
	if lang == "un" {
		return normalizeLangs(origLangs)
	}
	return normalizeLangs([]string{lang})
}

// commonLang returns the entry that occurs most often in langs, ignoring "un".
//
// When several entries are tied, the one that appears first in langs wins, so
// the result does not depend on map iteration order. It returns "un" when
// langs holds no entry other than "un".
func commonLang(langs []string) string {
	count := make(map[string]int, len(langs))
	for _, l := range langs {
		count[l]++
	}

	best, bestCount := "un", 0
	// Scan the slice, not the map, to get a stable winner among ties.
	for _, l := range langs {
		if l == "un" {
			continue
		}
		if c := count[l]; c > bestCount {
			best, bestCount = l, c
		}
	}
	return best
}

// normalizeLangs reduces the first entry of langs to a short lower-case code.
//
// Everything from the first "-" onwards is dropped ("en-US" becomes "en"), a
// remaining tag longer than three bytes is cut to its first two, and the
// result is lower-cased. It returns "un" when langs is empty or when nothing
// is left of the first entry.
func normalizeLangs(langs []string) string {
	if len(langs) == 0 {
		return "un"
	}

	lang := langs[0]
	if i := strings.IndexByte(lang, '-'); i >= 0 {
		lang = lang[:i]
	}
	if len(lang) > 3 {
		lang = lang[:2]
	}
	if lang == "" {
		// An empty tag carries no information; report it as unknown like the
		// empty-slice case above.
		return "un"
	}
	return strings.ToLower(lang)
}