Replace gocld2 with whatlanggo

This commit is contained in:
Las Zenow 2021-02-10 15:41:20 +00:00
parent 582298ce68
commit dfc3415972
3 changed files with 19 additions and 12 deletions

View file

@@ -1,15 +1,15 @@
package parser
import (
"io/ioutil"
"strings"
"unicode/utf8"
"github.com/jmhodges/gocld2"
"github.com/abadojack/whatlanggo"
"github.com/meskio/epubgo"
"github.com/microcosm-cc/bluemonday"
)
func GuessLang(epub *epubgo.Epub, origLangs []string) string {
cleaner := bluemonday.StrictPolicy()
spine, err := epub.Spine()
if err != nil {
return normalizeLangs(origLangs)
@@ -26,12 +26,10 @@ func GuessLang(epub *epubgo.Epub, origLangs []string) string {
}
defer html.Close()
buff, err := ioutil.ReadAll(html)
if err != nil {
continue
}
if utf8.Valid(buff) {
langs = append(langs, cld2.Detect(string(buff)))
buff := cleaner.SanitizeReader(html)
info := whatlanggo.Detect(buff.String())
if info.Confidence >= whatlanggo.ReliableConfidenceThreshold {
langs = append(langs, info.Lang.Iso6391())
}
}