Replace gocld2 with whatlanggo
This commit is contained in:
parent
582298ce68
commit
dfc3415972
3 changed files with 19 additions and 12 deletions
|
@ -1,15 +1,15 @@
|
|||
package parser
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/jmhodges/gocld2"
|
||||
"github.com/abadojack/whatlanggo"
|
||||
"github.com/meskio/epubgo"
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
)
|
||||
|
||||
func GuessLang(epub *epubgo.Epub, origLangs []string) string {
|
||||
cleaner := bluemonday.StrictPolicy()
|
||||
spine, err := epub.Spine()
|
||||
if err != nil {
|
||||
return normalizeLangs(origLangs)
|
||||
|
@ -26,12 +26,10 @@ func GuessLang(epub *epubgo.Epub, origLangs []string) string {
|
|||
}
|
||||
defer html.Close()
|
||||
|
||||
buff, err := ioutil.ReadAll(html)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if utf8.Valid(buff) {
|
||||
langs = append(langs, cld2.Detect(string(buff)))
|
||||
buff := cleaner.SanitizeReader(html)
|
||||
info := whatlanggo.Detect(buff.String())
|
||||
if info.Confidence >= whatlanggo.ReliableConfidenceThreshold {
|
||||
langs = append(langs, info.Lang.Iso6391())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Reference in a new issue