This repository has been archived on 2025-03-01. You can view files and clone it, but cannot push or open issues or pull requests.
trantor/language.go

69 lines
1.2 KiB
Go
Raw Normal View History

2015-01-22 23:02:00 -06:00
// +build prod
package main
import (
"io/ioutil"
"strings"
"git.gitorious.org/go-pkg/epubgo.git"
"github.com/rainycape/cld2"
)
// GuessLang tries to detect the language of an epub from its text content.
//
// Every spine item that can be opened and fully read is passed to
// cld2.Detect, and the most frequent detected code (see commonLang) is taken
// as the language of the book. When that code is not "un" and differentLang
// reports that it does not match orig_langs, a one-element slice holding the
// detected code is returned. In every other case, including failure to obtain
// the spine, orig_langs is returned unchanged.
func GuessLang(epub *epubgo.Epub, orig_langs []string) []string {
	spine, err := epub.Spine()
	if err != nil {
		return orig_langs
	}

	var langs []string
	for more := true; more; {
		html, err := spine.Open()
		// Advance before looking at err so an item that fails to open can
		// never stall the loop; a non-nil error from Next ends the iteration
		// after the current item has been handled.
		more = spine.Next() == nil
		if err != nil {
			continue
		}

		buff, err := ioutil.ReadAll(html)
		// Close right away instead of deferring: a defer inside the loop
		// would keep every spine item open until GuessLang returns.
		html.Close()
		if err != nil {
			continue
		}
		langs = append(langs, cld2.Detect(string(buff)))
	}

	lang := commonLang(langs)
	if lang != "un" && differentLang(lang, orig_langs) {
		return []string{lang}
	}
	return orig_langs
}
// commonLang returns the language code that occurs most often in langs.
//
// Entries equal to "un" are never selected. If langs is empty or contains
// only "un" entries, "un" is returned. When several codes share the highest
// count, the one that appears first in langs wins, so the result is the same
// on every call for the same input.
func commonLang(langs []string) string {
	count := make(map[string]int, len(langs))
	for _, l := range langs {
		count[l]++
	}

	lang := "un"
	maxcount := 0
	// Walk the input slice rather than the map: map iteration order is
	// randomized, which would make the winner of a tie change between runs.
	for _, l := range langs {
		if l == "un" {
			continue
		}
		if c := count[l]; c > maxcount {
			lang = l
			maxcount = c
		}
	}
	return lang
}
// differentLang reports whether lang differs from the language declared in
// orig_langs.
//
// Only the first entry of orig_langs is considered: its first two bytes are
// lower-cased and compared with lang. If orig_langs is empty, or its first
// entry is shorter than two bytes, the declared language is treated as "un".
func differentLang(lang string, orig_langs []string) bool {
	orig_lang := "un"
	// Check the length of the first entry (not of the slice) so that slicing
	// [0:2] cannot go out of range and a single declared language is honoured.
	if len(orig_langs) > 0 && len(orig_langs[0]) >= 2 {
		orig_lang = strings.ToLower(orig_langs[0][0:2])
	}
	return orig_lang != lang
}