Tokenize, converting to ASCII

This commit is contained in:
Las Zenow 2014-06-29 22:49:50 -05:00
parent 59eaa4e2aa
commit 30af19cd62
3 changed files with 24 additions and 9 deletions

3
README
View file

@ -25,7 +25,8 @@ You also need to install Go dependencies:
# go get labix.org/v2/mgo/bson labix.org/v2/mgo/ github.com/gorilla/sessions \ # go get labix.org/v2/mgo/bson labix.org/v2/mgo/ github.com/gorilla/sessions \
github.com/gorilla/securecookie github.com/gorilla/mux \ github.com/gorilla/securecookie github.com/gorilla/mux \
github.com/nfnt/resize github.com/cihub/seelog github.com/nfnt/resize github.com/cihub/seelog \
gopkgs.com/unidecode.v1
== Installation == == Installation ==
=== For admins ("for developers" below) === === For admins ("for developers" below) ===

View file

@ -9,7 +9,7 @@ import (
) )
func buildQuery(q string) bson.M { func buildQuery(q string) bson.M {
var reg []bson.RegEx var keywords []string
query := bson.M{"active": true} query := bson.M{"active": true}
words := strings.Split(q, " ") words := strings.Split(q, " ")
for _, w := range words { for _, w := range words {
@ -17,11 +17,12 @@ func buildQuery(q string) bson.M {
if len(tag) > 1 { if len(tag) > 1 {
query[tag[0]] = bson.RegEx{tag[1], "i"} query[tag[0]] = bson.RegEx{tag[1], "i"}
} else { } else {
reg = append(reg, bson.RegEx{w, "i"}) toks := tokens(w)
keywords = append(keywords, toks...)
} }
} }
if len(reg) > 0 { if len(keywords) > 0 {
query["keywords"] = bson.M{"$all": reg} query["keywords"] = bson.M{"$all": keywords}
} }
return query return query
} }

View file

@ -4,11 +4,13 @@ import (
"bytes" "bytes"
"git.gitorious.org/go-pkg/epubgo.git" "git.gitorious.org/go-pkg/epubgo.git"
"git.gitorious.org/trantor/trantor.git/database" "git.gitorious.org/trantor/trantor.git/database"
"gopkgs.com/unidecode.v1"
"io" "io"
"io/ioutil" "io/ioutil"
"labix.org/v2/mgo/bson" "labix.org/v2/mgo/bson"
"regexp" "regexp"
"strings" "strings"
"unicode"
) )
func OpenBook(id bson.ObjectId, db *database.DB) (*epubgo.Epub, error) { func OpenBook(id bson.ObjectId, db *database.DB) (*epubgo.Epub, error) {
@ -116,14 +118,25 @@ func parseDate(date []string) string {
func keywords(b map[string]interface{}) (k []string) { func keywords(b map[string]interface{}) (k []string) {
title, _ := b["title"].(string) title, _ := b["title"].(string)
k = strings.Split(title, " ") k = tokens(title)
author, _ := b["author"].([]string) author, _ := b["author"].([]string)
for _, a := range author { for _, a := range author {
k = append(k, strings.Split(a, " ")...) k = append(k, tokens(a)...)
} }
publisher, _ := b["publisher"].(string) publisher, _ := b["publisher"].(string)
k = append(k, strings.Split(publisher, " ")...) k = append(k, tokens(publisher)...)
subject, _ := b["subject"].([]string) subject, _ := b["subject"].([]string)
k = append(k, subject...) for _, s := range subject {
k = append(k, tokens(s)...)
}
return return
} }
// tokens breaks str into normalized search tokens.
//
// The input is first passed through unidecode.Unidecode and lowercased;
// the result is then split on every run of control, punctuation or
// whitespace runes. Separator runes are dropped and no empty tokens are
// produced, so an input made only of separators yields an empty slice.
func tokens(str string) []string {
	// isSeparator reports whether c delimits two tokens.
	isSeparator := func(c rune) bool {
		switch {
		case unicode.IsControl(c), unicode.IsPunct(c), unicode.IsSpace(c):
			return true
		}
		return false
	}

	normalized := strings.ToLower(unidecode.Unidecode(str))
	return strings.FieldsFunc(normalized, isSeparator)
}