Tokenize, converting to ASCII
This commit is contained in:
parent
59eaa4e2aa
commit
30af19cd62
3 changed files with 24 additions and 9 deletions
3
README
3
README
|
@ -25,7 +25,8 @@ You also need to install Go dependencies:
|
|||
|
||||
# go get labix.org/v2/mgo/bson labix.org/v2/mgo/ github.com/gorilla/sessions \
|
||||
github.com/gorilla/securecookie github.com/gorilla/mux \
|
||||
github.com/nfnt/resize github.com/cihub/seelog
|
||||
github.com/nfnt/resize github.com/cihub/seelog \
|
||||
gopkgs.com/unidecode.v1
|
||||
|
||||
== Installation ==
|
||||
=== For admins ("for developers" below) ===
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
)
|
||||
|
||||
func buildQuery(q string) bson.M {
|
||||
var reg []bson.RegEx
|
||||
var keywords []string
|
||||
query := bson.M{"active": true}
|
||||
words := strings.Split(q, " ")
|
||||
for _, w := range words {
|
||||
|
@ -17,11 +17,12 @@ func buildQuery(q string) bson.M {
|
|||
if len(tag) > 1 {
|
||||
query[tag[0]] = bson.RegEx{tag[1], "i"}
|
||||
} else {
|
||||
reg = append(reg, bson.RegEx{w, "i"})
|
||||
toks := tokens(w)
|
||||
keywords = append(keywords, toks...)
|
||||
}
|
||||
}
|
||||
if len(reg) > 0 {
|
||||
query["keywords"] = bson.M{"$all": reg}
|
||||
if len(keywords) > 0 {
|
||||
query["keywords"] = bson.M{"$all": keywords}
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
|
21
store.go
21
store.go
|
@ -4,11 +4,13 @@ import (
|
|||
"bytes"
|
||||
"git.gitorious.org/go-pkg/epubgo.git"
|
||||
"git.gitorious.org/trantor/trantor.git/database"
|
||||
"gopkgs.com/unidecode.v1"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
func OpenBook(id bson.ObjectId, db *database.DB) (*epubgo.Epub, error) {
|
||||
|
@ -116,14 +118,25 @@ func parseDate(date []string) string {
|
|||
|
||||
func keywords(b map[string]interface{}) (k []string) {
|
||||
title, _ := b["title"].(string)
|
||||
k = strings.Split(title, " ")
|
||||
k = tokens(title)
|
||||
author, _ := b["author"].([]string)
|
||||
for _, a := range author {
|
||||
k = append(k, strings.Split(a, " ")...)
|
||||
k = append(k, tokens(a)...)
|
||||
}
|
||||
publisher, _ := b["publisher"].(string)
|
||||
k = append(k, strings.Split(publisher, " ")...)
|
||||
k = append(k, tokens(publisher)...)
|
||||
subject, _ := b["subject"].([]string)
|
||||
k = append(k, subject...)
|
||||
for _, s := range subject {
|
||||
k = append(k, tokens(s)...)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// tokens normalizes str and splits it into keyword tokens.
//
// The string is passed through unidecode.Unidecode, lower-cased, and then cut
// at every run of control, punctuation or whitespace runes. Separator runs are
// dropped, so no empty tokens are produced; an empty or all-separator input
// yields an empty slice.
func tokens(str string) []string {
|
||||
// NOTE(review): presumably transliterates non-ASCII text to ASCII (third-party
// package, behaviour not shown here) — confirm against the unidecode docs.
str = unidecode.Unidecode(str)
|
||||
str = strings.ToLower(str)
|
||||
// f reports whether r is a token separator. Symbol runes such as '+' or '$'
// are not in the punctuation category, so they stay inside tokens.
f := func(r rune) bool {
|
||||
return unicode.IsControl(r) || unicode.IsPunct(r) || unicode.IsSpace(r)
|
||||
}
|
||||
return strings.FieldsFunc(str, f)
|
||||
}
|
||||
|
|
Reference in a new issue