Use text search

This commit is contained in:
Las Zenow 2014-10-26 12:12:27 -06:00
parent fe74245dad
commit 078b210392
2 changed files with 21 additions and 51 deletions

3
README
View file

@ -13,7 +13,7 @@ https://gitorious.org/trantor/
In order to run Trantor, you need to install the following packages: In order to run Trantor, you need to install the following packages:
* Go language * Go language
* Mongodb * Mongodb (>= 2.6)
* Bazaar * Bazaar
* Git * Git
@ -26,7 +26,6 @@ You also need to install Go dependencies:
# go get gopkg.in/mgo.v2 gopkg.in/mgo.v2/bson github.com/gorilla/sessions \ # go get gopkg.in/mgo.v2 gopkg.in/mgo.v2/bson github.com/gorilla/sessions \
github.com/gorilla/securecookie github.com/gorilla/mux \ github.com/gorilla/securecookie github.com/gorilla/mux \
github.com/nfnt/resize github.com/cihub/seelog \ github.com/nfnt/resize github.com/cihub/seelog \
gopkgs.com/unidecode.v1 \
code.google.com/p/go.crypto/scrypt code.google.com/p/go.crypto/scrypt
== Installation == == Installation ==

View file

@ -4,11 +4,9 @@ import (
log "github.com/cihub/seelog" log "github.com/cihub/seelog"
"strings" "strings"
"unicode"
"gopkg.in/mgo.v2" "gopkg.in/mgo.v2"
"gopkg.in/mgo.v2/bson" "gopkg.in/mgo.v2/bson"
"gopkgs.com/unidecode.v1"
) )
const ( const (
@ -38,7 +36,6 @@ type Book struct {
Active bool Active bool
BadQuality int `bad_quality` BadQuality int `bad_quality`
BadQualityReporters []string `bad_quality_reporters` BadQualityReporters []string `bad_quality_reporters`
Keywords []string
} }
func indexBooks(coll *mgo.Collection) { func indexBooks(coll *mgo.Collection) {
@ -56,8 +53,9 @@ func indexBooks(coll *mgo.Collection) {
Key: []string{"active", "-bad_quality", "-_id"}, Key: []string{"active", "-bad_quality", "-_id"},
Background: true, Background: true,
}, },
// TODO: there are no weights in mgo
} }
for _, k := range []string{"keywords", "lang", "title", "author", "subject"} { for _, k := range []string{"lang", "title", "author", "subject"} {
idx := mgo.Index{ idx := mgo.Index{
Key: []string{"active", k, "-_id"}, Key: []string{"active", k, "-_id"},
Background: true, Background: true,
@ -74,7 +72,7 @@ func indexBooks(coll *mgo.Collection) {
} }
func addBook(coll *mgo.Collection, book map[string]interface{}) error { func addBook(coll *mgo.Collection, book map[string]interface{}) error {
book["keywords"] = keywords(book) book["_lang"] = metadataLang(book)
return coll.Insert(book) return coll.Insert(book)
} }
@ -87,13 +85,13 @@ func getNewBooks(coll *mgo.Collection, length int, start int) (books []Book, num
} }
func _getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) { func _getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) {
sort := []string{} sort := []string{"$textScore:score"}
if _, present := query["bad_quality"]; present { if _, present := query["bad_quality"]; present {
sort = append(sort, "-bad_quality") sort = append(sort, "-bad_quality")
} }
sort = append(sort, "-_id") sort = append(sort, "-_id")
q := coll.Find(query).Sort(sort...) q := coll.Find(query).Select(bson.M{"score": bson.M{"$meta": "textScore"}}).Sort(sort...)
num, err = q.Count() num, err = q.Count()
if err != nil { if err != nil {
return return
@ -129,7 +127,9 @@ func updateBook(coll *mgo.Collection, id string, data map[string]interface{}) er
book[k] = v book[k] = v
} }
data["keywords"] = keywords(book) if lang := metadataLang(book); lang != "" {
data["_lang"] = lang
}
return coll.Update(bson.M{"id": id}, bson.M{"$set": data}) return coll.Update(bson.M{"id": id}, bson.M{"$set": data})
} }
@ -168,7 +168,7 @@ func isBookActive(coll *mgo.Collection, id string) bool {
} }
func buildQuery(q string) bson.M { func buildQuery(q string) bson.M {
var keywords []string text := ""
query := bson.M{"active": true} query := bson.M{"active": true}
words := strings.Split(q, " ") words := strings.Split(q, " ")
for _, w := range words { for _, w := range words {
@ -180,51 +180,22 @@ func buildQuery(q string) bson.M {
query[tag[0]] = bson.RegEx{tag[1], "i"} //FIXME: this should be a list query[tag[0]] = bson.RegEx{tag[1], "i"} //FIXME: this should be a list
} }
} else { } else {
toks := tokens(w) if len(text) != 0 {
keywords = append(keywords, toks...) text += " "
}
text += w
} }
} }
if len(keywords) > 0 { if len(text) > 0 {
query["keywords"] = bson.M{"$all": keywords} query["$text"] = bson.M{"$search": text}
} }
return query return query
} }
func keywords(b map[string]interface{}) (k []string) { func metadataLang(book map[string]interface{}) string {
title, _ := b["title"].(string) lang, ok := book["lang"].([]string)
k = tokens(title) if !ok || len(lang) == 0 || len(lang[0]) < 2 {
return ""
k = append(k, listKeywords(b["author"])...)
publisher, _ := b["publisher"].(string)
k = append(k, tokens(publisher)...)
k = append(k, listKeywords(b["subject"])...)
return
}
func listKeywords(v interface{}) (k []string) {
list, ok := v.([]string)
if !ok {
list, _ := v.([]interface{})
for _, e := range list {
str := e.(string)
k = append(k, tokens(str)...)
}
return
} }
return strings.ToLower(lang[0][0:2])
for _, e := range list {
k = append(k, tokens(e)...)
}
return
}
func tokens(str string) []string {
str = unidecode.Unidecode(str)
str = strings.ToLower(str)
f := func(r rune) bool {
return unicode.IsControl(r) || unicode.IsPunct(r) || unicode.IsSpace(r)
}
return strings.FieldsFunc(str, f)
} }