Use text search
This commit is contained in:
parent
fe74245dad
commit
078b210392
2 changed files with 21 additions and 51 deletions
3
README
3
README
|
@ -13,7 +13,7 @@ https://gitorious.org/trantor/
|
||||||
In order to run Trantor, you need to install the following packages:
|
In order to run Trantor, you need to install the following packages:
|
||||||
|
|
||||||
* Go language
|
* Go language
|
||||||
* Mongodb
|
* Mongodb (>= 2.6)
|
||||||
* Bazaar
|
* Bazaar
|
||||||
* Git
|
* Git
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@ You also need to install Go dependencies:
|
||||||
# go get gopkg.in/mgo.v2 gopkg.in/mgo.v2/bson github.com/gorilla/sessions \
|
# go get gopkg.in/mgo.v2 gopkg.in/mgo.v2/bson github.com/gorilla/sessions \
|
||||||
github.com/gorilla/securecookie github.com/gorilla/mux \
|
github.com/gorilla/securecookie github.com/gorilla/mux \
|
||||||
github.com/nfnt/resize github.com/cihub/seelog \
|
github.com/nfnt/resize github.com/cihub/seelog \
|
||||||
gopkgs.com/unidecode.v1 \
|
|
||||||
code.google.com/p/go.crypto/scrypt
|
code.google.com/p/go.crypto/scrypt
|
||||||
|
|
||||||
== Installation ==
|
== Installation ==
|
||||||
|
|
|
@ -4,11 +4,9 @@ import (
|
||||||
log "github.com/cihub/seelog"
|
log "github.com/cihub/seelog"
|
||||||
|
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
|
||||||
|
|
||||||
"gopkg.in/mgo.v2"
|
"gopkg.in/mgo.v2"
|
||||||
"gopkg.in/mgo.v2/bson"
|
"gopkg.in/mgo.v2/bson"
|
||||||
"gopkgs.com/unidecode.v1"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -38,7 +36,6 @@ type Book struct {
|
||||||
Active bool
|
Active bool
|
||||||
BadQuality int `bad_quality`
|
BadQuality int `bad_quality`
|
||||||
BadQualityReporters []string `bad_quality_reporters`
|
BadQualityReporters []string `bad_quality_reporters`
|
||||||
Keywords []string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func indexBooks(coll *mgo.Collection) {
|
func indexBooks(coll *mgo.Collection) {
|
||||||
|
@ -56,8 +53,9 @@ func indexBooks(coll *mgo.Collection) {
|
||||||
Key: []string{"active", "-bad_quality", "-_id"},
|
Key: []string{"active", "-bad_quality", "-_id"},
|
||||||
Background: true,
|
Background: true,
|
||||||
},
|
},
|
||||||
|
// TODO: there are no weights in mgo
|
||||||
}
|
}
|
||||||
for _, k := range []string{"keywords", "lang", "title", "author", "subject"} {
|
for _, k := range []string{"lang", "title", "author", "subject"} {
|
||||||
idx := mgo.Index{
|
idx := mgo.Index{
|
||||||
Key: []string{"active", k, "-_id"},
|
Key: []string{"active", k, "-_id"},
|
||||||
Background: true,
|
Background: true,
|
||||||
|
@ -74,7 +72,7 @@ func indexBooks(coll *mgo.Collection) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func addBook(coll *mgo.Collection, book map[string]interface{}) error {
|
func addBook(coll *mgo.Collection, book map[string]interface{}) error {
|
||||||
book["keywords"] = keywords(book)
|
book["_lang"] = metadataLang(book)
|
||||||
return coll.Insert(book)
|
return coll.Insert(book)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,13 +85,13 @@ func getNewBooks(coll *mgo.Collection, length int, start int) (books []Book, num
|
||||||
}
|
}
|
||||||
|
|
||||||
func _getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) {
|
func _getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) {
|
||||||
sort := []string{}
|
sort := []string{"$textScore:score"}
|
||||||
if _, present := query["bad_quality"]; present {
|
if _, present := query["bad_quality"]; present {
|
||||||
sort = append(sort, "-bad_quality")
|
sort = append(sort, "-bad_quality")
|
||||||
}
|
}
|
||||||
sort = append(sort, "-_id")
|
sort = append(sort, "-_id")
|
||||||
|
|
||||||
q := coll.Find(query).Sort(sort...)
|
q := coll.Find(query).Select(bson.M{"score": bson.M{"$meta": "textScore"}}).Sort(sort...)
|
||||||
num, err = q.Count()
|
num, err = q.Count()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
|
@ -129,7 +127,9 @@ func updateBook(coll *mgo.Collection, id string, data map[string]interface{}) er
|
||||||
book[k] = v
|
book[k] = v
|
||||||
}
|
}
|
||||||
|
|
||||||
data["keywords"] = keywords(book)
|
if lang := metadataLang(book); lang != "" {
|
||||||
|
data["_lang"] = lang
|
||||||
|
}
|
||||||
return coll.Update(bson.M{"id": id}, bson.M{"$set": data})
|
return coll.Update(bson.M{"id": id}, bson.M{"$set": data})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,7 +168,7 @@ func isBookActive(coll *mgo.Collection, id string) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildQuery(q string) bson.M {
|
func buildQuery(q string) bson.M {
|
||||||
var keywords []string
|
text := ""
|
||||||
query := bson.M{"active": true}
|
query := bson.M{"active": true}
|
||||||
words := strings.Split(q, " ")
|
words := strings.Split(q, " ")
|
||||||
for _, w := range words {
|
for _, w := range words {
|
||||||
|
@ -180,51 +180,22 @@ func buildQuery(q string) bson.M {
|
||||||
query[tag[0]] = bson.RegEx{tag[1], "i"} //FIXME: this should be a list
|
query[tag[0]] = bson.RegEx{tag[1], "i"} //FIXME: this should be a list
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
toks := tokens(w)
|
if len(text) != 0 {
|
||||||
keywords = append(keywords, toks...)
|
text += " "
|
||||||
|
}
|
||||||
|
text += w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(keywords) > 0 {
|
if len(text) > 0 {
|
||||||
query["keywords"] = bson.M{"$all": keywords}
|
query["$text"] = bson.M{"$search": text}
|
||||||
}
|
}
|
||||||
return query
|
return query
|
||||||
}
|
}
|
||||||
|
|
||||||
func keywords(b map[string]interface{}) (k []string) {
|
func metadataLang(book map[string]interface{}) string {
|
||||||
title, _ := b["title"].(string)
|
lang, ok := book["lang"].([]string)
|
||||||
k = tokens(title)
|
if !ok || len(lang) == 0 || len(lang[0]) < 2 {
|
||||||
|
return ""
|
||||||
k = append(k, listKeywords(b["author"])...)
|
|
||||||
|
|
||||||
publisher, _ := b["publisher"].(string)
|
|
||||||
k = append(k, tokens(publisher)...)
|
|
||||||
|
|
||||||
k = append(k, listKeywords(b["subject"])...)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func listKeywords(v interface{}) (k []string) {
|
|
||||||
list, ok := v.([]string)
|
|
||||||
if !ok {
|
|
||||||
list, _ := v.([]interface{})
|
|
||||||
for _, e := range list {
|
|
||||||
str := e.(string)
|
|
||||||
k = append(k, tokens(str)...)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
return strings.ToLower(lang[0][0:2])
|
||||||
for _, e := range list {
|
|
||||||
k = append(k, tokens(e)...)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func tokens(str string) []string {
|
|
||||||
str = unidecode.Unidecode(str)
|
|
||||||
str = strings.ToLower(str)
|
|
||||||
f := func(r rune) bool {
|
|
||||||
return unicode.IsControl(r) || unicode.IsPunct(r) || unicode.IsSpace(r)
|
|
||||||
}
|
|
||||||
return strings.FieldsFunc(str, f)
|
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue