From 93d31b53feba1fb5130beeb0a34b87c76b7f5c91 Mon Sep 17 00:00:00 2001 From: Las Zenow Date: Wed, 2 Jul 2014 20:58:00 -0500 Subject: [PATCH] Move the query parsing to the database This implies moving the tokenization as well. --- admin.go | 5 ++-- database/books.go | 63 +++++++++++++++++++++++++++++++++++++++++--- database/database.go | 18 ++++++------- search.go | 22 +--------------- store.go | 27 ------------------- trantor.go | 3 +-- upload.go | 1 - 7 files changed, 73 insertions(+), 66 deletions(-) diff --git a/admin.go b/admin.go index 47f0146..c82b789 100644 --- a/admin.go +++ b/admin.go @@ -95,7 +95,6 @@ func saveHandler(h handler) { "author": author, "subject": subject, "lang": lang} - book["keywords"] = keywords(book) err := h.db.UpdateBook(id, book) if err != nil { notFound(h) @@ -155,8 +154,8 @@ func newHandler(h handler) { } for i, b := range res { data.Books[i].B = b - _, data.Books[i].TitleFound, _ = h.db.GetBooks(buildQuery("title:"+b.Title), 1, 0) - _, data.Books[i].AuthorFound, _ = h.db.GetBooks(buildQuery("author:"+strings.Join(b.Author, " author:")), 1, 0) + _, data.Books[i].TitleFound, _ = h.db.GetBooks("title:"+b.Title, 1, 0) + _, data.Books[i].AuthorFound, _ = h.db.GetBooks("author:"+strings.Join(b.Author, " author:"), 1, 0) } data.Page = page + 1 if num > (page+1)*NEW_ITEMS_PAGE { diff --git a/database/books.go b/database/books.go index ecd9356..1672ae9 100644 --- a/database/books.go +++ b/database/books.go @@ -2,8 +2,11 @@ package database import ( "errors" + "gopkgs.com/unidecode.v1" "labix.org/v2/mgo" "labix.org/v2/mgo/bson" + "strings" + "unicode" ) const ( @@ -36,11 +39,20 @@ type Book struct { Keywords []string } -func addBook(coll *mgo.Collection, book interface{}) error { +func addBook(coll *mgo.Collection, book map[string]interface{}) error { + book["keywords"] = keywords(book) return coll.Insert(book) } -func getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) { +func getBooks(coll 
*mgo.Collection, query string, length int, start int) (books []Book, num int, err error) { + return _getBooks(coll, buildQuery(query), length, start) +} + +func getNewBooks(coll *mgo.Collection, length int, start int) (books []Book, num int, err error) { + return _getBooks(coll, bson.M{"$nor": []bson.M{{"active": true}}}, length, start) +} + +func _getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) { q := coll.Find(query).Sort("-_id") num, err = q.Count() if err != nil { @@ -75,7 +87,8 @@ func deleteBook(coll *mgo.Collection, id bson.ObjectId) error { return coll.Remove(bson.M{"_id": id}) } -func updateBook(coll *mgo.Collection, id bson.ObjectId, data interface{}) error { +func updateBook(coll *mgo.Collection, id bson.ObjectId, data map[string]interface{}) error { + data["keywords"] = keywords(data) return coll.Update(bson.M{"_id": id}, bson.M{"$set": data}) } @@ -87,3 +100,47 @@ func bookActive(coll *mgo.Collection, id bson.ObjectId) bool { } return book.Active } + +func buildQuery(q string) bson.M { + var keywords []string + query := bson.M{"active": true} + words := strings.Split(q, " ") + for _, w := range words { + tag := strings.SplitN(w, ":", 2) + if len(tag) > 1 { + query[tag[0]] = bson.RegEx{tag[1], "i"} + } else { + toks := tokens(w) + keywords = append(keywords, toks...) + } + } + if len(keywords) > 0 { + query["keywords"] = bson.M{"$all": keywords} + } + return query +} + +func keywords(b map[string]interface{}) (k []string) { + title, _ := b["title"].(string) + k = tokens(title) + author, _ := b["author"].([]string) + for _, a := range author { + k = append(k, tokens(a)...) + } + publisher, _ := b["publisher"].(string) + k = append(k, tokens(publisher)...) + subject, _ := b["subject"].([]string) + for _, s := range subject { + k = append(k, tokens(s)...) 
+ } + return +} + +func tokens(str string) []string { + str = unidecode.Unidecode(str) + str = strings.ToLower(str) + f := func(r rune) bool { + return unicode.IsControl(r) || unicode.IsPunct(r) || unicode.IsSpace(r) + } + return strings.FieldsFunc(str, f) +} diff --git a/database/database.go b/database/database.go index be77758..06a4613 100644 --- a/database/database.go +++ b/database/database.go @@ -43,37 +43,37 @@ func (db *DB) Copy() *DB { return dbCopy } -func (db *DB) AddBook(book interface{}) error { +func (db *DB) AddBook(book map[string]interface{}) error { booksColl := db.session.DB(db.name).C(books_coll) return addBook(booksColl, book) } -// FIXME: don't export bson data -func (db *DB) GetBooks(query bson.M, length int, start int) (books []Book, num int, err error) { +func (db *DB) GetBooks(query string, length int, start int) (books []Book, num int, err error) { booksColl := db.session.DB(db.name).C(books_coll) return getBooks(booksColl, query, length, start) } +func (db *DB) GetNewBooks(length int, start int) (books []Book, num int, err error) { + booksColl := db.session.DB(db.name).C(books_coll) + return getNewBooks(booksColl, length, start) +} + func (db *DB) GetBookId(id string) (Book, error) { booksColl := db.session.DB(db.name).C(books_coll) return getBookId(booksColl, id) } +// FIXME: don't export bson data func (db *DB) DeleteBook(id bson.ObjectId) error { booksColl := db.session.DB(db.name).C(books_coll) return deleteBook(booksColl, id) } -func (db *DB) UpdateBook(id bson.ObjectId, data interface{}) error { +func (db *DB) UpdateBook(id bson.ObjectId, data map[string]interface{}) error { booksColl := db.session.DB(db.name).C(books_coll) return updateBook(booksColl, id, data) } -func (db *DB) GetNewBooks(length int, start int) (books []Book, num int, err error) { - booksColl := db.session.DB(db.name).C(books_coll) - return getBooks(booksColl, bson.M{"$nor": []bson.M{{"active": true}}}, length, start) -} - func (db *DB) BookActive(id 
bson.ObjectId) bool { booksColl := db.session.DB(db.name).C(books_coll) return bookActive(booksColl, id) diff --git a/search.go b/search.go index b8f81fe..1267af8 100644 --- a/search.go +++ b/search.go @@ -2,31 +2,11 @@ package main import ( "git.gitorious.org/trantor/trantor.git/database" - "labix.org/v2/mgo/bson" "net/http" "strconv" "strings" ) -func buildQuery(q string) bson.M { - var keywords []string - query := bson.M{"active": true} - words := strings.Split(q, " ") - for _, w := range words { - tag := strings.SplitN(w, ":", 2) - if len(tag) > 1 { - query[tag[0]] = bson.RegEx{tag[1], "i"} - } else { - toks := tokens(w) - keywords = append(keywords, toks...) - } - } - if len(keywords) > 0 { - query["keywords"] = bson.M{"$all": keywords} - } - return query -} - type searchData struct { S Status Found int @@ -52,7 +32,7 @@ func searchHandler(h handler) { } } items_page := itemsPage(h.r) - res, num, _ := h.db.GetBooks(buildQuery(req), items_page, page*items_page) + res, num, _ := h.db.GetBooks(req, items_page, page*items_page) var data searchData data.S = GetStatus(h) diff --git a/store.go b/store.go index b7a4ad2..2a8f1f7 100644 --- a/store.go +++ b/store.go @@ -4,13 +4,11 @@ import ( "bytes" "git.gitorious.org/go-pkg/epubgo.git" "git.gitorious.org/trantor/trantor.git/database" - "gopkgs.com/unidecode.v1" "io" "io/ioutil" "labix.org/v2/mgo/bson" "regexp" "strings" - "unicode" ) func OpenBook(id bson.ObjectId, db *database.DB) (*epubgo.Epub, error) { @@ -115,28 +113,3 @@ func parseDate(date []string) string { } return strings.Replace(date[0], "Unspecified: ", "", -1) } - -func keywords(b map[string]interface{}) (k []string) { - title, _ := b["title"].(string) - k = tokens(title) - author, _ := b["author"].([]string) - for _, a := range author { - k = append(k, tokens(a)...) - } - publisher, _ := b["publisher"].(string) - k = append(k, tokens(publisher)...) - subject, _ := b["subject"].([]string) - for _, s := range subject { - k = append(k, tokens(s)...) 
- } - return -} - -func tokens(str string) []string { - str = unidecode.Unidecode(str) - str = strings.ToLower(str) - f := func(r rune) bool { - return unicode.IsControl(r) || unicode.IsPunct(r) || unicode.IsSpace(r) - } - return strings.FieldsFunc(str, f) -} diff --git a/trantor.go b/trantor.go index 7edf6af..533f620 100644 --- a/trantor.go +++ b/trantor.go @@ -6,7 +6,6 @@ import ( "git.gitorious.org/trantor/trantor.git/database" "github.com/gorilla/mux" "io" - "labix.org/v2/mgo/bson" "net/http" "strings" ) @@ -103,7 +102,7 @@ func indexHandler(h handler) { data.Tags, _ = h.db.GetTags() data.S = GetStatus(h) data.S.Home = true - data.Books, data.Count, _ = h.db.GetBooks(bson.M{"active": true}, BOOKS_FRONT_PAGE, 0) + data.Books, data.Count, _ = h.db.GetBooks("", BOOKS_FRONT_PAGE, 0) data.VisitedBooks, _ = h.db.GetVisitedBooks() data.DownloadedBooks, _ = h.db.GetDownloadedBooks() data.News = getNews(1, DAYS_NEWS_INDEXPAGE, h.db) diff --git a/upload.go b/upload.go index d155e76..832e0da 100644 --- a/upload.go +++ b/upload.go @@ -142,6 +142,5 @@ func parseFile(epub *epubgo.Epub, db *database.DB) map[string]interface{} { book["cover"] = cover book["coversmall"] = coverSmall } - book["keywords"] = keywords(book) return book }