[WIP] migration to psql

TODO:
[ ] stats
[ ] indexes
This commit is contained in:
Las Zenow 2016-07-30 07:10:33 -04:00
parent e1bd235785
commit e72de38725
24 changed files with 648 additions and 936 deletions

View file

@ -1,238 +1,210 @@
package database
import (
log "github.com/cihub/seelog"
"strings"
"time"
"gopkg.in/mgo.v2"
"gopkg.in/mgo.v2/bson"
)
const (
// books_coll holds the name "books" — presumably the MongoDB collection that
// stored book records before the PostgreSQL migration. It is not referenced
// in the visible part of this file; TODO confirm it is still needed.
books_coll = "books"
)
// TODO: Author -> Authors, Subject -> Tags
// Book is the database record describing a single book and its metadata.
//
// Struct tags override the column mapping where the column name differs from
// the field name: Author is stored in "authors" and Subject in "tags", both
// as arrays.
type Book struct {
	Id          string
	Title       string
	Author      []string `sql:"authors" pg:",array"`
	Contributor string
	Publisher   string
	Description string
	Subject     []string `sql:"tags" pg:",array"`
	Date        string
	Lang        string
	Isbn        string
	FileSize    int
	Cover       bool
	Active      bool
	// UploadDate is stamped by AddBook when unset and refreshed by ActiveBook.
	UploadDate time.Time
	// Tsv maps to the tsv column that getBooks matches with to_tsquery.
	Tsv string
	//BadQuality          int      `bad_quality`
	// BadQualityReporters carries the sql:"-" tag, i.e. it is excluded from
	// the table mapping.
	BadQualityReporters []string `sql:"-"` // TODO: deprecate??
}
// history is one change-log entry for a book: Date is when the update was
// made and Changes holds, per modified key, the value the book had before the
// update (this is how updateBook fills it).
type history struct {
Date time.Time
Changes bson.M
}
// TODO: missing history
func indexBooks(coll *mgo.Collection) {
indexes := []mgo.Index{
{
Key: []string{"id"},
Unique: true,
Background: true,
},
{
Key: []string{"active", "-_id"},
Background: true,
},
{
Key: []string{"active", "-bad_quality", "-_id"},
Background: true,
},
{
Key: []string{"$text:title", "$text:author", "$text:contributor",
"$text:publisher", "$text:subject", "$text:description"},
Weights: map[string]int{"title": 20, "author": 20, "contributor": 15,
"publisher": 15, "subject": 10, "description": 5},
LanguageOverride: "_lang",
Background: true,
},
}
for _, k := range []string{"lang", "title", "author", "subject"} {
idx := mgo.Index{
Key: []string{"active", k, "-_id"},
Background: true,
}
indexes = append(indexes, idx)
// AddBook inserts book into the database.
//
// When book.UploadDate is unset (the zero time) it is stamped with the current
// time before the insert. book is received by value, so the caller's copy is
// never modified. The error reported by the insert, if any, is returned.
func (db *pgDB) AddBook(book Book) error {
	// IsZero is the documented way to detect an unset time.Time; comparing
	// with == also compares the Location pointer and can miss a zero instant.
	if book.UploadDate.IsZero() {
		book.UploadDate = time.Now()
	}
	return db.sql.Create(&book)
}
func addBook(coll *mgo.Collection, book map[string]interface{}) error {
book["_lang"] = metadataLang(book)
return coll.Insert(book)
// GetBooks returns the active books that match query.
//
// It delegates to getBooks with the active flag set to true, passing query,
// length and start through unchanged and returning its results as they are.
func (db *pgDB) GetBooks(query string, length int, start int) (books []Book, num int, err error) {
	const activeOnly = true
	books, num, err = db.getBooks(activeOnly, query, length, start)
	return books, num, err
}
func getBooks(coll *mgo.Collection, query string, length int, start int) (books []Book, num int, err error) {
q := buildQuery(query)
q["active"] = true
return _getBooks(coll, q, length, start)
// TODO: func (db *pgDB) GetBooksIter() Iter {
// GetNewBooks returns the books matching query that are not active yet.
//
// It delegates to getBooks with the active flag set to false, passing query,
// length and start through unchanged and returning its results as they are.
func (db *pgDB) GetNewBooks(query string, length int, start int) (books []Book, num int, err error) {
	const activeOnly = false
	books, num, err = db.getBooks(activeOnly, query, length, start)
	return books, num, err
}
// getNewBooks returns the books in coll that match query and are not marked
// active. The query built by buildQuery is extended with a "$nor" clause on
// {"active": true}; length, start and the result count are handled by
// _getBooks.
func getNewBooks(coll *mgo.Collection, query string, length int, start int) (books []Book, num int, err error) {
q := buildQuery(query)
q["$nor"] = []bson.M{{"active": true}}
return _getBooks(coll, q, length, start)
}
func (db *pgDB) getBooks(active bool, query string, length int, start int) (books []Book, num int, err error) {
sqlQuery := db.sql.Model(&books)
func getBooksIter(coll *mgo.Collection) Iter {
return coll.Find(bson.M{}).Iter()
}
func _getBooks(coll *mgo.Collection, query bson.M, length int, start int) (books []Book, num int, err error) {
q := getBookQuery(coll, query)
num, err = q.Count()
if err != nil {
return
}
if start != 0 {
q = q.Skip(start)
}
if length != 0 {
q = q.Limit(length)
searchCondition := "active = "
if active {
searchCondition = "true"
} else {
searchCondition = "false"
}
err = q.All(&books)
return
}
func getBookQuery(coll *mgo.Collection, query bson.M) *mgo.Query {
sort := []string{"$textScore:score"}
if _, present := query["bad_quality"]; present {
sort = append(sort, "-bad_quality")
params := []interface{}{}
textQuery, columnQuerys := buildQuery(query)
for _, c := range columnQuerys {
searchCondition = searchCondition + " AND " + c.column + " ILIKE ?"
params = append(params, c.value)
}
sort = append(sort, "-_id")
if textQuery != "" {
searchCondition = searchCondition + " AND to_tsquery(?) @@ tsv"
params = append(params, textQuery)
}
sqlQuery = sqlQuery.Where(searchCondition, params...)
return coll.Find(query).Select(bson.M{"score": bson.M{"$meta": "textScore"}}).Sort(sort...)
if textQuery != "" {
sqlQuery = sqlQuery.Order("ts_rank(tsv, to_tsquery(?)) DESC, upload_date DESC", textQuery)
} else {
sqlQuery = sqlQuery.Order("upload_date DESC")
}
num, err = sqlQuery.
Offset(start).
Limit(length).
SelectAndCountEstimate(100)
return books, num, err
}
func getBookId(coll *mgo.Collection, id string) (Book, error) {
// GetBookId looks up the book whose id column equals id.
//
// It returns the selected book together with the error reported by the
// query; when the error is non-nil the returned Book must not be relied on.
func (db *pgDB) GetBookId(id string) (Book, error) {
	var book Book
	err := db.sql.Model(&book).
		Where("id = ?", id).
		Select()
	return book, err
}
func deleteBook(coll *mgo.Collection, id string) error {
return coll.Remove(bson.M{"id": id})
// DeleteBook removes the book whose id column equals id.
//
// Only the error of the delete statement is returned; its result value is
// discarded.
func (db *pgDB) DeleteBook(id string) error {
	target := db.sql.Model(&Book{}).Where("id = ?", id)
	_, err := target.Delete()
	return err
}
func updateBook(coll *mgo.Collection, id string, data map[string]interface{}) error {
var book map[string]interface{}
record := history{time.Now(), bson.M{}}
err := coll.Find(bson.M{"id": id}).One(&book)
if err != nil {
return err
}
for k, _ := range data {
record.Changes[k] = book[k]
if k == "lang" {
data["_lang"] = metadataLang(data)
func (db *pgDB) UpdateBook(id string, data map[string]interface{}) error {
setCondition := ""
params := []interface{}{}
for col, val := range data {
colValid := false
for _, name := range []string{"title", "authors", "contributor", "publisher",
"description", "tags", "date", "lang", "isbn"} {
if col == name {
colValid = true
break
}
}
}
return coll.Update(bson.M{"id": id}, bson.M{"$set": data, "$push": bson.M{"history": record}})
}
func flagBadQuality(coll *mgo.Collection, id string, user string) error {
b, err := getBookId(coll, id)
if err != nil {
return err
}
for _, reporter := range b.BadQualityReporters {
if reporter == user {
return nil
if !colValid {
continue
}
if len(setCondition) != 0 {
setCondition += ", "
}
setCondition += col + " = ?"
params = append(params, val)
}
return coll.Update(
bson.M{"id": id},
bson.M{
"$inc": bson.M{"bad_quality": 1},
"$addToSet": bson.M{"bad_quality_reporters": user},
},
)
_, err := db.sql.Model(&Book{}).
Set(setCondition, params...).
Where("id = ?", id).
Update()
return err
}
func activeBook(coll *mgo.Collection, id string) error {
data := map[string]interface{}{"active": true}
return coll.Update(bson.M{"id": id}, bson.M{"$set": data})
// FlagBadQuality is currently a no-op: it ignores id and user and always
// returns nil.
func (db *pgDB) FlagBadQuality(id string, user string) error {
// TODO: delete me
return nil
}
func isBookActive(coll *mgo.Collection, id string) bool {
var book Book
err := coll.Find(bson.M{"id": id}).One(&book)
if err != nil {
// ActiveBook marks the book identified by id as active and sets its
// upload_date to the current time. The error of the update statement is
// returned.
func (db *pgDB) ActiveBook(id string) error {
	now := time.Now()
	update := db.sql.Model(&Book{}).
		Set("active = true, upload_date = ? ", now).
		Where("id = ?", id)
	_, err := update.Update()
	return err
}
// IsBookActive reports whether the book identified by id is marked active.
//
// Only the active column is selected. false is returned when the query fails
// or does not yield exactly one row.
func (db *pgDB) IsBookActive(id string) bool {
	var active []bool
	err := db.sql.Model(&Book{}).
		Column("active").
		Where("id = ?", id).
		Select(&active)
	if err != nil || len(active) != 1 {
		return false
	}
	return active[0]
}
func buildQuery(q string) bson.M {
text := ""
query := bson.M{}
words := strings.Split(q, " ")
// columnq is one column filter produced by buildQuery: the SQL expression to
// test and the ILIKE pattern to test it against.
type columnq struct {
	column string
	value  string
}

// likeEscaper makes user input literal inside a LIKE/ILIKE pattern by
// escaping the two wildcards and the backslash, which is PostgreSQL's default
// LIKE escape character.
var likeEscaper = strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`)

// buildQuery splits a user search string into a free-text query and a list
// of column filters.
//
// The input is split on spaces. A word of the form "name:value" with a
// non-empty value becomes a column filter when name is one of lang, author,
// title, contributor, publisher, subject, tag, isbn or description; words
// with any other name are dropped. lang is matched as a whole value, all
// other filters as a substring ("%value%"); subject and tag both search the
// tags column and are lower-cased. Wildcards in the value are escaped.
//
// Every remaining word goes to the free-text query. Consecutive words are
// joined with " | " unless either side is already an explicit "&" or "|"
// operator, in which case a single space is used.
//
// It returns the free-text query (empty when there is none) and the column
// filters in the order they appeared (never nil).
func buildQuery(query string) (string, []columnq) {
	textQuery := ""
	columnQuerys := []columnq{}

	for _, w := range strings.Split(query, " ") {
		if w == "" {
			continue
		}

		tag := strings.SplitN(w, ":", 2)
		if len(tag) > 1 && tag[1] != "" {
			value := likeEscaper.Replace(tag[1])
			expr := "%" + value + "%"
			switch tag[0] {
			case "lang":
				columnQuerys = append(columnQuerys, columnq{"lang", value})
			case "author":
				columnQuerys = append(columnQuerys, columnq{"array_to_string(authors, ' ')", expr})
			case "title":
				columnQuerys = append(columnQuerys, columnq{"title", expr})
			case "contributor":
				columnQuerys = append(columnQuerys, columnq{"contributor", expr})
			case "publisher":
				columnQuerys = append(columnQuerys, columnq{"publisher", expr})
			case "subject", "tag":
				// Both names address the same "tags" array column.
				columnQuerys = append(columnQuerys, columnq{"array_to_string(tags, ' ')", strings.ToLower(expr)})
			case "isbn":
				columnQuerys = append(columnQuerys, columnq{"isbn", expr})
			case "description":
				columnQuerys = append(columnQuerys, columnq{"description", expr})
			}
			continue
		}

		if len(textQuery) != 0 {
			lastChar := textQuery[len(textQuery)-1:]
			if w != "&" && w != "|" && lastChar != "&" && lastChar != "|" {
				// Two plain words in a row: OR them together.
				textQuery += " | "
			} else {
				textQuery += " "
			}
		}
		textQuery += w
	}

	return textQuery, columnQuerys
}