From 71c8a8946ac022afa6c218cce70af913b538ebc9 Mon Sep 17 00:00:00 2001 From: Las Zenow Date: Tue, 23 Apr 2013 11:13:55 +0200 Subject: [PATCH] Dirty migration tool for gridfs --- tools/README | 4 +- tools/togridfs/config.go | 32 +++++ tools/togridfs/cover.go | 137 ++++++++++++++++++++ tools/togridfs/database.go | 252 +++++++++++++++++++++++++++++++++++++ tools/togridfs/togridfs.go | 61 +++++++++ 5 files changed, 485 insertions(+), 1 deletion(-) create mode 100644 tools/togridfs/config.go create mode 100644 tools/togridfs/cover.go create mode 100644 tools/togridfs/database.go create mode 100644 tools/togridfs/togridfs.go diff --git a/tools/README b/tools/README index 0933b15..4c16403 100644 --- a/tools/README +++ b/tools/README @@ -1,7 +1,9 @@ -Some tools to manage trantor: +Some dirty tools to manage trantor: - adduser. Used to add users to trantor: $ adduser myNick Password: - update. Update the cover of all the books. It might be outdated. + +- togridfs (23/4/2013). Migrate all files and covers to gridfs diff --git a/tools/togridfs/config.go b/tools/togridfs/config.go new file mode 100644 index 0000000..8ed9110 --- /dev/null +++ b/tools/togridfs/config.go @@ -0,0 +1,32 @@ +package main + +const ( + PORT = "8080" + + DB_IP = "127.0.0.1" + DB_NAME = "trantor" + META_COLL = "meta" + BOOKS_COLL = "books" + TAGS_COLL = "tags" + USERS_COLL = "users" + STATS_COLL = "statistics" + FS_BOOKS = "fs_books" + FS_IMGS = "fs_imgs" + + PASS_SALT = "ImperialLibSalt" + MINUTES_UPDATE_TAGS = 10 + TAGS_DISPLAY = 50 + SEARCH_ITEMS_PAGE = 20 + NEW_ITEMS_PAGE = 50 + + TEMPLATE_PATH = "templates/" + CSS_PATH = "css/" + JS_PATH = "js/" + IMG_PATH = "img/" + + IMG_WIDTH_BIG = 300 + IMG_WIDTH_SMALL = 60 + IMG_QUALITY = 80 + + STATS_CHAN_SIZE = 100 +) diff --git a/tools/togridfs/cover.go b/tools/togridfs/cover.go new file mode 100644 index 0000000..87d6934 --- /dev/null +++ b/tools/togridfs/cover.go @@ -0,0 +1,137 @@ +package main + +import ( + "bytes" + "git.gitorious.org/go-pkg/epubgo.git" 
+ "github.com/nfnt/resize" + "image" + "image/jpeg" + "io" + "io/ioutil" + "labix.org/v2/mgo" + "labix.org/v2/mgo/bson" + "log" + "regexp" + "strings" +) + +func GetCover(e *epubgo.Epub, title string) (bson.ObjectId, bson.ObjectId) { + imgId, smallId := searchCommonCoverNames(e, title) + if imgId != "" { + return imgId, smallId + } + + /* search for img on the text */ + exp, _ := regexp.Compile(" 0 { + return d.books.Update(bson.M{"_id": id}, bson.M{"$set": data, "$unset": unset[0]}) + } + return d.books.Update(bson.M{"_id": id}, bson.M{"$set": data}) +}
+
+/* Add one to the VisitsCount field of the book with the given id
+ */
+func (d *DB) IncVisit(id bson.ObjectId) error {
+	return d.books.Update(bson.M{"_id": id}, bson.M{"$inc": bson.M{"VisitsCount": 1}})
+}
+
+/* Add one to the DownloadCount field of the book with the given id
+ */
+func (d *DB) IncDownload(id bson.ObjectId) error {
+	return d.books.Update(bson.M{"_id": id}, bson.M{"$inc": bson.M{"DownloadCount": 1}})
+}
+
+/* Get the books matching query, sorted by descending _id
+ *
+ * optional parameters: length and start index; 0 (or an omitted
+ * parameter) means no limit / no skip
+ *
+ * Returns: list of books (Id converted to its hex form), number of books
+ * matching the query before skip and limit are applied, and err
+ */
+func (d *DB) GetBooks(query bson.M, r ...int) (books []Book, num int, err error) {
+	var start, length int
+	if len(r) > 0 {
+		length = r[0]
+		if len(r) > 1 {
+			start = r[1]
+		}
+	}
+	q := d.books.Find(query).Sort("-_id")
+	num, err = q.Count()
+	if err != nil {
+		return
+	}
+	if start != 0 {
+		q = q.Skip(start)
+	}
+	if length != 0 {
+		q = q.Limit(length)
+	}
+
+	err = q.All(&books)
+	for i, b := range books {
+		books[i].Id = bson.ObjectId(b.Id).Hex()
+	}
+	return
+}
+
+/* Get the num most visited active books (Id converted to its hex form)
+ */
+func (d *DB) GetVisitedBooks(num int) (books []Book, err error) {
+	q := d.books.Find(bson.M{"active": true}).Sort("-VisitsCount").Limit(num)
+	err = q.All(&books)
+	for i, b := range books {
+		books[i].Id = bson.ObjectId(b.Id).Hex()
+	}
+	return
+}
+
+/* Get the num most downloaded active books (Id converted to its hex form)
+ */
+func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) {
+	q := d.books.Find(bson.M{"active": true}).Sort("-DownloadCount").Limit(num)
+	err = q.All(&books)
+	for i, b := range books {
+		books[i].Id = bson.ObjectId(b.Id).Hex()
+	}
+	return
+}
+
+/* Get the books whose "active" field is not true
+ *
+ * optional parameters: length and start index (passed on to GetBooks)
+ *
+ * Returns: list of books, number found and err
+ */
+func (d *DB) GetNewBooks(r ...int) (books []Book, num int, err error) {
+	return d.GetBooks(bson.M{"$nor": []bson.M{{"active": true}}}, r...)
+}
+
+/* Report the Active flag of the book with the given id; false is also
+ * returned when the book can not be fetched
+ */
+func (d *DB) BookActive(id bson.ObjectId) bool {
+	var book Book
+	err := d.books.Find(bson.M{"_id": id}).One(&book)
+	if err != nil {
+		return false
+	}
+	return book.Active
+}
+
+/* Get the GridFS with the given collection prefix from the DB_NAME database
+ */
+func (d *DB) GetFS(prefix string) *mgo.GridFS {
+	return d.session.DB(DB_NAME).GridFS(prefix)
+}
+
+/* Report whether the tags have to be recalculated: there is no
+ * META_TYPE_TAGS document in meta, or the time stored in its _id is more
+ * than MINUTES_UPDATE_TAGS minutes old
+ */
+func (d *DB) areTagsOutdated() bool {
+	var result struct {
+		Id bson.ObjectId `bson:"_id"`
+	}
+	err := d.meta.Find(bson.M{"type": META_TYPE_TAGS}).One(&result)
+	if err != nil {
+		return true
+	}
+
+	lastUpdate := result.Id.Time()
+	return time.Since(lastUpdate).Minutes() > MINUTES_UPDATE_TAGS
+}
+
+/* Recalculate the tags: drop the META_TYPE_TAGS marker, run a map-reduce
+ * over the active books that counts the books of every subject into
+ * TAGS_COLL (replacing its content) and insert a fresh marker, whose _id
+ * carries the time of the update
+ */
+func (d *DB) updateTags() error {
+	_, err := d.meta.RemoveAll(bson.M{"type": META_TYPE_TAGS})
+	if err != nil {
+		return err
+	}
+
+	var mr mgo.MapReduce
+	mr.Map = "function() { " +
+		"if (this.active) { this.subject.forEach(function(s) { emit(s, 1); }); }" +
+		"}"
+	/* MongoDB may call reduce more than once for the same key, feeding
+	 * earlier results back in as values, so the values have to be summed;
+	 * counting them loses the partial sums */
+	mr.Reduce = "function(tag, vals) { " +
+		"var count = 0;" +
+		"vals.forEach(function(v) { count += v; });" +
+		"return count;" +
+		"}"
+	mr.Out = bson.M{"replace": TAGS_COLL}
+	_, err = d.books.Find(bson.M{"active": true}).MapReduce(&mr, nil)
+	if err != nil {
+		return err
+	}
+
+	return d.meta.Insert(bson.M{"type": META_TYPE_TAGS})
+}
+
+/* Get the _id of up to numTags documents of d.tags sorted by descending
+ * value (presumably the TAGS_COLL collection written by updateTags; the
+ * binding is not visible here). The tags are recalculated first when
+ * areTagsOutdated says so
+ */
+func (d *DB) GetTags(numTags int) ([]string, error) {
+	if d.areTagsOutdated() {
+		err := d.updateTags()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []struct {
+		Tag string "_id"
+	}
+	err := d.tags.Find(nil).Sort("-value").Limit(numTags).All(&result)
+	if err != nil {
+		return nil, err
+	}
+	tags := make([]string, len(result))
+	for i, r := range result {
+		tags[i] = r.Tag
+	}
+	return tags, nil
+}
diff --git a/tools/togridfs/togridfs.go b/tools/togridfs/togridfs.go new
file mode 100644
index 0000000..9427f39
--- /dev/null
+++ b/tools/togridfs/togridfs.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"fmt"
+	"git.gitorious.org/go-pkg/epubgo.git"
+	"io"
+	"labix.org/v2/mgo/bson"
+	"os"
+)
+
+// main migrates every book that still has an on-disk path into GridFS. For
+// each such book the epub is copied into the FS_BOOKS bucket, GetCover is
+// asked for its cover ids, and the book document is updated to reference the
+// new ids while its "path" field is unset. A failure is printed and only
+// skips the affected book.
+func main() {
+	db = initDB()
+	defer db.Close()
+
+	books, _, err := db.GetBooks(bson.M{})
+	if err != nil {
+		fmt.Println("db.GetBooks ================", err)
+		return
+	}
+
+	for _, book := range books {
+		if book.Path == "" {
+			fmt.Println("don't needed -- ", book.Title)
+			continue
+		}
+		fmt.Println(book.Title)
+
+		if err := migrateBook(book.Id, book.Title, "books/"+book.Path); err != nil {
+			fmt.Println(err)
+		}
+	}
+}
+
+// migrateBook moves one book into GridFS and updates its document. It lives
+// in its own function so the deferred Close calls run once per book instead
+// of piling up until main returns.
+func migrateBook(hexID, title, path string) error {
+	id, err := storeEpub(path, title+".epub")
+	if err != nil {
+		return err
+	}
+
+	e, err := epubgo.Open(path)
+	if err != nil {
+		return fmt.Errorf("epubgo.Open ================ %v", err)
+	}
+	defer e.Close()
+
+	data := bson.M{"file": id}
+	cover, coverSmall := GetCover(e, title)
+	if cover != "" {
+		data["cover"] = cover
+		data["coversmall"] = coverSmall
+	} else {
+		fmt.Println("No cover ================", title)
+	}
+
+	err = db.UpdateBook(bson.ObjectIdHex(hexID), data, bson.M{"path": 1})
+	if err != nil {
+		return fmt.Errorf("db.UpdateBook ================ %v", err)
+	}
+	return nil
+}
+
+// storeEpub copies the file at path into the FS_BOOKS GridFS bucket under
+// name and returns the id of the new GridFS file. The writer is closed and
+// its error checked before the id is handed back, because mgo flushes
+// pending data on Close and reports there whether the write succeeded.
+func storeEpub(path, name string) (bson.ObjectId, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return "", fmt.Errorf("os.Open ================ %v", err)
+	}
+	defer file.Close()
+
+	fw, err := db.GetFS(FS_BOOKS).Create(name)
+	if err != nil {
+		return "", fmt.Errorf("gridfs.Create ================ %v", err)
+	}
+	if _, err = io.Copy(fw, file); err != nil {
+		// Report the copy error; Close is only called to release the writer.
+		fw.Close()
+		return "", fmt.Errorf("io.Copy ================ %v", err)
+	}
+	if err = fw.Close(); err != nil {
+		return "", fmt.Errorf("gridfs.Close ================ %v", err)
+	}
+
+	id, ok := fw.Id().(bson.ObjectId)
+	if !ok {
+		return "", fmt.Errorf("gridfs.Id ================ unexpected id type %T", fw.Id())
+	}
+	return id, nil
+}