From f5ae093e02af594f4678527bdfad31928016507f Mon Sep 17 00:00:00 2001
From: Las Zenow
Date: Thu, 19 Sep 2013 00:49:48 +0200
Subject: [PATCH] Script to add the size of the file to the book metadata

---
 .gitignore                 |   1 +
 tools/README               |   2 +
 tools/addsize/addsize.go   |  38 ++++++
 tools/addsize/config.go    |  45 +++++++
 tools/addsize/database.go  | 243 +++++++++++++++++++++++++++++++++
 tools/addsize/mapreduce.go | 266 +++++++++++++++++++++++++++++++++++++
 6 files changed, 595 insertions(+)
 create mode 100644 tools/addsize/addsize.go
 create mode 100644 tools/addsize/config.go
 create mode 100644 tools/addsize/database.go
 create mode 100644 tools/addsize/mapreduce.go

diff --git a/.gitignore b/.gitignore
index 2392860..9510703 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,5 +7,6 @@ tools/update/update
 tools/togridfs/togridfs
 tools/getISBNnDesc/getISBNnDesc
 tools/coverNew/coverNew
+tools/addsize/addsize
 tags
 .*.swp
diff --git a/tools/README b/tools/README
index ece1903..d0b3b4b 100644
--- a/tools/README
+++ b/tools/README
@@ -11,3 +11,5 @@ Password:
 - getISBNnDesc (31/5/2013). Import the ISBN and the description with changes of lines to the database
 
 - coverNew. Reload the cover from all the new books
+
+- addsize. Add the size of the books to the book metadata
diff --git a/tools/addsize/addsize.go b/tools/addsize/addsize.go
new file mode 100644
index 0000000..0c8d6a9
--- /dev/null
+++ b/tools/addsize/addsize.go
@@ -0,0 +1,38 @@
+package main
+
+import (
+	"fmt"
+	"labix.org/v2/mgo/bson"
+)
+
+func main() { // copies the length of every book's FS_BOOKS file into the book's "filesize" field
+	db = initDB()
+	defer db.Close()
+	books, _, err := db.GetBooks(bson.M{})
+	if err != nil {
+		panic(err) // without the book list there is nothing to update; fail loudly like initDB does
+	}
+	for _, book := range books {
+		size, err := getSize(book.File)
+		if err == nil {
+			err = db.UpdateBook(bson.ObjectIdHex(book.Id), bson.M{"filesize": size})
+		}
+		if err != nil {
+			fmt.Println(book.Id, err) // report the failing book and carry on with the next one
+		}
+	}
+}
+
+type file struct { // the only field read from a file document
+	Length int
+}
+
+func getSize(id bson.ObjectId) (int, error) { // Length of the FS_BOOKS file whose _id is id
+	fs := db.GetFS(FS_BOOKS)
+	var f file
+	err := fs.Find(bson.M{"_id": id}).One(&f)
+	if err != nil {
+		return 0, err
+	}
+	return f.Length, nil
+}
diff --git a/tools/addsize/config.go b/tools/addsize/config.go
new file mode 100644
index 0000000..f92f3a3
--- /dev/null
+++ b/tools/addsize/config.go
@@ -0,0 +1,45 @@
+package main
+
+const (
+	PORT = "8080"
+
+	DB_IP               = "127.0.0.1"
+	DB_NAME             = "trantor"
+	META_COLL           = "meta"
+	BOOKS_COLL          = "books"
+	TAGS_COLL           = "tags"
+	VISITED_COLL        = "visited"
+	DOWNLOADED_COLL     = "downloaded"
+	HOURLY_VISITS_COLL  = "visits.hourly"
+	DAILY_VISITS_COLL   = "visits.daily"
+	MONTHLY_VISITS_COLL = "visits.monthly"
+	USERS_COLL          = "users"
+	NEWS_COLL           = "news"
+	STATS_COLL          = "statistics"
+	FS_BOOKS            = "fs_books"
+	FS_IMGS             = "fs_imgs"
+
+	PASS_SALT                 = "ImperialLibSalt"
+	MINUTES_UPDATE_TAGS       = 11
+	MINUTES_UPDATE_VISITED    = 41
+	MINUTES_UPDATE_DOWNLOADED = 47
+	MINUTES_UPDATE_HOURLY     = 31
+	MINUTES_UPDATE_DAILY      = 60*12 + 7
+	MINUTES_UPDATE_MONTHLY    = 60*24 + 11
+	TAGS_DISPLAY              = 50
+	SEARCH_ITEMS_PAGE         = 20
+	NEW_ITEMS_PAGE            = 50
+	NUM_NEWS                  = 10
+	DAYS_NEWS_INDEXPAGE       = 15
+
+	TEMPLATE_PATH = "templates/"
+	CSS_PATH      = "css/"
+	JS_PATH       = "js/"
+	IMG_PATH      = "img/"
+
+	IMG_WIDTH_BIG   = 300
+	IMG_WIDTH_SMALL = 60
+	IMG_QUALITY     = 80
+
+	CHAN_SIZE = 100
+)
diff --git a/tools/addsize/database.go b/tools/addsize/database.go
new file mode 100644
index 0000000..e22693f
--- /dev/null
+++ b/tools/addsize/database.go
@@ -0,0 +1,243 @@
+package main
+
+import (
+	"crypto/md5"
+	"labix.org/v2/mgo"
+	"labix.org/v2/mgo/bson"
+	"time"
+)
+
+var db *DB // shared handle, assigned by main from initDB
+
+type Book struct { // shape of a document in the books collection
+	Id          string `bson:"_id"` // the Get*Books methods rewrite this to the hex form
+	Title       string
+	Author      []string
+	Contributor string
+	Publisher   string
+	Description string
+	Subject     []string
+	Date        string
+	Lang        []string
+	Isbn        string
+	Type        string
+	Format      string
+	Source      string
+	Relation    string
+	Coverage    string
+	Rights      string
+	Meta        string
+	File        bson.ObjectId
+	FileSize    int // the "filesize" field written by the addsize tool
+	Cover       bson.ObjectId
+	CoverSmall  bson.ObjectId
+	Active      bool
+	Keywords    []string
+}
+
+type News struct {
+	Date time.Time
+	Text string
+}
+
+type DB struct { // one session plus the collections the methods below use
+	session *mgo.Session
+	books   *mgo.Collection
+	user    *mgo.Collection
+	news    *mgo.Collection
+	stats   *mgo.Collection
+	mr      *MR
+}
+
+func initDB() *DB { // dials DB_IP and binds the DB_NAME collections; panics if the dial fails
+	var err error
+	d := new(DB)
+	d.session, err = mgo.Dial(DB_IP)
+	if err != nil {
+		panic(err)
+	}
+
+	database := d.session.DB(DB_NAME)
+	d.books = database.C(BOOKS_COLL)
+	d.user = database.C(USERS_COLL)
+	d.news = database.C(NEWS_COLL)
+	d.stats = database.C(STATS_COLL)
+	d.mr = NewMR(database)
+	return d
+}
+
+func (d *DB) Close() { // closes the session opened by initDB
+	d.session.Close()
+}
+
+func md5Pass(pass string) []byte {
+	// NOTE(review): Sum appends the digest of the bytes written so far (none) to its argument, so this
+	// returns salt+pass followed by a constant, not a hash. Kept: SetPassword/UserValid share this encoding.
+	return md5.New().Sum(([]byte)(PASS_SALT + pass))
+}
+
+func (d *DB) SetPassword(user string, pass string) error { // stores md5Pass(pass) in the user's "pass" field
+	hash := md5Pass(pass)
+	return d.user.Update(bson.M{"user": user}, bson.M{"$set": bson.M{"pass": hash}})
+}
+
+func (d *DB) UserValid(user string, pass string) bool { // true when a document matches both user and md5Pass(pass)
+	hash := md5Pass(pass)
+	n, err := d.user.Find(bson.M{"user": user, "pass": hash}).Count()
+	if err != nil {
+		return false
+	}
+	return n != 0
+}
+
+func (d *DB) UserRole(user string) string { // the user's "role", or "" when the lookup fails
+	type result struct {
+		Role string
+	}
+	res := result{}
+	err := d.user.Find(bson.M{"user": user}).One(&res)
+	if err != nil {
+		return ""
+	}
+	return res.Role
+}
+
+func (d *DB) AddNews(text string) error { // inserts text stamped with the current time
+	var news News
+	news.Text = text
+	news.Date = time.Now()
+	return d.news.Insert(news)
+}
+
+func (d *DB) GetNews(num int, days int) (news []News, err error) { // latest first, limited to num
+	query := bson.M{}
+	if days != 0 { // days == 0 means no age filter
+		duration := time.Duration(-24*days) * time.Hour
+		date := time.Now().Add(duration)
+		query = bson.M{"date": bson.M{"$gt": date}}
+	}
+	q := d.news.Find(query).Sort("-date").Limit(num)
+	err = q.All(&news)
+	return
+}
+
+func (d *DB) InsertStats(stats interface{}) error { // inserts into the stats collection
+	return d.stats.Insert(stats)
+}
+
+func (d *DB) InsertBook(book interface{}) error { // inserts into the books collection
+	return d.books.Insert(book)
+}
+
+func (d *DB) RemoveBook(id bson.ObjectId) error { // removes the book with this _id
+	return d.books.Remove(bson.M{"_id": id})
+}
+
+func (d *DB) UpdateBook(id bson.ObjectId, data interface{}) error { // $set of data on the book with this _id
+	return d.books.Update(bson.M{"_id": id}, bson.M{"$set": data})
+}
+
+// GetBooks runs query on the books collection sorted by descending _id. The optional
+// ints are the page length (r[0]) and the start index (r[1]); 0 leaves either unset.
+// It returns the page, the number of matches counted before paging, and any error;
+// the Id of each returned book is rewritten to the hex form of the stored value.
+func (d *DB) GetBooks(query bson.M, r ...int) (books []Book, num int, err error) {
+	var start, length int
+	if len(r) > 0 {
+		length = r[0]
+		if len(r) > 1 {
+			start = r[1]
+		}
+	}
+	q := d.books.Find(query).Sort("-_id")
+	num, err = q.Count()
+	if err != nil {
+		return
+	}
+	if start != 0 {
+		q = q.Skip(start)
+	}
+	if length != 0 {
+		q = q.Limit(length)
+	}
+
+	err = q.All(&books)
+	for i, b := range books {
+		books[i].Id = bson.ObjectId(b.Id).Hex()
+	}
+	return
+}
+
+// GetVisitedBooks returns the books for the ids reported by GetMostVisited (at most
+// num). A failed lookup is ignored and leaves a zero-valued Book in that position.
+func (d *DB) GetVisitedBooks(num int) (books []Book, err error) {
+	bookId, err := d.mr.GetMostVisited(num, d.stats)
+	if err != nil {
+		return nil, err
+	}
+
+	books = make([]Book, len(bookId)) // sized by the ids found, so no empty padding up to num
+	for i, id := range bookId {
+		d.books.Find(bson.M{"_id": id}).One(&books[i])
+		books[i].Id = bson.ObjectId(books[i].Id).Hex()
+	}
+	return
+}
+
+// GetDownloadedBooks returns the books for the ids reported by GetMostDownloaded (at
+// most num). A failed lookup is ignored and leaves a zero-valued Book in that position.
+func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) {
+	bookId, err := d.mr.GetMostDownloaded(num, d.stats)
+	if err != nil {
+		return nil, err
+	}
+
+	books = make([]Book, len(bookId)) // sized by the ids found, so no empty padding up to num
+	for i, id := range bookId {
+		d.books.Find(bson.M{"_id": id}).One(&books[i])
+		books[i].Id = bson.ObjectId(books[i].Id).Hex()
+	}
+	return
+}
+
+// GetNewBooks is GetBooks restricted by a $nor on {"active": true}, i.e. to the
+// books that are not marked active. The optional ints are forwarded unchanged:
+// page length first, then start index.
+// It returns the page, the number of matches and any error.
+func (d *DB) GetNewBooks(r ...int) (books []Book, num int, err error) {
+	return d.GetBooks(bson.M{"$nor": []bson.M{{"active": true}}}, r...)
+}
+
+func (d *DB) BookActive(id bson.ObjectId) bool { // Active flag of the book; false when the lookup fails
+	var book Book
+	err := d.books.Find(bson.M{"_id": id}).One(&book)
+	if err != nil {
+		return false
+	}
+	return book.Active
+}
+
+func (d *DB) GetFS(prefix string) *mgo.GridFS { // GridFS with this prefix in DB_NAME
+	return d.session.DB(DB_NAME).GridFS(prefix)
+}
+
+func (d *DB) GetTags(numTags int) ([]string, error) { // MR.GetTags over the books collection
+	return d.mr.GetTags(numTags, d.books)
+}
+
+type Visits struct { // one document of a visits.* map-reduce output
+	Date  int64 "_id"
+	Count int   "value"
+}
+
+func (d *DB) GetHourVisits(start time.Time) ([]Visits, error) { // MR.GetHourVisits over the stats collection
+	return d.mr.GetHourVisits(start, d.stats)
+}
+
+func (d *DB) GetDayVisits(start time.Time) ([]Visits, error) { // MR.GetDayVisits over the stats collection
+	return d.mr.GetDayVisits(start, d.stats)
+}
+
+func (d *DB) GetMonthVisits(start time.Time) ([]Visits, error) { // MR.GetMonthVisits over the stats collection
+	return d.mr.GetMonthVisits(start, d.stats)
+}
diff --git a/tools/addsize/mapreduce.go b/tools/addsize/mapreduce.go
new file mode 100644
index 0000000..dbadd19
--- /dev/null
+++ b/tools/addsize/mapreduce.go
@@ -0,0 +1,266 @@
+package main
+
+import (
+	"labix.org/v2/mgo"
+	"labix.org/v2/mgo/bson"
+	"time"
+)
+
+type MR struct { // collections that cache map-reduce output; meta records when each was rebuilt
+	meta        *mgo.Collection
+	tags        *mgo.Collection
+	visited     *mgo.Collection
+	downloaded  *mgo.Collection
+	hourly_raw  *mgo.Collection
+	daily_raw   *mgo.Collection
+	monthly_raw *mgo.Collection
+	hourly      *mgo.Collection
+	daily       *mgo.Collection
+	monthly     *mgo.Collection
+}
+
+func NewMR(database *mgo.Database) *MR { // binds every cache collection of database
+	m := new(MR)
+	m.meta = database.C(META_COLL)
+	m.tags = database.C(TAGS_COLL)
+	m.visited = database.C(VISITED_COLL)
+	m.downloaded = database.C(DOWNLOADED_COLL)
+	m.hourly_raw = database.C(HOURLY_VISITS_COLL + "_raw")
+	m.daily_raw = database.C(DAILY_VISITS_COLL + "_raw")
+	m.monthly_raw = database.C(MONTHLY_VISITS_COLL + "_raw")
+	m.hourly = database.C(HOURLY_VISITS_COLL)
+	m.daily = database.C(DAILY_VISITS_COLL)
+	m.monthly = database.C(MONTHLY_VISITS_COLL)
+	return m
+}
+
+func (m *MR) GetTags(numTags int, booksColl *mgo.Collection) ([]string, error) { // top numTags subjects of active books
+	if m.isOutdated(TAGS_COLL, MINUTES_UPDATE_TAGS) {
+		var mr mgo.MapReduce // reduce must sum vals: MongoDB may feed it its own partial results
+		mr.Map = `function() {
+			if (this.subject) {
+				this.subject.forEach(function(s) { emit(s, 1); });
+			}
+		}`
+		mr.Reduce = `function(tag, vals) {
+			var count = 0;
+			vals.forEach(function(v) { count += v; });
+			return count;
+		}`
+		err := m.update(&mr, bson.M{"active": true}, booksColl, TAGS_COLL)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []struct {
+		Tag string "_id"
+	}
+	err := m.tags.Find(nil).Sort("-value").Limit(numTags).All(&result)
+	if err != nil {
+		return nil, err
+	}
+
+	tags := make([]string, len(result))
+	for i, r := range result {
+		tags[i] = r.Tag
+	}
+	return tags, nil
+}
+
+func (m *MR) GetMostVisited(num int, statsColl *mgo.Collection) ([]bson.ObjectId, error) { // top num ids among "book" stats
+	if m.isOutdated(VISITED_COLL, MINUTES_UPDATE_VISITED) {
+		var mr mgo.MapReduce // reduce must sum vals: MongoDB may feed it its own partial results
+		mr.Map = `function() {
+			emit(this.id, 1);
+		}`
+		mr.Reduce = `function(tag, vals) {
+			var count = 0;
+			vals.forEach(function(v) { count += v; });
+			return count;
+		}`
+		err := m.update(&mr, bson.M{"section": "book"}, statsColl, VISITED_COLL)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []struct {
+		Book bson.ObjectId "_id"
+	}
+	err := m.visited.Find(nil).Sort("-value").Limit(num).All(&result)
+	if err != nil {
+		return nil, err
+	}
+
+	books := make([]bson.ObjectId, len(result))
+	for i, r := range result {
+		books[i] = r.Book
+	}
+	return books, nil
+}
+
+func (m *MR) GetMostDownloaded(num int, statsColl *mgo.Collection) ([]bson.ObjectId, error) { // top num ids among "download" stats
+	if m.isOutdated(DOWNLOADED_COLL, MINUTES_UPDATE_DOWNLOADED) {
+		var mr mgo.MapReduce // reduce must sum vals: MongoDB may feed it its own partial results
+		mr.Map = `function() {
+			emit(this.id, 1);
+		}`
+		mr.Reduce = `function(tag, vals) {
+			var count = 0;
+			vals.forEach(function(v) { count += v; });
+			return count;
+		}`
+		err := m.update(&mr, bson.M{"section": "download"}, statsColl, DOWNLOADED_COLL)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []struct {
+		Book bson.ObjectId "_id"
+	}
+	err := m.downloaded.Find(nil).Sort("-value").Limit(num).All(&result)
+	if err != nil {
+		return nil, err
+	}
+
+	books := make([]bson.ObjectId, len(result))
+	for i, r := range result {
+		books[i] = r.Book
+	}
+	return books, nil
+}
+
+func (m *MR) GetHourVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { // distinct sessions per UTC hour
+	if m.isOutdated(HOURLY_VISITS_COLL, MINUTES_UPDATE_HOURLY) {
+		const reduce = `function(date, vals) {
+			var count = 0;
+			vals.forEach(function(v) { count += v; });
+			return count;
+		}`
+		var mr mgo.MapReduce // pass 1: one entry per (hour, session) for stats dated >= start
+		mr.Map = `function() {
+			var date = Date.UTC(this.date.getUTCFullYear(),
+			                    this.date.getUTCMonth(),
+			                    this.date.getUTCDate(),
+			                    this.date.getUTCHours());
+			emit({date: date, session: this.session}, 1);
+		}`
+		mr.Reduce = reduce
+		err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_VISITS_COLL+"_raw")
+		if err != nil {
+			return nil, err
+		}
+		var mr2 mgo.MapReduce // pass 2: count the pass-1 entries per hour
+		mr2.Map = `function() {
+			emit(this['_id']['date'], 1);
+		}`
+		mr2.Reduce = reduce
+		err = m.update(&mr2, bson.M{}, m.hourly_raw, HOURLY_VISITS_COLL)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []Visits
+	err := m.hourly.Find(nil).All(&result)
+	return result, err
+}
+
+func (m *MR) GetDayVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { // distinct sessions per UTC day
+	if m.isOutdated(DAILY_VISITS_COLL, MINUTES_UPDATE_DAILY) {
+		const reduce = `function(date, vals) {
+			var count = 0;
+			vals.forEach(function(v) { count += v; });
+			return count;
+		}`
+		var mr mgo.MapReduce // pass 1: one entry per (day, session) for stats dated >= start
+		mr.Map = `function() {
+			var date = Date.UTC(this.date.getUTCFullYear(),
+			                    this.date.getUTCMonth(),
+			                    this.date.getUTCDate());
+			emit({date: date, session: this.session}, 1);
+		}`
+		mr.Reduce = reduce
+		err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_VISITS_COLL+"_raw")
+		if err != nil {
+			return nil, err
+		}
+		var mr2 mgo.MapReduce // pass 2: count the pass-1 entries per day
+		mr2.Map = `function() {
+			emit(this['_id']['date'], 1);
+		}`
+		mr2.Reduce = reduce
+		err = m.update(&mr2, bson.M{}, m.daily_raw, DAILY_VISITS_COLL)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []Visits
+	err := m.daily.Find(nil).All(&result)
+	return result, err
+}
+
+func (m *MR) GetMonthVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { // distinct sessions per UTC month
+	if m.isOutdated(MONTHLY_VISITS_COLL, MINUTES_UPDATE_MONTHLY) {
+		const reduce = `function(date, vals) {
+			var count = 0;
+			vals.forEach(function(v) { count += v; });
+			return count;
+		}`
+		var mr mgo.MapReduce // pass 1: one entry per (month, session) for stats dated >= start
+		mr.Map = `function() {
+			var date = Date.UTC(this.date.getUTCFullYear(),
+			                    this.date.getUTCMonth());
+			emit({date: date, session: this.session}, 1);
+		}`
+		mr.Reduce = reduce
+		err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_VISITS_COLL+"_raw")
+		if err != nil {
+			return nil, err
+		}
+		var mr2 mgo.MapReduce // pass 2: count the pass-1 entries per month
+		mr2.Map = `function() {
+			emit(this['_id']['date'], 1);
+		}`
+		mr2.Reduce = reduce
+		err = m.update(&mr2, bson.M{}, m.monthly_raw, MONTHLY_VISITS_COLL)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var result []Visits
+	err := m.monthly.Find(nil).All(&result)
+	return result, err
+}
+
+func (m *MR) update(mr *mgo.MapReduce, query bson.M, queryColl *mgo.Collection, storeColl string) error { // rebuilds storeColl
+	_, err := m.meta.RemoveAll(bson.M{"type": storeColl}) // drop the old stamp first; a failed run then stays outdated
+	if err != nil {
+		return err
+	}
+
+	mr.Out = bson.M{"replace": storeColl}
+	_, err = queryColl.Find(query).MapReduce(mr, nil)
+	if err != nil {
+		return err
+	}
+
+	return m.meta.Insert(bson.M{"type": storeColl}) // the new document's _id is what isOutdated reads the time from
+}
+
+func (m *MR) isOutdated(coll string, minutes float64) bool { // true when coll has no meta stamp or it is older than minutes
+	var result struct {
+		Id bson.ObjectId `bson:"_id"`
+	}
+	err := m.meta.Find(bson.M{"type": coll}).One(&result)
+	if err != nil {
+		return true
+	}
+
+	lastUpdate := result.Id.Time()
+	return time.Since(lastUpdate).Minutes() > minutes
+}