Script to add the size of the file to the book metadata
This commit is contained in:
parent
43856dcd9b
commit
f5ae093e02
6 changed files with 595 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -7,5 +7,6 @@ tools/update/update
|
|||
tools/togridfs/togridfs
|
||||
tools/getISBNnDesc/getISBNnDesc
|
||||
tools/coverNew/coverNew
|
||||
tools/addsize/addsize
|
||||
tags
|
||||
.*.swp
|
||||
|
|
|
@ -11,3 +11,5 @@ Password:
|
|||
- getISBNnDesc (31/5/2013). Import the ISBN and the description with changes of lines to the database
|
||||
|
||||
- coverNew. Reload the cover from all the new books
|
||||
|
||||
- addsize. Add the size of the books to the book metadata
|
||||
|
|
38
tools/addsize/addsize.go
Normal file
38
tools/addsize/addsize.go
Normal file
|
@ -0,0 +1,38 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"labix.org/v2/mgo/bson"
|
||||
)
|
||||
|
||||
func main() {
|
||||
db = initDB()
|
||||
defer db.Close()
|
||||
books, _, _ := db.GetBooks(bson.M{})
|
||||
|
||||
for _, book := range books {
|
||||
size, err := getSize(book.File)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
continue
|
||||
}
|
||||
err = db.UpdateBook(bson.ObjectIdHex(book.Id), bson.M{"filesize": size})
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type file struct {
|
||||
Length int
|
||||
}
|
||||
|
||||
func getSize(id bson.ObjectId) (int, error) {
|
||||
fs := db.GetFS(FS_BOOKS)
|
||||
var f file
|
||||
err := fs.Find(bson.M{"_id": id}).One(&f)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return f.Length, nil
|
||||
}
|
45
tools/addsize/config.go
Normal file
45
tools/addsize/config.go
Normal file
|
@ -0,0 +1,45 @@
|
|||
package main
|
||||
|
||||
const (
|
||||
PORT = "8080"
|
||||
|
||||
DB_IP = "127.0.0.1"
|
||||
DB_NAME = "trantor"
|
||||
META_COLL = "meta"
|
||||
BOOKS_COLL = "books"
|
||||
TAGS_COLL = "tags"
|
||||
VISITED_COLL = "visited"
|
||||
DOWNLOADED_COLL = "downloaded"
|
||||
HOURLY_VISITS_COLL = "visits.hourly"
|
||||
DAILY_VISITS_COLL = "visits.daily"
|
||||
MONTHLY_VISITS_COLL = "visits.monthly"
|
||||
USERS_COLL = "users"
|
||||
NEWS_COLL = "news"
|
||||
STATS_COLL = "statistics"
|
||||
FS_BOOKS = "fs_books"
|
||||
FS_IMGS = "fs_imgs"
|
||||
|
||||
PASS_SALT = "ImperialLibSalt"
|
||||
MINUTES_UPDATE_TAGS = 11
|
||||
MINUTES_UPDATE_VISITED = 41
|
||||
MINUTES_UPDATE_DOWNLOADED = 47
|
||||
MINUTES_UPDATE_HOURLY = 31
|
||||
MINUTES_UPDATE_DAILY = 60*12 + 7
|
||||
MINUTES_UPDATE_MONTHLY = 60*24 + 11
|
||||
TAGS_DISPLAY = 50
|
||||
SEARCH_ITEMS_PAGE = 20
|
||||
NEW_ITEMS_PAGE = 50
|
||||
NUM_NEWS = 10
|
||||
DAYS_NEWS_INDEXPAGE = 15
|
||||
|
||||
TEMPLATE_PATH = "templates/"
|
||||
CSS_PATH = "css/"
|
||||
JS_PATH = "js/"
|
||||
IMG_PATH = "img/"
|
||||
|
||||
IMG_WIDTH_BIG = 300
|
||||
IMG_WIDTH_SMALL = 60
|
||||
IMG_QUALITY = 80
|
||||
|
||||
CHAN_SIZE = 100
|
||||
)
|
243
tools/addsize/database.go
Normal file
243
tools/addsize/database.go
Normal file
|
@ -0,0 +1,243 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"labix.org/v2/mgo"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"time"
|
||||
)
|
||||
|
||||
var db *DB
|
||||
|
||||
type Book struct {
|
||||
Id string `bson:"_id"`
|
||||
Title string
|
||||
Author []string
|
||||
Contributor string
|
||||
Publisher string
|
||||
Description string
|
||||
Subject []string
|
||||
Date string
|
||||
Lang []string
|
||||
Isbn string
|
||||
Type string
|
||||
Format string
|
||||
Source string
|
||||
Relation string
|
||||
Coverage string
|
||||
Rights string
|
||||
Meta string
|
||||
File bson.ObjectId
|
||||
FileSize int
|
||||
Cover bson.ObjectId
|
||||
CoverSmall bson.ObjectId
|
||||
Active bool
|
||||
Keywords []string
|
||||
}
|
||||
|
||||
type News struct {
|
||||
Date time.Time
|
||||
Text string
|
||||
}
|
||||
|
||||
type DB struct {
|
||||
session *mgo.Session
|
||||
books *mgo.Collection
|
||||
user *mgo.Collection
|
||||
news *mgo.Collection
|
||||
stats *mgo.Collection
|
||||
mr *MR
|
||||
}
|
||||
|
||||
func initDB() *DB {
|
||||
var err error
|
||||
d := new(DB)
|
||||
d.session, err = mgo.Dial(DB_IP)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
database := d.session.DB(DB_NAME)
|
||||
d.books = database.C(BOOKS_COLL)
|
||||
d.user = database.C(USERS_COLL)
|
||||
d.news = database.C(NEWS_COLL)
|
||||
d.stats = database.C(STATS_COLL)
|
||||
d.mr = NewMR(database)
|
||||
return d
|
||||
}
|
||||
|
||||
func (d *DB) Close() {
|
||||
d.session.Close()
|
||||
}
|
||||
|
||||
func md5Pass(pass string) []byte {
|
||||
h := md5.New()
|
||||
hash := h.Sum(([]byte)(PASS_SALT + pass))
|
||||
return hash
|
||||
}
|
||||
|
||||
func (d *DB) SetPassword(user string, pass string) error {
|
||||
hash := md5Pass(pass)
|
||||
return d.user.Update(bson.M{"user": user}, bson.M{"$set": bson.M{"pass": hash}})
|
||||
}
|
||||
|
||||
func (d *DB) UserValid(user string, pass string) bool {
|
||||
hash := md5Pass(pass)
|
||||
n, err := d.user.Find(bson.M{"user": user, "pass": hash}).Count()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return n != 0
|
||||
}
|
||||
|
||||
func (d *DB) UserRole(user string) string {
|
||||
type result struct {
|
||||
Role string
|
||||
}
|
||||
res := result{}
|
||||
err := d.user.Find(bson.M{"user": user}).One(&res)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return res.Role
|
||||
}
|
||||
|
||||
func (d *DB) AddNews(text string) error {
|
||||
var news News
|
||||
news.Text = text
|
||||
news.Date = time.Now()
|
||||
return d.news.Insert(news)
|
||||
}
|
||||
|
||||
func (d *DB) GetNews(num int, days int) (news []News, err error) {
|
||||
query := bson.M{}
|
||||
if days != 0 {
|
||||
duration := time.Duration(-24*days) * time.Hour
|
||||
date := time.Now().Add(duration)
|
||||
query = bson.M{"date": bson.M{"$gt": date}}
|
||||
}
|
||||
q := d.news.Find(query).Sort("-date").Limit(num)
|
||||
err = q.All(&news)
|
||||
return
|
||||
}
|
||||
|
||||
func (d *DB) InsertStats(stats interface{}) error {
|
||||
return d.stats.Insert(stats)
|
||||
}
|
||||
|
||||
func (d *DB) InsertBook(book interface{}) error {
|
||||
return d.books.Insert(book)
|
||||
}
|
||||
|
||||
func (d *DB) RemoveBook(id bson.ObjectId) error {
|
||||
return d.books.Remove(bson.M{"_id": id})
|
||||
}
|
||||
|
||||
func (d *DB) UpdateBook(id bson.ObjectId, data interface{}) error {
|
||||
return d.books.Update(bson.M{"_id": id}, bson.M{"$set": data})
|
||||
}
|
||||
|
||||
/* optional parameters: length and start index
|
||||
*
|
||||
* Returns: list of books, number found and err
|
||||
*/
|
||||
func (d *DB) GetBooks(query bson.M, r ...int) (books []Book, num int, err error) {
|
||||
var start, length int
|
||||
if len(r) > 0 {
|
||||
length = r[0]
|
||||
if len(r) > 1 {
|
||||
start = r[1]
|
||||
}
|
||||
}
|
||||
q := d.books.Find(query).Sort("-_id")
|
||||
num, err = q.Count()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if start != 0 {
|
||||
q = q.Skip(start)
|
||||
}
|
||||
if length != 0 {
|
||||
q = q.Limit(length)
|
||||
}
|
||||
|
||||
err = q.All(&books)
|
||||
for i, b := range books {
|
||||
books[i].Id = bson.ObjectId(b.Id).Hex()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/* Get the most visited books
|
||||
*/
|
||||
func (d *DB) GetVisitedBooks(num int) (books []Book, err error) {
|
||||
bookId, err := d.mr.GetMostVisited(num, d.stats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
books = make([]Book, num)
|
||||
for i, id := range bookId {
|
||||
d.books.Find(bson.M{"_id": id}).One(&books[i])
|
||||
books[i].Id = bson.ObjectId(books[i].Id).Hex()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/* Get the most downloaded books
|
||||
*/
|
||||
func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) {
|
||||
bookId, err := d.mr.GetMostDownloaded(num, d.stats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
books = make([]Book, num)
|
||||
for i, id := range bookId {
|
||||
d.books.Find(bson.M{"_id": id}).One(&books[i])
|
||||
books[i].Id = bson.ObjectId(books[i].Id).Hex()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/* optional parameters: length and start index
|
||||
*
|
||||
* Returns: list of books, number found and err
|
||||
*/
|
||||
func (d *DB) GetNewBooks(r ...int) (books []Book, num int, err error) {
|
||||
return d.GetBooks(bson.M{"$nor": []bson.M{{"active": true}}}, r...)
|
||||
}
|
||||
|
||||
func (d *DB) BookActive(id bson.ObjectId) bool {
|
||||
var book Book
|
||||
err := d.books.Find(bson.M{"_id": id}).One(&book)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return book.Active
|
||||
}
|
||||
|
||||
func (d *DB) GetFS(prefix string) *mgo.GridFS {
|
||||
return d.session.DB(DB_NAME).GridFS(prefix)
|
||||
}
|
||||
|
||||
func (d *DB) GetTags(numTags int) ([]string, error) {
|
||||
return d.mr.GetTags(numTags, d.books)
|
||||
}
|
||||
|
||||
type Visits struct {
|
||||
Date int64 "_id"
|
||||
Count int "value"
|
||||
}
|
||||
|
||||
func (d *DB) GetHourVisits(start time.Time) ([]Visits, error) {
|
||||
return d.mr.GetHourVisits(start, d.stats)
|
||||
}
|
||||
|
||||
func (d *DB) GetDayVisits(start time.Time) ([]Visits, error) {
|
||||
return d.mr.GetDayVisits(start, d.stats)
|
||||
}
|
||||
|
||||
func (d *DB) GetMonthVisits(start time.Time) ([]Visits, error) {
|
||||
return d.mr.GetMonthVisits(start, d.stats)
|
||||
}
|
266
tools/addsize/mapreduce.go
Normal file
266
tools/addsize/mapreduce.go
Normal file
|
@ -0,0 +1,266 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"labix.org/v2/mgo"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"time"
|
||||
)
|
||||
|
||||
type MR struct {
|
||||
meta *mgo.Collection
|
||||
tags *mgo.Collection
|
||||
visited *mgo.Collection
|
||||
downloaded *mgo.Collection
|
||||
hourly_raw *mgo.Collection
|
||||
daily_raw *mgo.Collection
|
||||
monthly_raw *mgo.Collection
|
||||
hourly *mgo.Collection
|
||||
daily *mgo.Collection
|
||||
monthly *mgo.Collection
|
||||
}
|
||||
|
||||
func NewMR(database *mgo.Database) *MR {
|
||||
m := new(MR)
|
||||
m.meta = database.C(META_COLL)
|
||||
m.tags = database.C(TAGS_COLL)
|
||||
m.visited = database.C(VISITED_COLL)
|
||||
m.downloaded = database.C(DOWNLOADED_COLL)
|
||||
m.hourly_raw = database.C(HOURLY_VISITS_COLL + "_raw")
|
||||
m.daily_raw = database.C(DAILY_VISITS_COLL + "_raw")
|
||||
m.monthly_raw = database.C(MONTHLY_VISITS_COLL + "_raw")
|
||||
m.hourly = database.C(HOURLY_VISITS_COLL)
|
||||
m.daily = database.C(DAILY_VISITS_COLL)
|
||||
m.monthly = database.C(MONTHLY_VISITS_COLL)
|
||||
return m
|
||||
}
|
||||
|
||||
func (m *MR) GetTags(numTags int, booksColl *mgo.Collection) ([]string, error) {
|
||||
if m.isOutdated(TAGS_COLL, MINUTES_UPDATE_TAGS) {
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = `function() {
|
||||
if (this.subject) {
|
||||
this.subject.forEach(function(s) { emit(s, 1); });
|
||||
}
|
||||
}`
|
||||
mr.Reduce = `function(tag, vals) {
|
||||
var count = 0;
|
||||
vals.forEach(function() { count += 1; });
|
||||
return count;
|
||||
}`
|
||||
err := m.update(&mr, bson.M{"active": true}, booksColl, TAGS_COLL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []struct {
|
||||
Tag string "_id"
|
||||
}
|
||||
err := m.tags.Find(nil).Sort("-value").Limit(numTags).All(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags := make([]string, len(result))
|
||||
for i, r := range result {
|
||||
tags[i] = r.Tag
|
||||
}
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (m *MR) GetMostVisited(num int, statsColl *mgo.Collection) ([]bson.ObjectId, error) {
|
||||
if m.isOutdated(VISITED_COLL, MINUTES_UPDATE_VISITED) {
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = `function() {
|
||||
emit(this.id, 1);
|
||||
}`
|
||||
mr.Reduce = `function(tag, vals) {
|
||||
var count = 0;
|
||||
vals.forEach(function() { count += 1; });
|
||||
return count;
|
||||
}`
|
||||
err := m.update(&mr, bson.M{"section": "book"}, statsColl, VISITED_COLL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []struct {
|
||||
Book bson.ObjectId "_id"
|
||||
}
|
||||
err := m.visited.Find(nil).Sort("-value").Limit(num).All(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
books := make([]bson.ObjectId, len(result))
|
||||
for i, r := range result {
|
||||
books[i] = r.Book
|
||||
}
|
||||
return books, nil
|
||||
}
|
||||
|
||||
func (m *MR) GetMostDownloaded(num int, statsColl *mgo.Collection) ([]bson.ObjectId, error) {
|
||||
if m.isOutdated(DOWNLOADED_COLL, MINUTES_UPDATE_DOWNLOADED) {
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = `function() {
|
||||
emit(this.id, 1);
|
||||
}`
|
||||
mr.Reduce = `function(tag, vals) {
|
||||
var count = 0;
|
||||
vals.forEach(function() { count += 1; });
|
||||
return count;
|
||||
}`
|
||||
err := m.update(&mr, bson.M{"section": "download"}, statsColl, DOWNLOADED_COLL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []struct {
|
||||
Book bson.ObjectId "_id"
|
||||
}
|
||||
err := m.downloaded.Find(nil).Sort("-value").Limit(num).All(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
books := make([]bson.ObjectId, len(result))
|
||||
for i, r := range result {
|
||||
books[i] = r.Book
|
||||
}
|
||||
return books, nil
|
||||
}
|
||||
|
||||
func (m *MR) GetHourVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) {
|
||||
if m.isOutdated(HOURLY_VISITS_COLL, MINUTES_UPDATE_HOURLY) {
|
||||
const reduce = `function(date, vals) {
|
||||
var count = 0;
|
||||
vals.forEach(function(v) { count += v; });
|
||||
return count;
|
||||
}`
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = `function() {
|
||||
var date = Date.UTC(this.date.getUTCFullYear(),
|
||||
this.date.getUTCMonth(),
|
||||
this.date.getUTCDate(),
|
||||
this.date.getUTCHours());
|
||||
emit({date: date, session: this.session}, 1);
|
||||
}`
|
||||
mr.Reduce = reduce
|
||||
err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_VISITS_COLL+"_raw")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var mr2 mgo.MapReduce
|
||||
mr2.Map = `function() {
|
||||
emit(this['_id']['date'], 1);
|
||||
}`
|
||||
mr2.Reduce = reduce
|
||||
err = m.update(&mr2, bson.M{}, m.hourly_raw, HOURLY_VISITS_COLL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []Visits
|
||||
err := m.hourly.Find(nil).All(&result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (m *MR) GetDayVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) {
|
||||
if m.isOutdated(DAILY_VISITS_COLL, MINUTES_UPDATE_DAILY) {
|
||||
const reduce = `function(date, vals) {
|
||||
var count = 0;
|
||||
vals.forEach(function(v) { count += v; });
|
||||
return count;
|
||||
}`
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = `function() {
|
||||
var date = Date.UTC(this.date.getUTCFullYear(),
|
||||
this.date.getUTCMonth(),
|
||||
this.date.getUTCDate());
|
||||
emit({date: date, session: this.session}, 1);
|
||||
}`
|
||||
mr.Reduce = reduce
|
||||
err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_VISITS_COLL+"_raw")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var mr2 mgo.MapReduce
|
||||
mr2.Map = `function() {
|
||||
emit(this['_id']['date'], 1);
|
||||
}`
|
||||
mr2.Reduce = reduce
|
||||
err = m.update(&mr2, bson.M{}, m.daily_raw, DAILY_VISITS_COLL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []Visits
|
||||
err := m.daily.Find(nil).All(&result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (m *MR) GetMonthVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) {
|
||||
if m.isOutdated(MONTHLY_VISITS_COLL, MINUTES_UPDATE_MONTHLY) {
|
||||
const reduce = `function(date, vals) {
|
||||
var count = 0;
|
||||
vals.forEach(function(v) { count += v; });
|
||||
return count;
|
||||
}`
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = `function() {
|
||||
var date = Date.UTC(this.date.getUTCFullYear(),
|
||||
this.date.getUTCMonth());
|
||||
emit({date: date, session: this.session}, 1);
|
||||
}`
|
||||
mr.Reduce = reduce
|
||||
err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_VISITS_COLL+"_raw")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var mr2 mgo.MapReduce
|
||||
mr2.Map = `function() {
|
||||
emit(this['_id']['date'], 1);
|
||||
}`
|
||||
mr2.Reduce = reduce
|
||||
err = m.update(&mr2, bson.M{}, m.monthly_raw, MONTHLY_VISITS_COLL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []Visits
|
||||
err := m.monthly.Find(nil).All(&result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (m *MR) update(mr *mgo.MapReduce, query bson.M, queryColl *mgo.Collection, storeColl string) error {
|
||||
_, err := m.meta.RemoveAll(bson.M{"type": storeColl})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mr.Out = bson.M{"replace": storeColl}
|
||||
_, err = queryColl.Find(query).MapReduce(mr, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return m.meta.Insert(bson.M{"type": storeColl})
|
||||
}
|
||||
|
||||
func (m *MR) isOutdated(coll string, minutes float64) bool {
|
||||
var result struct {
|
||||
Id bson.ObjectId `bson:"_id"`
|
||||
}
|
||||
err := m.meta.Find(bson.M{"type": coll}).One(&result)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
lastUpdate := result.Id.Time()
|
||||
return time.Since(lastUpdate).Minutes() > minutes
|
||||
}
|
Reference in a new issue