diff --git a/.gitignore b/.gitignore index 9510703..d1005e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -books/ -new/ -cover/ trantor tools/adduser/adduser tools/update/update @@ -8,5 +5,6 @@ tools/togridfs/togridfs tools/getISBNnDesc/getISBNnDesc tools/coverNew/coverNew tools/addsize/addsize +tools/importer/importer tags .*.swp diff --git a/README b/README index 1f5caac..ea5dbae 100644 --- a/README +++ b/README @@ -25,7 +25,9 @@ Under Debian Wheezy you can simply run: Yo also need to install go dependences: -# go get labix.org/v2/mgo/bson labix.org/v2/mgo/ github.com/gorilla/sessions github.com/gorilla/securecookie github.com/gorilla/mux github.com/nfnt/resize +# go get labix.org/v2/mgo/bson labix.org/v2/mgo/ github.com/gorilla/sessions \ + github.com/gorilla/securecookie github.com/gorilla/mux \ + github.com/nfnt/resize github.com/cihub/seelog == Installation == === For admins ("for developers" below) === diff --git a/admin.go b/admin.go index b2344d6..a7b4b90 100644 --- a/admin.go +++ b/admin.go @@ -1,9 +1,10 @@ package main +import log "github.com/cihub/seelog" + import ( "github.com/gorilla/mux" "labix.org/v2/mgo/bson" - "log" "net/http" "strconv" "strings" @@ -195,7 +196,7 @@ func storeHandler(h handler) { book := books[0] if err != nil { h.sess.Notify("An error ocurred!", err.Error(), "error") - log.Println("Error storing book '", book.Title, "': ", err.Error()) + log.Error("Error storing book '", book.Title, "': ", err.Error()) continue } h.db.UpdateBook(id, bson.M{"active": true}) diff --git a/config.go b/config.go index 67fe58c..6099708 100644 --- a/config.go +++ b/config.go @@ -26,9 +26,13 @@ const ( MINUTES_UPDATE_TAGS = 11 MINUTES_UPDATE_VISITED = 41 MINUTES_UPDATE_DOWNLOADED = 47 - MINUTES_UPDATE_HOURLY = 31 - MINUTES_UPDATE_DAILY = 60*12 + 7 - MINUTES_UPDATE_MONTHLY = 60*24 + 11 + MINUTES_UPDATE_HOURLY_V = 31 + MINUTES_UPDATE_DAILY_V = 60*12 + 7 + MINUTES_UPDATE_MONTHLY_V = 60*24 + 11 + MINUTES_UPDATE_HOURLY_D = 29 + MINUTES_UPDATE_DAILY_D = 60*12 + 13 + MINUTES_UPDATE_MONTHLY_D = 60*24 + 17 + MINUTES_UPDATE_LOGGER = 5 TAGS_DISPLAY = 50 SEARCH_ITEMS_PAGE = 20 NEW_ITEMS_PAGE = 50 @@ -39,6 +43,7 @@ const ( CSS_PATH = "css/" JS_PATH = "js/" IMG_PATH = "img/" + LOGGER_CONFIG = "logger.xml" IMG_WIDTH_BIG = 300 IMG_WIDTH_SMALL = 60 diff --git a/cover.go b/cover.go index a2ff26d..c6e3bae 100644 --- a/cover.go +++ b/cover.go @@ -1,5 +1,6 @@ package main +import log "github.com/cihub/seelog" import _ "image/png" import _ "image/jpeg" import _ "image/gif" @@ -15,7 +16,6 @@ import ( "io/ioutil" "labix.org/v2/mgo" "labix.org/v2/mgo/bson" - "log" "regexp" "strings" ) @@ -49,7 +49,7 @@ func coverHandler(h handler) { f, err = fs.OpenId(book.Cover) } if err != nil { - log.Println("Error while opening image:", err) + log.Error("Error while opening image: ", err) notFound(h) return } @@ -145,14 +145,14 @@ func storeImg(img io.Reader, title string, db *DB) (bson.ObjectId, bson.ObjectId /* open the files */ fBig, err := createCoverFile(title, db) if err != nil { - log.Println("Error creating", title, ":", err.Error()) + log.Error("Error creating ", title, ": ", err.Error()) return "", "" } defer fBig.Close() fSmall, err := createCoverFile(title+"_small", db) if err != nil { - log.Println("Error creating", title+"_small", ":", err.Error()) + log.Error("Error creating ", title+"_small", ": ", err.Error()) return "", "" } defer fSmall.Close() @@ -163,22 +163,22 @@ func storeImg(img io.Reader, title string, db *DB) (bson.ObjectId, bson.ObjectId jpgOptions := jpeg.Options{IMG_QUALITY} imgResized, err := resizeImg(img1, IMG_WIDTH_BIG) if err != nil { - log.Println("Error resizing big image:", err.Error()) + log.Error("Error resizing big image: ", err.Error()) return "", "" } err = jpeg.Encode(fBig, imgResized, &jpgOptions) if err != nil { - log.Println("Error encoding big image:", err.Error()) + log.Error("Error encoding big image: ", err.Error()) return "", "" } imgSmallResized, err := resizeImg(&img2, IMG_WIDTH_SMALL) if err != nil { - log.Println("Error resizing small image:", err.Error()) + log.Error("Error resizing small image: ", err.Error()) return "", "" } err = jpeg.Encode(fSmall, imgSmallResized, &jpgOptions) if err != nil { - log.Println("Error encoding small image:", err.Error()) + log.Error("Error encoding small image: ", err.Error()) return "", "" } diff --git a/database.go b/database.go index 92a68a1..65ed984 100644 --- a/database.go +++ b/database.go @@ -1,9 +1,12 @@ package main +import log "github.com/cihub/seelog" + import ( "crypto/md5" "labix.org/v2/mgo" "labix.org/v2/mgo/bson" + "os" "time" ) @@ -47,7 +50,8 @@ func initDB() *DB { d := new(DB) d.session, err = mgo.Dial(DB_IP) if err != nil { - panic(err) + log.Critical(err) + os.Exit(1) } return d } @@ -179,9 +183,8 @@ func (d *DB) GetBooks(query bson.M, r ...int) (books []Book, num int, err error) /* Get the most visited books */ func (d *DB) GetVisitedBooks(num int) (books []Book, err error) { - statsColl := d.session.DB(DB_NAME).C(STATS_COLL) - mr := NewMR(d.session.DB(DB_NAME)) - bookId, err := mr.GetMostVisited(num, statsColl) + visitedColl := d.session.DB(DB_NAME).C(VISITED_COLL) + bookId, err := GetBooksVisited(num, visitedColl) if err != nil { return nil, err } @@ -195,12 +198,17 @@ func (d *DB) GetVisitedBooks(num int) (books []Book, err error) { return } +func (d *DB) UpdateMostVisited() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMostVisited(statsColl) +} + /* Get the most downloaded books */ func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) { - statsColl := d.session.DB(DB_NAME).C(STATS_COLL) - mr := NewMR(d.session.DB(DB_NAME)) - bookId, err := mr.GetMostDownloaded(num, statsColl) + downloadedColl := d.session.DB(DB_NAME).C(DOWNLOADED_COLL) + bookId, err := GetBooksVisited(num, downloadedColl) if err != nil { return nil, err } @@ -214,6 +222,12 @@ func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) { return } +func (d *DB) UpdateDownloadedBooks() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMostDownloaded(statsColl) +} + /* optional parameters: length and start index * * Returns: list of books, number found and err @@ -237,9 +251,14 @@ func (d *DB) GetFS(prefix string) *mgo.GridFS { } func (d *DB) GetTags(numTags int) ([]string, error) { + tagsColl := d.session.DB(DB_NAME).C(TAGS_COLL) + return GetTags(numTags, tagsColl) +} + +func (d *DB) UpdateTags() error { booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetTags(numTags, booksColl) + return mr.UpdateTags(booksColl) } type Visits struct { @@ -247,38 +266,68 @@ type Visits struct { Count int "value" } -func (d *DB) GetHourVisits(start time.Time) ([]Visits, error) { - statsColl := d.session.DB(DB_NAME).C(STATS_COLL) - mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetHourVisits(start, statsColl) +func (d *DB) GetHourVisits() ([]Visits, error) { + hourlyColl := d.session.DB(DB_NAME).C(HOURLY_VISITS_COLL) + return GetVisits(hourlyColl) } -func (d *DB) GetDayVisits(start time.Time) ([]Visits, error) { +func (d *DB) UpdateHourVisits() error { statsColl := d.session.DB(DB_NAME).C(STATS_COLL) mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetDayVisits(start, statsColl) + return mr.UpdateHourVisits(statsColl) } -func (d *DB) GetMonthVisits(start time.Time) ([]Visits, error) { - statsColl := d.session.DB(DB_NAME).C(STATS_COLL) - mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetMonthVisits(start, statsColl) +func (d *DB) GetDayVisits() ([]Visits, error) { + dailyColl := d.session.DB(DB_NAME).C(DAILY_VISITS_COLL) + return GetVisits(dailyColl) } -func (d *DB) GetHourDownloads(start time.Time) ([]Visits, error) { +func (d *DB) UpdateDayVisits() error { statsColl := d.session.DB(DB_NAME).C(STATS_COLL) mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetHourDownloads(start, statsColl) + return mr.UpdateDayVisits(statsColl) } -func (d *DB) GetDayDownloads(start time.Time) ([]Visits, error) { - statsColl := d.session.DB(DB_NAME).C(STATS_COLL) - mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetDayDowloads(start, statsColl) +func (d *DB) GetMonthVisits() ([]Visits, error) { + monthlyColl := d.session.DB(DB_NAME).C(MONTHLY_VISITS_COLL) + return GetVisits(monthlyColl) } -func (d *DB) GetMonthDownloads(start time.Time) ([]Visits, error) { +func (d *DB) UpdateMonthVisits() error { statsColl := d.session.DB(DB_NAME).C(STATS_COLL) mr := NewMR(d.session.DB(DB_NAME)) - return mr.GetMonthDowloads(start, statsColl) + return mr.UpdateMonthVisits(statsColl) +} + +func (d *DB) GetHourDownloads() ([]Visits, error) { + hourlyColl := d.session.DB(DB_NAME).C(HOURLY_DOWNLOADS_COLL) + return GetVisits(hourlyColl) +} + +func (d *DB) UpdateHourDownloads() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateHourDownloads(statsColl) +} + +func (d *DB) GetDayDownloads() ([]Visits, error) { + dailyColl := d.session.DB(DB_NAME).C(DAILY_DOWNLOADS_COLL) + return GetVisits(dailyColl) +} + +func (d *DB) UpdateDayDownloads() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateDayDownloads(statsColl) +} + +func (d *DB) GetMonthDownloads() ([]Visits, error) { + monthlyColl := d.session.DB(DB_NAME).C(MONTHLY_DOWNLOADS_COLL) + return GetVisits(monthlyColl) +} + +func (d *DB) UpdateMonthDownloads() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMonthDownloads(statsColl) } diff --git a/logger.xml b/logger.xml new file mode 100644 index 0000000..7b8ac3a --- /dev/null +++ b/logger.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/mapreduce.go b/mapreduce.go index 7603fb9..c24deec 100644 --- a/mapreduce.go +++ b/mapreduce.go @@ -6,39 +6,10 @@ import ( "time" ) -type MR struct { - database *mgo.Database -} - -func NewMR(database *mgo.Database) *MR { - m := new(MR) - m.database = database - return m -} - -func (m *MR) GetTags(numTags int, booksColl *mgo.Collection) ([]string, error) { - if m.isOutdated(TAGS_COLL, MINUTES_UPDATE_TAGS) { - var mr mgo.MapReduce - mr.Map = `function() { - if (this.subject) { - this.subject.forEach(function(s) { emit(s, 1); }); - } - }` - mr.Reduce = `function(tag, vals) { - var count = 0; - vals.forEach(function() { count += 1; }); - return count; - }` - err := m.update(&mr, bson.M{"active": true}, booksColl, TAGS_COLL) - if err != nil { - return nil, err - } - } - +func GetTags(numTags int, tagsColl *mgo.Collection) ([]string, error) { var result []struct { Tag string "_id" } - tagsColl := m.database.C(TAGS_COLL) err := tagsColl.Find(nil).Sort("-value").Limit(numTags).All(&result) if err != nil { return nil, err @@ -51,27 +22,10 @@ func (m *MR) GetTags(numTags int, booksColl *mgo.Collection) ([]string, error) { return tags, nil } -func (m *MR) GetMostVisited(num int, statsColl *mgo.Collection) ([]bson.ObjectId, error) { - if m.isOutdated(VISITED_COLL, MINUTES_UPDATE_VISITED) { - var mr mgo.MapReduce - mr.Map = `function() { - emit(this.id, 1); - }` - mr.Reduce = `function(tag, vals) { - var count = 0; - vals.forEach(function() { count += 1; }); - return count; - }` - err := m.update(&mr, bson.M{"section": "book"}, statsColl, VISITED_COLL) - if err != nil { - return nil, err - } - } - +func GetBooksVisited(num int, visitedColl *mgo.Collection) ([]bson.ObjectId, error) { var result []struct { Book bson.ObjectId "_id" } - visitedColl := m.database.C(VISITED_COLL) err := visitedColl.Find(nil).Sort("-value").Limit(num).All(&result) if err != nil { return nil, err @@ -84,232 +38,218 @@ func (m *MR) GetMostVisited(num int, statsColl *mgo.Collection) ([]bson.ObjectId return books, nil } -func (m *MR) GetMostDownloaded(num int, statsColl *mgo.Collection) ([]bson.ObjectId, error) { - if m.isOutdated(DOWNLOADED_COLL, MINUTES_UPDATE_DOWNLOADED) { - var mr mgo.MapReduce - mr.Map = `function() { - emit(this.id, 1); - }` - mr.Reduce = `function(tag, vals) { - var count = 0; - vals.forEach(function() { count += 1; }); - return count; - }` - err := m.update(&mr, bson.M{"section": "download"}, statsColl, DOWNLOADED_COLL) - if err != nil { - return nil, err - } - } +func GetVisits(visitsColl *mgo.Collection) ([]Visits, error) { + var result []Visits + err := visitsColl.Find(nil).All(&result) + return result, err +} - var result []struct { - Book bson.ObjectId "_id" - } - downloadedColl := m.database.C(DOWNLOADED_COLL) - err := downloadedColl.Find(nil).Sort("-value").Limit(num).All(&result) +type MR struct { + database *mgo.Database +} + +func NewMR(database *mgo.Database) *MR { + m := new(MR) + m.database = database + return m +} + +func (m *MR) UpdateTags(booksColl *mgo.Collection) error { + var mr mgo.MapReduce + mr.Map = `function() { + if (this.subject) { + this.subject.forEach(function(s) { emit(s, 1); }); + } + }` + mr.Reduce = `function(tag, vals) { + var count = 0; + vals.forEach(function() { count += 1; }); + return count; + }` + return m.update(&mr, bson.M{"active": true}, booksColl, TAGS_COLL) +} + +func (m *MR) UpdateMostVisited(statsColl *mgo.Collection) error { + var mr mgo.MapReduce + mr.Map = `function() { + if (this.id) { + emit(this.id, 1); + } + }` + mr.Reduce = `function(tag, vals) { + var count = 0; + vals.forEach(function() { count += 1; }); + return count; + }` + return m.update(&mr, bson.M{"section": "book"}, statsColl, VISITED_COLL) +} + +func (m *MR) UpdateMostDownloaded(statsColl *mgo.Collection) error { + var mr mgo.MapReduce + mr.Map = `function() { + emit(this.id, 1); + }` + mr.Reduce = `function(tag, vals) { + var count = 0; + vals.forEach(function() { count += 1; }); + return count; + }` + return m.update(&mr, bson.M{"section": "download"}, statsColl, DOWNLOADED_COLL) +} + +func (m *MR) UpdateHourVisits(statsColl *mgo.Collection) error { + const numDays = 2 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour) + + const reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + var mr mgo.MapReduce + mr.Map = `function() { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate(), + this.date.getUTCHours()); + emit({date: date, session: this.session}, 1); + }` + mr.Reduce = reduce + err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_VISITS_COLL+"_raw") if err != nil { - return nil, err + return err } - - books := make([]bson.ObjectId, len(result)) - for i, r := range result { - books[i] = r.Book - } - return books, nil + var mr2 mgo.MapReduce + mr2.Map = `function() { + emit(this['_id']['date'], 1); + }` + mr2.Reduce = reduce + hourly_raw := m.database.C(HOURLY_VISITS_COLL + "_raw") + return m.update(&mr2, bson.M{}, hourly_raw, HOURLY_VISITS_COLL) } -func (m *MR) GetHourVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { - if m.isOutdated(HOURLY_VISITS_COLL, MINUTES_UPDATE_HOURLY) { - const reduce = `function(date, vals) { - var count = 0; - vals.forEach(function(v) { count += v; }); - return count; - }` - var mr mgo.MapReduce - mr.Map = `function() { - var date = Date.UTC(this.date.getUTCFullYear(), - this.date.getUTCMonth(), - this.date.getUTCDate(), +func (m *MR) UpdateDayVisits(statsColl *mgo.Collection) error { + const numDays = 30 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) + + const reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + var mr mgo.MapReduce + mr.Map = `function() { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate()); + emit({date: date, session: this.session}, 1); + }` + mr.Reduce = reduce + err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_VISITS_COLL+"_raw") + if err != nil { + return err + } + var mr2 mgo.MapReduce + mr2.Map = `function() { + emit(this['_id']['date'], 1); + }` + mr2.Reduce = reduce + daily_raw := m.database.C(DAILY_VISITS_COLL + "_raw") + return m.update(&mr2, bson.M{}, daily_raw, DAILY_VISITS_COLL) +} + +func (m *MR) UpdateMonthVisits(statsColl *mgo.Collection) error { + const numDays = 365 + + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) + + const reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + var mr mgo.MapReduce + mr.Map = `function() { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth()); + emit({date: date, session: this.session}, 1); + }` + mr.Reduce = reduce + err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_VISITS_COLL+"_raw") + if err != nil { + return err + } + var mr2 mgo.MapReduce + mr2.Map = `function() { + emit(this['_id']['date'], 1); + }` + mr2.Reduce = reduce + monthly_raw := m.database.C(MONTHLY_VISITS_COLL + "_raw") + return m.update(&mr2, bson.M{}, monthly_raw, MONTHLY_VISITS_COLL) +} + +func (m *MR) UpdateHourDownloads(statsColl *mgo.Collection) error { + const numDays = 2 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour) + + var mr mgo.MapReduce + mr.Map = `function() { + if (this.section == "download") { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate(), this.date.getUTCHours()); - emit({date: date, session: this.session}, 1); - }` - mr.Reduce = reduce - err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_VISITS_COLL+"_raw") - if err != nil { - return nil, err - } - var mr2 mgo.MapReduce - mr2.Map = `function() { - emit(this['_id']['date'], 1); - }` - mr2.Reduce = reduce - hourly_raw := m.database.C(HOURLY_VISITS_COLL + "_raw") - err = m.update(&mr2, bson.M{}, hourly_raw, HOURLY_VISITS_COLL) - if err != nil { - return nil, err - } - } - - var result []Visits - hourlyColl := m.database.C(HOURLY_VISITS_COLL) - err := hourlyColl.Find(nil).All(&result) - return result, err + emit(date, 1); + } + }` + mr.Reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + return m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_DOWNLOADS_COLL) } -func (m *MR) GetDayVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { - if m.isOutdated(DAILY_VISITS_COLL, MINUTES_UPDATE_DAILY) { - const reduce = `function(date, vals) { - var count = 0; - vals.forEach(function(v) { count += v; }); - return count; - }` - var mr mgo.MapReduce - mr.Map = `function() { - var date = Date.UTC(this.date.getUTCFullYear(), - this.date.getUTCMonth(), - this.date.getUTCDate()); - emit({date: date, session: this.session}, 1); - }` - mr.Reduce = reduce - err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_VISITS_COLL+"_raw") - if err != nil { - return nil, err - } - var mr2 mgo.MapReduce - mr2.Map = `function() { - emit(this['_id']['date'], 1); - }` - mr2.Reduce = reduce - daily_raw := m.database.C(DAILY_VISITS_COLL + "_raw") - err = m.update(&mr2, bson.M{}, daily_raw, DAILY_VISITS_COLL) - if err != nil { - return nil, err - } - } +func (m *MR) UpdateDayDownloads(statsColl *mgo.Collection) error { + const numDays = 30 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) - var result []Visits - dailyColl := m.database.C(DAILY_VISITS_COLL) - err := dailyColl.Find(nil).All(&result) - return result, err + var mr mgo.MapReduce + mr.Map = `function() { + if (this.section == "download") { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate()); + emit(date, 1); + } + }` + mr.Reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + return m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_DOWNLOADS_COLL) } -func (m *MR) GetMonthVisits(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { - if m.isOutdated(MONTHLY_VISITS_COLL, MINUTES_UPDATE_MONTHLY) { - const reduce = `function(date, vals) { - var count = 0; - vals.forEach(function(v) { count += v; }); - return count; - }` - var mr mgo.MapReduce - mr.Map = `function() { - var date = Date.UTC(this.date.getUTCFullYear(), - this.date.getUTCMonth()); - emit({date: date, session: this.session}, 1); - }` - mr.Reduce = reduce - err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_VISITS_COLL+"_raw") - if err != nil { - return nil, err - } - var mr2 mgo.MapReduce - mr2.Map = `function() { - emit(this['_id']['date'], 1); - }` - mr2.Reduce = reduce - monthly_raw := m.database.C(MONTHLY_VISITS_COLL + "_raw") - err = m.update(&mr2, bson.M{}, monthly_raw, MONTHLY_VISITS_COLL) - if err != nil { - return nil, err - } - } +func (m *MR) UpdateMonthDownloads(statsColl *mgo.Collection) error { + const numDays = 365 - var result []Visits - monthlyColl := m.database.C(MONTHLY_VISITS_COLL) - err := monthlyColl.Find(nil).All(&result) - return result, err -} + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) -func (m *MR) GetHourDownloads(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { - if m.isOutdated(HOURLY_DOWNLOADS_COLL, MINUTES_UPDATE_HOURLY) { - var mr mgo.MapReduce - mr.Map = `function() { - if (this.section == "download") { - var date = Date.UTC(this.date.getUTCFullYear(), - this.date.getUTCMonth(), - this.date.getUTCDate(), - this.date.getUTCHours()); - emit(date, 1); - } - }` - mr.Reduce = `function(date, vals) { - var count = 0; - vals.forEach(function(v) { count += v; }); - return count; - }` - err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_DOWNLOADS_COLL) - if err != nil { - return nil, err - } - } - - var result []Visits - hourlyColl := m.database.C(HOURLY_DOWNLOADS_COLL) - err := hourlyColl.Find(nil).All(&result) - return result, err -} - -func (m *MR) GetDayDowloads(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { - if m.isOutdated(DAILY_DOWNLOADS_COLL, MINUTES_UPDATE_DAILY) { - var mr mgo.MapReduce - mr.Map = `function() { - if (this.section == "download") { - var date = Date.UTC(this.date.getUTCFullYear(), - this.date.getUTCMonth(), - this.date.getUTCDate()); - emit(date, 1); - } - }` - mr.Reduce = `function(date, vals) { - var count = 0; - vals.forEach(function(v) { count += v; }); - return count; - }` - err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_DOWNLOADS_COLL) - if err != nil { - return nil, err - } - } - - var result []Visits - dailyColl := m.database.C(DAILY_DOWNLOADS_COLL) - err := dailyColl.Find(nil).All(&result) - return result, err -} - -func (m *MR) GetMonthDowloads(start time.Time, statsColl *mgo.Collection) ([]Visits, error) { - if m.isOutdated(MONTHLY_DOWNLOADS_COLL, MINUTES_UPDATE_MONTHLY) { - var mr mgo.MapReduce - mr.Map = `function() { - if (this.section == "download") { - var date = Date.UTC(this.date.getUTCFullYear(), - this.date.getUTCMonth()); - emit(date, 1); - } - }` - mr.Reduce = `function(date, vals) { - var count = 0; - vals.forEach(function(v) { count += v; }); - return count; - }` - err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_DOWNLOADS_COLL) - if err != nil { - return nil, err - } - } - - var result []Visits - monthlyColl := m.database.C(MONTHLY_DOWNLOADS_COLL) - err := monthlyColl.Find(nil).All(&result) - return result, err + var mr mgo.MapReduce + mr.Map = `function() { + if (this.section == "download") { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth()); + emit(date, 1); + } + }` + mr.Reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + return m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_DOWNLOADS_COLL) } func (m *MR) update(mr *mgo.MapReduce, query bson.M, queryColl *mgo.Collection, storeColl string) error { diff --git a/stats.go b/stats.go index 46ea23f..4218ddf 100644 --- a/stats.go +++ b/stats.go @@ -1,5 +1,7 @@ package main +import log "github.com/cihub/seelog" + import ( "github.com/gorilla/mux" "labix.org/v2/mgo/bson" @@ -23,6 +25,8 @@ func InitStats(database *DB) { func GatherStats(function func(handler), database *DB) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { + log.Info("Query ", r.Method, " ", r.RequestURI) + var h handler h.db = database.Copy() defer h.db.Close() @@ -91,11 +95,9 @@ type visitData struct { } func getHourlyVisits(db *DB) []visitData { - const numDays = 2 var visits []visitData - start := time.Now().UTC().Add(-numDays * 24 * time.Hour) - visit, _ := db.GetHourVisits(start) + visit, _ := db.GetHourVisits() for _, v := range visit { var elem visitData hour := time.Unix(v.Date/1000, 0).UTC().Hour() @@ -108,11 +110,9 @@ func getHourlyVisits(db *DB) []visitData { } func getDailyVisits(db *DB) []visitData { - const numDays = 30 var visits []visitData - start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) - visit, _ := db.GetDayVisits(start) + visit, _ := db.GetDayVisits() for _, v := range visit { var elem visitData day := time.Unix(v.Date/1000, 0).UTC().Day() @@ -125,11 +125,9 @@ func getDailyVisits(db *DB) []visitData { } func getMonthlyVisits(db *DB) []visitData { - const numDays = 365 var visits []visitData - start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) - visit, _ := db.GetMonthVisits(start) + visit, _ := db.GetMonthVisits() for _, v := range visit { var elem visitData month := time.Unix(v.Date/1000, 0).UTC().Month() @@ -142,11 +140,9 @@ func getMonthlyVisits(db *DB) []visitData { } func getHourlyDownloads(db *DB) []visitData { - const numDays = 2 var visits []visitData - start := time.Now().UTC().Add(-numDays * 24 * time.Hour) - visit, _ := db.GetHourDownloads(start) + visit, _ := db.GetHourDownloads() for _, v := range visit { var elem visitData hour := time.Unix(v.Date/1000, 0).UTC().Hour() @@ -159,11 +155,9 @@ func getHourlyDownloads(db *DB) []visitData { } func getDailyDownloads(db *DB) []visitData { - const numDays = 30 var visits []visitData - start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) - visit, _ := db.GetDayDownloads(start) + visit, _ := db.GetDayDownloads() for _, v := range visit { var elem visitData day := time.Unix(v.Date/1000, 0).UTC().Day() @@ -176,11 +170,9 @@ func getDailyDownloads(db *DB) []visitData { } func getMonthlyDownloads(db *DB) []visitData { - const numDays = 365 var visits []visitData - start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) - visit, _ := db.GetMonthDownloads(start) + visit, _ := db.GetMonthDownloads() for _, v := range visit { var elem visitData month := time.Unix(v.Date/1000, 0).UTC().Month() diff --git a/tasker.go b/tasker.go new file mode 100644 index 0000000..e057874 --- /dev/null +++ b/tasker.go @@ -0,0 +1,34 @@ +package main + +import log "github.com/cihub/seelog" + +import ( + "time" +) + +func InitTasks(db *DB) { + periodicTask(updateLogger, MINUTES_UPDATE_LOGGER*time.Minute) + periodicTask(db.UpdateTags, MINUTES_UPDATE_TAGS*time.Minute) + periodicTask(db.UpdateMostVisited, MINUTES_UPDATE_VISITED*time.Minute) + periodicTask(db.UpdateDownloadedBooks, MINUTES_UPDATE_DOWNLOADED*time.Minute) + periodicTask(db.UpdateHourVisits, MINUTES_UPDATE_HOURLY_V*time.Minute) + periodicTask(db.UpdateDayVisits, MINUTES_UPDATE_DAILY_V*time.Minute) + periodicTask(db.UpdateMonthVisits, MINUTES_UPDATE_MONTHLY_V*time.Minute) + periodicTask(db.UpdateHourDownloads, MINUTES_UPDATE_HOURLY_D*time.Minute) + periodicTask(db.UpdateDayDownloads, MINUTES_UPDATE_DAILY_D*time.Minute) + periodicTask(db.UpdateMonthDownloads, MINUTES_UPDATE_MONTHLY_D*time.Minute) +} + +func periodicTask(task func() error, periodicity time.Duration) { + go tasker(task, periodicity) +} + +func tasker(task func() error, periodicity time.Duration) { + for true { + time.Sleep(periodicity) + err := task() + if err != nil { + log.Error("Task error: ", err) + } + } +} diff --git a/tools/README b/tools/README index d0b3b4b..84312cd 100644 --- a/tools/README +++ b/tools/README @@ -4,6 +4,8 @@ Some dirty tools to manage trantor: $ adduser myNick Password: +- importer. import all epubs passed as parameter into the database and approve them + - update. Update the cover of all the books. It might be outdated. - togridfs (23/4/2013). Migrate all files and covers to gridfs diff --git a/tools/importer/config.go b/tools/importer/config.go new file mode 100644 index 0000000..6099708 --- /dev/null +++ b/tools/importer/config.go @@ -0,0 +1,53 @@ +package main + +const ( + PORT = "8080" + + DB_IP = "127.0.0.1" + DB_NAME = "trantor" + META_COLL = "meta" + BOOKS_COLL = "books" + TAGS_COLL = "tags" + VISITED_COLL = "visited" + DOWNLOADED_COLL = "downloaded" + HOURLY_VISITS_COLL = "visits.hourly" + DAILY_VISITS_COLL = "visits.daily" + MONTHLY_VISITS_COLL = "visits.monthly" + HOURLY_DOWNLOADS_COLL = "downloads.hourly" + DAILY_DOWNLOADS_COLL = "downloads.daily" + MONTHLY_DOWNLOADS_COLL = "downloads.monthly" + USERS_COLL = "users" + NEWS_COLL = "news" + STATS_COLL = "statistics" + FS_BOOKS = "fs_books" + FS_IMGS = "fs_imgs" + + PASS_SALT = "ImperialLibSalt" + MINUTES_UPDATE_TAGS = 11 + MINUTES_UPDATE_VISITED = 41 + MINUTES_UPDATE_DOWNLOADED = 47 + MINUTES_UPDATE_HOURLY_V = 31 + MINUTES_UPDATE_DAILY_V = 60*12 + 7 + MINUTES_UPDATE_MONTHLY_V = 60*24 + 11 + MINUTES_UPDATE_HOURLY_D = 29 + MINUTES_UPDATE_DAILY_D = 60*12 + 13 + MINUTES_UPDATE_MONTHLY_D = 60*24 + 17 + MINUTES_UPDATE_LOGGER = 5 + TAGS_DISPLAY = 50 + SEARCH_ITEMS_PAGE = 20 + NEW_ITEMS_PAGE = 50 + NUM_NEWS = 10 + DAYS_NEWS_INDEXPAGE = 15 + + TEMPLATE_PATH = "templates/" + CSS_PATH = "css/" + JS_PATH = "js/" + IMG_PATH = "img/" + LOGGER_CONFIG = "logger.xml" + + IMG_WIDTH_BIG = 300 + IMG_WIDTH_SMALL = 60 + IMG_QUALITY = 80 + + CHAN_SIZE = 100 +) diff --git a/tools/importer/cover.go b/tools/importer/cover.go new file mode 100644 index 0000000..c6e3bae --- /dev/null +++ b/tools/importer/cover.go @@ -0,0 +1,202 @@ +package main + +import log "github.com/cihub/seelog" +import _ "image/png" +import _ "image/jpeg" +import _ "image/gif" + +import ( + "bytes" + "git.gitorious.org/go-pkg/epubgo.git" + "github.com/gorilla/mux" + "github.com/nfnt/resize" + "image" + "image/jpeg" + "io" + "io/ioutil" + "labix.org/v2/mgo" + "labix.org/v2/mgo/bson" + "regexp" + "strings" +) + +func coverHandler(h handler) { + vars := mux.Vars(h.r) + if !bson.IsObjectIdHex(vars["id"]) { + notFound(h) + return + } + id := bson.ObjectIdHex(vars["id"]) + books, _, err := h.db.GetBooks(bson.M{"_id": id}) + if err != nil || len(books) == 0 { + notFound(h) + return + } + book := books[0] + + if !book.Active { + if !h.sess.IsAdmin() { + notFound(h) + return + } + } + + fs := h.db.GetFS(FS_IMGS) + var f *mgo.GridFile + if vars["size"] == "small" { + f, err = fs.OpenId(book.CoverSmall) + } else { + f, err = fs.OpenId(book.Cover) + } + if err != nil { + log.Error("Error while opening image: ", err) + notFound(h) + return + } + defer f.Close() + + headers := h.w.Header() + headers["Content-Type"] = []string{"image/jpeg"} + + io.Copy(h.w, f) +} + +func GetCover(e *epubgo.Epub, title string, db *DB) (bson.ObjectId, bson.ObjectId) { + imgId, smallId := coverFromMetadata(e, title, db) + if imgId != "" { + return imgId, smallId + } + + imgId, smallId = searchCommonCoverNames(e, title, db) + if imgId != "" { + return imgId, smallId + } + + /* search for img on the text */ + exp, _ := regexp.Compile("<.*ima?g.*[(src)(href)]=[\"']([^\"']*(\\.[^\\.\"']*))[\"']") + it, errNext := e.Spine() + for errNext == nil { + file, err := it.Open() + if err != nil { + break + } + defer file.Close() + + txt, err := ioutil.ReadAll(file) + if err != nil { + break + } + res := exp.FindSubmatch(txt) + if res != nil { + href := string(res[1]) + urlPart := strings.Split(it.URL(), "/") + url := strings.Join(urlPart[:len(urlPart)-1], "/") + if href[:3] == "../" { + href = href[3:] + url = strings.Join(urlPart[:len(urlPart)-2], "/") + } + href = strings.Replace(href, "%20", " ", -1) + href = strings.Replace(href, "%27", "'", -1) + href = strings.Replace(href, "%28", "(", -1) + href = strings.Replace(href, "%29", ")", -1) + if url == "" { + url = href + } else { + url = url + "/" + href + } + + img, err := e.OpenFile(url) + if err == nil { + defer img.Close() + return storeImg(img, title, db) + } + } + errNext = it.Next() + } + return "", "" +} + +func coverFromMetadata(e *epubgo.Epub, title string, db *DB) (bson.ObjectId, bson.ObjectId) { + metaList, _ := e.MetadataAttr("meta") + for _, meta := range metaList { + if meta["name"] == "cover" { + img, err := e.OpenFileId(meta["content"]) + if err == nil { + defer img.Close() + return storeImg(img, title, db) + } + } + } + return "", "" +} + +func searchCommonCoverNames(e *epubgo.Epub, title string, db *DB) (bson.ObjectId, bson.ObjectId) { + for _, p := range []string{"cover.jpg", "Images/cover.jpg", "images/cover.jpg", "cover.jpeg", "cover1.jpg", "cover1.jpeg"} { + img, err := e.OpenFile(p) + if err == nil { + defer img.Close() + return storeImg(img, title, db) + } + } + return "", "" +} + +func storeImg(img io.Reader, title string, db *DB) (bson.ObjectId, bson.ObjectId) { + /* open the files */ + fBig, err := createCoverFile(title, db) + if err != nil { + log.Error("Error creating ", title, ": ", err.Error()) + return "", "" + } + defer fBig.Close() + + fSmall, err := createCoverFile(title+"_small", db) + if err != nil { + log.Error("Error creating ", title+"_small", ": ", err.Error()) + return "", "" + } + defer fSmall.Close() + + /* resize img */ + var img2 bytes.Buffer + img1 := io.TeeReader(img, &img2) + jpgOptions := jpeg.Options{IMG_QUALITY} + imgResized, err := resizeImg(img1, IMG_WIDTH_BIG) + if err != nil { + log.Error("Error resizing big image: ", err.Error()) + return "", "" + } + err = jpeg.Encode(fBig, imgResized, &jpgOptions) + if err != nil { + log.Error("Error encoding big image: ", err.Error()) + return "", "" + } + imgSmallResized, err := resizeImg(&img2, IMG_WIDTH_SMALL) + if err != nil { + log.Error("Error resizing small image: ", err.Error()) + return "", "" + } + err = jpeg.Encode(fSmall, imgSmallResized, &jpgOptions) + if err != nil { + log.Error("Error encoding small image: ", err.Error()) + return "", "" + } + + idBig, _ := fBig.Id().(bson.ObjectId) + idSmall, _ := fSmall.Id().(bson.ObjectId) + return idBig, idSmall +} + +func createCoverFile(title string, db *DB) (*mgo.GridFile, error) { + fs := db.GetFS(FS_IMGS) + return fs.Create(title + ".jpg") +} + +func resizeImg(imgReader io.Reader, width uint) (image.Image, error) { + img, _, err := image.Decode(imgReader) + if err != nil { + return nil, err + } + + return resize.Resize(width, 0, img, resize.NearestNeighbor), nil +} diff --git a/tools/importer/database.go b/tools/importer/database.go new file mode 100644 index 0000000..e49b644 --- /dev/null +++ b/tools/importer/database.go @@ -0,0 +1,327 @@ +package main + +import log "github.com/cihub/seelog" + +import ( + "crypto/md5" + "labix.org/v2/mgo" + "labix.org/v2/mgo/bson" + "os" + "time" +) + +type Book struct { + Id string `bson:"_id"` + Title string + Author []string + Contributor string + Publisher string + Description string + Subject []string + Date string + Lang []string + Isbn string + Type string + Format string + Source string + Relation string + Coverage string + Rights string + Meta string + File bson.ObjectId + FileSize int + Cover bson.ObjectId + CoverSmall bson.ObjectId + Active bool + Keywords []string +} + +type News struct { + Date time.Time + Text string +} + +type DB struct { + session *mgo.Session +} + +func initDB() *DB { + var err error + d := new(DB) + d.session, err = mgo.Dial(DB_IP) + if err != nil { + log.Critical(err) + os.Exit(1) + } + return d +} + +func (d *DB) Close() { + d.session.Close() +} + +func (d *DB) Copy() *DB { + dbCopy := new(DB) + dbCopy.session = d.session.Copy() + return dbCopy +} + +func md5Pass(pass string) []byte { + h := md5.New() + hash := h.Sum(([]byte)(PASS_SALT + pass)) + return hash +} + +func (d *DB) SetPassword(user string, pass string) error { + hash := md5Pass(pass) + userColl := d.session.DB(DB_NAME).C(USERS_COLL) + return userColl.Update(bson.M{"user": user}, bson.M{"$set": bson.M{"pass": hash}}) +} + +func (d *DB) UserValid(user string, pass string) bool { + hash := md5Pass(pass) + userColl := d.session.DB(DB_NAME).C(USERS_COLL) + n, err := userColl.Find(bson.M{"user": user, "pass": hash}).Count() + if err != nil { + return false + } + return n != 0 +} + +func (d *DB) UserRole(user string) string { + type result struct { + Role string + } + res := result{} + userColl := d.session.DB(DB_NAME).C(USERS_COLL) + err := userColl.Find(bson.M{"user": user}).One(&res) + if err != nil { + return "" + } + return res.Role +} + +func (d *DB) AddNews(text string) error { + var news News + news.Text = text + news.Date = time.Now() + newsColl := d.session.DB(DB_NAME).C(NEWS_COLL) + return newsColl.Insert(news) +} + +func (d *DB) GetNews(num int, days int) (news []News, err error) { + query := bson.M{} + if days != 0 { + duration := time.Duration(-24*days) * time.Hour + date := time.Now().Add(duration) + query = bson.M{"date": bson.M{"$gt": date}} + } + newsColl := d.session.DB(DB_NAME).C(NEWS_COLL) + q := newsColl.Find(query).Sort("-date").Limit(num) + err = q.All(&news) + return +} + +func (d *DB) InsertStats(stats interface{}) error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + return statsColl.Insert(stats) +} + +func (d *DB) InsertBook(book interface{}) error { + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + return booksColl.Insert(book) +} + +func (d *DB) RemoveBook(id bson.ObjectId) error { + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + return booksColl.Remove(bson.M{"_id": id}) +} + +func (d *DB) UpdateBook(id bson.ObjectId, data interface{}) error { + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + return booksColl.Update(bson.M{"_id": id}, bson.M{"$set": data}) +} + +/* optional parameters: length and start index + * + * Returns: list of books, number found and err + */ +func (d *DB) GetBooks(query bson.M, r ...int) (books []Book, num int, err error) { + var start, length int + if len(r) > 0 { + length = r[0] + if len(r) > 1 { + start = r[1] + } + } + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + q := booksColl.Find(query).Sort("-_id") + num, err = q.Count() + if err != nil { + return + } + if start != 0 { + q = q.Skip(start) + } + if length != 0 { + q = q.Limit(length) + } + + err = q.All(&books) + for i, b := range books { + books[i].Id = bson.ObjectId(b.Id).Hex() + } + return +} + +/* Get the most visited books + */ +func (d *DB) GetVisitedBooks(num int) (books []Book, err error) { + visitedColl := d.session.DB(DB_NAME).C(VISITED_COLL) + bookId, err := GetBooksVisited(num, visitedColl) + if err != nil { + return nil, err + } + + books = make([]Book, num) + for i, id := range bookId { + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + booksColl.Find(bson.M{"_id": id}).One(&books[i]) + books[i].Id = bson.ObjectId(books[i].Id).Hex() + } + return +} + +func (d *DB) UpdateMostVisited() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMostVisited(statsColl) +} + +/* Get the most downloaded books + */ +func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) { + downloadedColl := d.session.DB(DB_NAME).C(DOWNLOADED_COLL) + bookId, err := GetBooksVisited(num, downloadedColl) + if err != nil { + return nil, err + } + + books = make([]Book, num) + for i, id := range bookId { + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + booksColl.Find(bson.M{"_id": id}).One(&books[i]) + books[i].Id = bson.ObjectId(books[i].Id).Hex() + } + return +} + +func (d *DB) UpdateDownloadedBooks() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMostDownloaded(statsColl) +} + +/* optional parameters: length and start index + * + * Returns: list of books, number found and err + */ +func (d *DB) GetNewBooks(r ...int) (books []Book, num int, err error) { + return d.GetBooks(bson.M{"$nor": []bson.M{{"active": true}}}, r...) +} + +func (d *DB) BookActive(id bson.ObjectId) bool { + var book Book + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + err := booksColl.Find(bson.M{"_id": id}).One(&book) + if err != nil { + return false + } + return book.Active +} + +func (d *DB) GetFS(prefix string) *mgo.GridFS { + return d.session.DB(DB_NAME).GridFS(prefix) +} + +func (d *DB) GetTags(numTags int) ([]string, error) { + tagsColl := d.session.DB(DB_NAME).C(TAGS_COLL) + return GetTags(numTags, tagsColl) +} + +func (d *DB) UpdateTags() error { + booksColl := d.session.DB(DB_NAME).C(BOOKS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateTags(booksColl) +} + +type Visits struct { + Date int64 "_id" + Count int "value" +} + +func (d *DB) GetHourVisits() ([]Visits, error) { + hourlyColl := d.session.DB(DB_NAME).C(HOURLY_VISITS_COLL) + return GetVisits(hourlyColl) +} + +func (d *DB) UpdateHourVisits() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateHourVisits(statsColl) +} + +func (d *DB) GetDayVisits() ([]Visits, error) { + dailyColl := d.session.DB(DB_NAME).C(DAILY_VISITS_COLL) + return GetVisits(dailyColl) +} + +func (d *DB) UpdateDayVisits() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateDayVisits(statsColl) +} + +func (d *DB) GetMonthVisits() ([]Visits, error) { + monthlyColl := d.session.DB(DB_NAME).C(MONTHLY_VISITS_COLL) + return GetVisits(monthlyColl) +} + +func (d *DB) UpdateMonthVisits() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMonthVisits(statsColl) +} + +func (d *DB) GetHourDownloads() ([]Visits, error) { + hourlyColl := d.session.DB(DB_NAME).C(HOURLY_DOWNLOADS_COLL) + return GetVisits(hourlyColl) +} + +func (d *DB) UpdateHourDownloads() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateHourDownloads(statsColl) +} + +func (d *DB) GetDayDownloads() ([]Visits, error) { + dailyColl := d.session.DB(DB_NAME).C(DAILY_DOWNLOADS_COLL) + return GetVisits(dailyColl) +} + +func (d *DB) UpdateDayDownloads() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateDayDownloads(statsColl) +} + +func (d *DB) GetMonthDownloads() ([]Visits, error) { + monthlyColl := d.session.DB(DB_NAME).C(MONTHLY_DOWNLOADS_COLL) + return GetVisits(monthlyColl) +} + +func (d *DB) UpdateMonthDownloads() error { + statsColl := d.session.DB(DB_NAME).C(STATS_COLL) + mr := NewMR(d.session.DB(DB_NAME)) + return mr.UpdateMonthDownloads(statsColl) +} diff --git a/tools/importer/importer.go b/tools/importer/importer.go new file mode 100644 index 0000000..b31282f --- /dev/null +++ b/tools/importer/importer.go @@ -0,0 +1,65 @@ +package main + +import log "github.com/cihub/seelog" + +import ( + "git.gitorious.org/go-pkg/epubgo.git" + "net/http" + "os" +) + +func main() { + db := initDB() + defer db.Close() + + for _, file := range os.Args[1:len(os.Args)] { + uploadEpub(file, db) + } +} + +func uploadEpub(filename string, db *DB) { + epub, err := epubgo.Open(filename) + if err != nil { + log.Error("Not valid epub '", filename, "': ", err) + return + } + defer epub.Close() + + book := parseFile(epub, db) + title, _ := book["title"].(string) + _, numTitleFound, _ := db.GetBooks(buildQuery("title:"+title), 1) + if numTitleFound == 0 { + book["active"] = true + } + + file, _ := os.Open(filename) + defer file.Close() + id, size, err := StoreNewFile(title+".epub", file, db) + if err != nil { + log.Error("Error storing book (", title, "): ", err) + return + } + + book["filename"] = id + book["filenamesize"] = size + err = db.InsertBook(book) + if err != nil { + log.Error("Error storing metadata (", title, "): ", err) + return + } + log.Info("File uploaded: ", filename) +} + +type Status struct { + Upload bool + Stats bool + Search string +} + +func GetStatus(h handler) Status { + return Status{} +} + +func loadTemplate(w http.ResponseWriter, tmpl string, data interface{}) {} +func loadTxtTemplate(w http.ResponseWriter, tmpl string, data interface{}) {} +func notFound(h handler) {} diff --git a/tools/importer/mapreduce.go b/tools/importer/mapreduce.go new file mode 100644 index 0000000..c24deec --- /dev/null +++ b/tools/importer/mapreduce.go @@ -0,0 +1,283 @@ +package main + +import ( + "labix.org/v2/mgo" + "labix.org/v2/mgo/bson" + "time" +) + +func GetTags(numTags int, tagsColl *mgo.Collection) ([]string, error) { + var result []struct { + Tag string "_id" + } + err := tagsColl.Find(nil).Sort("-value").Limit(numTags).All(&result) + if err != nil { + return nil, err + } + + tags := make([]string, len(result)) + for i, r := range result { + tags[i] = r.Tag + } + return tags, nil +} + +func GetBooksVisited(num int, visitedColl *mgo.Collection) ([]bson.ObjectId, error) { + var result []struct { + Book bson.ObjectId "_id" + } + err := visitedColl.Find(nil).Sort("-value").Limit(num).All(&result) + if err != nil { + return nil, err + } + + books := make([]bson.ObjectId, len(result)) + for i, r := range result { + books[i] = r.Book + } + return books, nil +} + +func GetVisits(visitsColl *mgo.Collection) ([]Visits, error) { + var result []Visits + err := visitsColl.Find(nil).All(&result) + return result, err +} + +type MR struct { + database *mgo.Database +} + +func NewMR(database *mgo.Database) *MR { + m := new(MR) + m.database = database + return m +} + +func (m *MR) UpdateTags(booksColl *mgo.Collection) error { + var mr mgo.MapReduce + mr.Map = `function() { + if (this.subject) { + this.subject.forEach(function(s) { emit(s, 1); }); + } + }` + mr.Reduce = `function(tag, vals) { + var count = 0; + vals.forEach(function() { count += 1; }); + return count; + }` + return m.update(&mr, bson.M{"active": true}, booksColl, TAGS_COLL) +} + +func (m *MR) UpdateMostVisited(statsColl *mgo.Collection) error { + var mr mgo.MapReduce + mr.Map = `function() { + if (this.id) { + emit(this.id, 1); + } + }` + mr.Reduce = `function(tag, vals) { + var count = 0; + vals.forEach(function() { count += 1; }); + return count; + }` + return m.update(&mr, bson.M{"section": "book"}, statsColl, VISITED_COLL) +} + +func (m *MR) UpdateMostDownloaded(statsColl *mgo.Collection) error { + var mr mgo.MapReduce + mr.Map = `function() { + emit(this.id, 1); + }` + mr.Reduce = `function(tag, vals) { + var count = 0; + vals.forEach(function() { count += 1; }); + return count; + }` + return m.update(&mr, bson.M{"section": "download"}, statsColl, DOWNLOADED_COLL) +} + +func (m *MR) UpdateHourVisits(statsColl *mgo.Collection) error { + const numDays = 2 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour) + + const reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + var mr mgo.MapReduce + mr.Map = `function() { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate(), + this.date.getUTCHours()); + emit({date: date, session: this.session}, 1); + }` + mr.Reduce = reduce + err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_VISITS_COLL+"_raw") + if err != nil { + return err + } + var mr2 mgo.MapReduce + mr2.Map = `function() { + emit(this['_id']['date'], 1); + }` + mr2.Reduce = reduce + hourly_raw := m.database.C(HOURLY_VISITS_COLL + "_raw") + return m.update(&mr2, bson.M{}, hourly_raw, HOURLY_VISITS_COLL) +} + +func (m *MR) UpdateDayVisits(statsColl *mgo.Collection) error { + const numDays = 30 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) + + const reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + var mr mgo.MapReduce + mr.Map = `function() { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate()); + emit({date: date, session: this.session}, 1); + }` + mr.Reduce = reduce + err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_VISITS_COLL+"_raw") + if err != nil { + return err + } + var mr2 mgo.MapReduce + mr2.Map = `function() { + emit(this['_id']['date'], 1); + }` + mr2.Reduce = reduce + daily_raw := m.database.C(DAILY_VISITS_COLL + "_raw") + return m.update(&mr2, bson.M{}, daily_raw, DAILY_VISITS_COLL) +} + +func (m *MR) UpdateMonthVisits(statsColl *mgo.Collection) error { + const numDays = 365 + + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) + + const reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + var mr mgo.MapReduce + mr.Map = `function() { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth()); + emit({date: date, session: this.session}, 1); + }` + mr.Reduce = reduce + err := m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_VISITS_COLL+"_raw") + if err != nil { + return err + } + var mr2 mgo.MapReduce + mr2.Map = `function() { + emit(this['_id']['date'], 1); + }` + mr2.Reduce = reduce + monthly_raw := m.database.C(MONTHLY_VISITS_COLL + "_raw") + return m.update(&mr2, bson.M{}, monthly_raw, MONTHLY_VISITS_COLL) +} + +func (m *MR) UpdateHourDownloads(statsColl *mgo.Collection) error { + const numDays = 2 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour) + + var mr mgo.MapReduce + mr.Map = `function() { + if (this.section == "download") { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate(), + this.date.getUTCHours()); + emit(date, 1); + } + }` + mr.Reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + return m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, HOURLY_DOWNLOADS_COLL) +} + +func (m *MR) UpdateDayDownloads(statsColl *mgo.Collection) error { + const numDays = 30 + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) + + var mr mgo.MapReduce + mr.Map = `function() { + if (this.section == "download") { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth(), + this.date.getUTCDate()); + emit(date, 1); + } + }` + mr.Reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + return m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, DAILY_DOWNLOADS_COLL) +} + +func (m *MR) UpdateMonthDownloads(statsColl *mgo.Collection) error { + const numDays = 365 + + start := time.Now().UTC().Add(-numDays * 24 * time.Hour).Truncate(24 * time.Hour) + + var mr mgo.MapReduce + mr.Map = `function() { + if (this.section == "download") { + var date = Date.UTC(this.date.getUTCFullYear(), + this.date.getUTCMonth()); + emit(date, 1); + } + }` + mr.Reduce = `function(date, vals) { + var count = 0; + vals.forEach(function(v) { count += v; }); + return count; + }` + return m.update(&mr, bson.M{"date": bson.M{"$gte": start}}, statsColl, MONTHLY_DOWNLOADS_COLL) +} + +func (m *MR) update(mr *mgo.MapReduce, query bson.M, queryColl *mgo.Collection, storeColl string) error { + metaColl := m.database.C(META_COLL) + _, err := metaColl.RemoveAll(bson.M{"type": storeColl}) + if err != nil { + return err + } + + mr.Out = bson.M{"replace": storeColl} + _, err = queryColl.Find(query).MapReduce(mr, nil) + if err != nil { + return err + } + + return metaColl.Insert(bson.M{"type": storeColl}) +} + +func (m *MR) isOutdated(coll string, minutes float64) bool { + var result struct { + Id bson.ObjectId `bson:"_id"` + } + metaColl := m.database.C(META_COLL) + err := metaColl.Find(bson.M{"type": coll}).One(&result) + if err != nil { + return true + } + + lastUpdate := result.Id.Time() + return time.Since(lastUpdate).Minutes() > minutes +} diff --git a/tools/importer/search.go b/tools/importer/search.go new file mode 100644 index 0000000..9f94543 --- /dev/null +++ b/tools/importer/search.go @@ -0,0 +1,85 @@ +package main + +import ( + "labix.org/v2/mgo/bson" + "net/http" + "strconv" + "strings" +) + +func buildQuery(q string) bson.M { + var reg []bson.RegEx + query := bson.M{"active": true} + words := strings.Split(q, " ") + for _, w := range words { + tag := strings.SplitN(w, ":", 2) + if len(tag) > 1 { + query[tag[0]] = bson.RegEx{tag[1], "i"} + } else { + reg = append(reg, bson.RegEx{w, "i"}) + } + } + if len(reg) > 0 { + query["keywords"] = bson.M{"$all": reg} + } + return query +} + +type searchData struct { + S Status + Found int + Books []Book + ItemsPage int + Page int + Next string + Prev string +} + +func searchHandler(h handler) { + err := h.r.ParseForm() + if err != nil { + http.Error(h.w, err.Error(), http.StatusInternalServerError) + return + } + req := strings.Join(h.r.Form["q"], " ") + page := 0 + if len(h.r.Form["p"]) != 0 { + page, err = strconv.Atoi(h.r.Form["p"][0]) + if err != nil { + page = 0 + } + } + items_page := itemsPage(h.r) + res, num, _ := h.db.GetBooks(buildQuery(req), items_page, page*items_page) + + var data searchData + data.S = GetStatus(h) + data.S.Search = req + data.Books = res + data.ItemsPage = items_page + data.Found = num + data.Page = page + 1 + if num > (page+1)*items_page { + data.Next = "/search/?q=" + req + "&p=" + strconv.Itoa(page+1) + "&num=" + strconv.Itoa(items_page) + } + if page > 0 { + data.Prev = "/search/?q=" + req + "&p=" + strconv.Itoa(page-1) + "&num=" + strconv.Itoa(items_page) + } + + format := h.r.Form["fmt"] + if (len(format) > 0) && (format[0] == "rss") { + loadTxtTemplate(h.w, "search_rss.xml", data) + } else { + loadTemplate(h.w, "search", data) + } +} + +func itemsPage(r *http.Request) int { + if len(r.Form["num"]) > 0 { + items_page, err := strconv.Atoi(r.Form["num"][0]) + if err == nil { + return items_page + } + } + return SEARCH_ITEMS_PAGE +} diff --git a/tools/importer/session.go b/tools/importer/session.go new file mode 100644 index 0000000..e958cdc --- /dev/null +++ b/tools/importer/session.go @@ -0,0 +1,81 @@ +package main + +import ( + "encoding/hex" + "github.com/gorilla/securecookie" + "github.com/gorilla/sessions" + "net/http" +) + +var sesStore = sessions.NewCookieStore(securecookie.GenerateRandomKey(64)) + +type Notification struct { + Title string + Msg string + Type string /* error, info or success */ +} + +type Session struct { + User string + Role string + S *sessions.Session +} + +func GetSession(r *http.Request, db *DB) (s *Session) { + s = new(Session) + var err error + s.S, err = sesStore.Get(r, "session") + if err == nil && !s.S.IsNew { + s.User, _ = s.S.Values["user"].(string) + s.Role = db.UserRole(s.User) + } + + if s.S.IsNew { + s.S.Values["id"] = hex.EncodeToString(securecookie.GenerateRandomKey(16)) + } + + return +} + +func (s *Session) GetNotif() []Notification { + session := s.S + msgs := session.Flashes("nMsg") + titles := session.Flashes("nTitle") + tpes := session.Flashes("nType") + notif := make([]Notification, len(msgs)) + for i, m := range msgs { + msg, _ := m.(string) + title, _ := titles[i].(string) + tpe, _ := tpes[i].(string) + notif[i] = Notification{title, msg, tpe} + } + return notif +} + +func (s *Session) LogIn(user string) { + s.User = user + s.S.Values["user"] = user +} + +func (s *Session) LogOut() { + s.S.Values["user"] = "" +} + +func (s *Session) Notify(title, msg, tpe string) { + s.S.AddFlash(msg, "nMsg") + s.S.AddFlash(title, "nTitle") + s.S.AddFlash(tpe, "nType") +} + +func (s *Session) Save(w http.ResponseWriter, r *http.Request) { + sesStore.Save(r, w, s.S) +} + +func (s *Session) Id() string { + id, _ := s.S.Values["id"].(string) + return id +} + +func (s *Session) IsAdmin() bool { + return s.Role == "admin" +} diff --git a/tools/importer/stats.go b/tools/importer/stats.go new file mode 100644 index 0000000..4218ddf --- /dev/null +++ b/tools/importer/stats.go @@ -0,0 +1,244 @@ +package main + +import log "github.com/cihub/seelog" + +import ( + "github.com/gorilla/mux" + "labix.org/v2/mgo/bson" + "net/http" + "strconv" + "strings" + "time" +) + +type handler struct { + w http.ResponseWriter + r *http.Request + sess *Session + db *DB +} + +func InitStats(database *DB) { + statsChannel = make(chan statsRequest, CHAN_SIZE) + go statsWorker(database) +} + +func GatherStats(function func(handler), database *DB) func(http.ResponseWriter, *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + log.Info("Query ", r.Method, " ", r.RequestURI) + + var h handler + h.db = database.Copy() + defer h.db.Close() + + h.w = w + h.r = r + h.sess = GetSession(r, h.db) + function(h) + + statsChannel <- statsRequest{bson.Now(), mux.Vars(r), h.sess, r} + } +} + +var statsChannel chan statsRequest + +type statsRequest struct { + date time.Time + vars map[string]string + sess *Session + r *http.Request +} + +func statsWorker(database *DB) { + db := database.Copy() + defer db.Close() + + for req := range statsChannel { + stats := make(map[string]interface{}) + appendFiles(req.r, stats) + appendMuxVars(req.vars, stats) + appendUrl(req.r, stats) + appendSession(req.sess, stats) + stats["method"] = req.r.Method + stats["date"] = req.date + db.InsertStats(stats) + } +} + +func statsHandler(h handler) { + var data statsData + data.S = GetStatus(h) + data.S.Stats = true + data.HVisits = getHourlyVisits(h.db) + data.DVisits = getDailyVisits(h.db) + data.MVisits = getMonthlyVisits(h.db) + data.HDownloads = getHourlyDownloads(h.db) + data.DDownloads = getDailyDownloads(h.db) + data.MDownloads = getMonthlyDownloads(h.db) + + loadTemplate(h.w, "stats", data) +} + +type statsData struct { + S Status + HVisits []visitData + DVisits []visitData + MVisits []visitData + HDownloads []visitData + DDownloads []visitData + MDownloads []visitData +} + +type visitData struct { + Label string + Count int +} + +func getHourlyVisits(db *DB) []visitData { + var visits []visitData + + visit, _ := db.GetHourVisits() + for _, v := range visit { + var elem visitData + hour := time.Unix(v.Date/1000, 0).UTC().Hour() + elem.Label = strconv.Itoa(hour + 1) + elem.Count = v.Count + visits = append(visits, elem) + } + + return visits +} + +func getDailyVisits(db *DB) []visitData { + var visits []visitData + + visit, _ := db.GetDayVisits() + for _, v := range visit { + var elem visitData + day := time.Unix(v.Date/1000, 0).UTC().Day() + elem.Label = strconv.Itoa(day) + elem.Count = v.Count + visits = append(visits, elem) + } + + return visits +} + +func getMonthlyVisits(db *DB) []visitData { + var visits []visitData + + visit, _ := db.GetMonthVisits() + for _, v := range visit { + var elem visitData + month := time.Unix(v.Date/1000, 0).UTC().Month() + elem.Label = month.String() + elem.Count = v.Count + visits = append(visits, elem) + } + + return visits +} + +func getHourlyDownloads(db *DB) []visitData { + var visits []visitData + + visit, _ := db.GetHourDownloads() + for _, v := range visit { + var elem visitData + hour := time.Unix(v.Date/1000, 0).UTC().Hour() + elem.Label = strconv.Itoa(hour + 1) + elem.Count = v.Count + visits = append(visits, elem) + } + + return visits +} + +func getDailyDownloads(db *DB) []visitData { + var visits []visitData + + visit, _ := db.GetDayDownloads() + for _, v := range visit { + var elem visitData + day := time.Unix(v.Date/1000, 0).UTC().Day() + elem.Label = strconv.Itoa(day) + elem.Count = v.Count + visits = append(visits, elem) + } + + return visits +} + +func getMonthlyDownloads(db *DB) []visitData { + var visits []visitData + + visit, _ := db.GetMonthDownloads() + for _, v := range visit { + var elem visitData + month := time.Unix(v.Date/1000, 0).UTC().Month() + elem.Label = month.String() + elem.Count = v.Count + visits = append(visits, elem) + } + + return visits +} + +func appendFiles(r *http.Request, stats map[string]interface{}) { + if r.Method == "POST" && r.MultipartForm != nil { + files := r.MultipartForm.File + for key := range files { + list := make([]string, len(files[key])) + for i, f := range files[key] { + list[i] = f.Filename + } + stats[key] = list + } + } +} + +func appendMuxVars(vars map[string]string, stats map[string]interface{}) { + for key, value := range vars { + switch { + case key == "id": + if bson.IsObjectIdHex(value) { + stats["id"] = bson.ObjectIdHex(value) + } + case key == "ids": + var objectIds []bson.ObjectId + ids := strings.Split(value, "/") + for _, id := range ids { + if bson.IsObjectIdHex(value) { + objectIds = append(objectIds, bson.ObjectIdHex(id)) + } + } + if len(objectIds) > 0 { + stats["ids"] = objectIds + stats["id"] = objectIds[0] + } + default: + stats[key] = value + } + } +} + +func appendUrl(r *http.Request, stats map[string]interface{}) { + for key, value := range r.URL.Query() { + stats[key] = value + } + stats["host"] = r.Host + stats["path"] = r.URL.Path + pattern := strings.Split(r.URL.Path, "/") + if len(pattern) > 1 && pattern[1] != "" { + stats["section"] = pattern[1] + } else { + stats["section"] = "/" + } +} + +func appendSession(sess *Session, stats map[string]interface{}) { + stats["session"] = sess.Id() + if sess.User != "" { + stats["user"] = sess.User + } +} diff --git a/tools/importer/store.go b/tools/importer/store.go new file mode 100644 index 0000000..5b0ee8c --- /dev/null +++ b/tools/importer/store.go @@ -0,0 +1,128 @@ +package main + +import ( + "bytes" + "git.gitorious.org/go-pkg/epubgo.git" + "io" + "io/ioutil" + "labix.org/v2/mgo/bson" + "regexp" + "strings" +) + +func OpenBook(id bson.ObjectId, db *DB) (*epubgo.Epub, error) { + fs := db.GetFS(FS_BOOKS) + f, err := fs.OpenId(id) + if err != nil { + return nil, err + } + defer f.Close() + + buff, err := ioutil.ReadAll(f) + reader := bytes.NewReader(buff) + + return epubgo.Load(reader, int64(len(buff))) +} + +func StoreNewFile(name string, file io.Reader, db *DB) (bson.ObjectId, int64, error) { + fs := db.GetFS(FS_BOOKS) + fw, err := fs.Create(name) + if err != nil { + return "", 0, err + } + defer fw.Close() + + size, err := io.Copy(fw, file) + id, _ := fw.Id().(bson.ObjectId) + return id, size, err +} + +func DeleteFile(id bson.ObjectId, db *DB) error { + fs := db.GetFS(FS_BOOKS) + return fs.RemoveId(id) +} + +func DeleteCover(id bson.ObjectId, db *DB) error { + fs := db.GetFS(FS_IMGS) + return fs.RemoveId(id) +} + +func DeleteBook(book Book, db *DB) { + if book.Cover != "" { + DeleteCover(book.Cover, db) + } + if book.CoverSmall != "" { + DeleteCover(book.CoverSmall, db) + } + DeleteFile(book.File, db) +} + +func cleanStr(str string) string { + str = strings.Replace(str, "'", "'", -1) + exp, _ := regexp.Compile("&[^;]*;") + str = exp.ReplaceAllString(str, "") + exp, _ = regexp.Compile("[ ,]*$") + str = exp.ReplaceAllString(str, "") + return str +} + +func parseAuthr(creator []string) []string { + exp1, _ := regexp.Compile("^(.*\\( *([^\\)]*) *\\))*$") + exp2, _ := regexp.Compile("^[^:]*: *(.*)$") + res := make([]string, len(creator)) + for i, s := range creator { + auth := exp1.FindStringSubmatch(s) + if auth != nil { + res[i] = cleanStr(strings.Join(auth[2:], ", ")) + } else { + auth := exp2.FindStringSubmatch(s) + if auth != nil { + res[i] = cleanStr(auth[1]) + } else { + res[i] = cleanStr(s) + } + } + } + return res +} + +func parseDescription(description []string) string { + str := cleanStr(strings.Join(description, "\n")) + str = strings.Replace(str, "

", "\n", -1) + exp, _ := regexp.Compile("<[^>]*>") + str = exp.ReplaceAllString(str, "") + str = strings.Replace(str, "&", "&", -1) + str = strings.Replace(str, "<", "<", -1) + str = strings.Replace(str, ">", ">", -1) + str = strings.Replace(str, "\\n", "\n", -1) + return str +} + +func parseSubject(subject []string) []string { + var res []string + for _, s := range subject { + res = append(res, strings.Split(s, " / ")...) + } + return res +} + +func parseDate(date []string) string { + if len(date) == 0 { + return "" + } + return strings.Replace(date[0], "Unspecified: ", "", -1) +} + +func keywords(b map[string]interface{}) (k []string) { + title, _ := b["title"].(string) + k = strings.Split(title, " ") + author, _ := b["author"].([]string) + for _, a := range author { + k = append(k, strings.Split(a, " ")...) + } + publisher, _ := b["publisher"].(string) + k = append(k, strings.Split(publisher, " ")...) + subject, _ := b["subject"].([]string) + k = append(k, subject...) + return +} diff --git a/tools/importer/upload.go b/tools/importer/upload.go new file mode 100644 index 0000000..8f05f0a --- /dev/null +++ b/tools/importer/upload.go @@ -0,0 +1,146 @@ +package main + +import log "github.com/cihub/seelog" + +import ( + "bytes" + "git.gitorious.org/go-pkg/epubgo.git" + "io/ioutil" + "mime/multipart" + "strings" +) + +func InitUpload(database *DB) { + uploadChannel = make(chan uploadRequest, CHAN_SIZE) + go uploadWorker(database) +} + +var uploadChannel chan uploadRequest + +type uploadRequest struct { + file multipart.File + filename string +} + +func uploadWorker(database *DB) { + db := database.Copy() + defer db.Close() + + for req := range uploadChannel { + processFile(req, db) + } +} + +func processFile(req uploadRequest, db *DB) { + defer req.file.Close() + + epub, err := openMultipartEpub(req.file) + if err != nil { + log.Warn("Not valid epub uploaded file ", req.filename, ": ", err) + return + } + defer epub.Close() + + book := parseFile(epub, db) + title, _ := book["title"].(string) + req.file.Seek(0, 0) + id, size, err := StoreNewFile(title+".epub", req.file, db) + if err != nil { + log.Error("Error storing book (", title, "): ", err) + return + } + + book["file"] = id + book["filesize"] = size + err = db.InsertBook(book) + if err != nil { + log.Error("Error storing metadata (", title, "): ", err) + return + } + log.Info("File uploaded: ", req.filename) +} + +func uploadPostHandler(h handler) { + problem := false + + h.r.ParseMultipartForm(20000000) + filesForm := h.r.MultipartForm.File["epub"] + for _, f := range filesForm { + file, err := f.Open() + if err != nil { + log.Error("Can not open uploaded file ", f.Filename, ": ", err) + h.sess.Notify("Upload problem!", "There was a problem with book "+f.Filename, "error") + problem = true + continue + } + uploadChannel <- uploadRequest{file, f.Filename} + } + + if !problem { + if len(filesForm) > 0 { + h.sess.Notify("Upload successful!", "Thank you for your contribution", "success") + } else { + h.sess.Notify("Upload problem!", "No books where uploaded.", "error") + } + } + uploadHandler(h) +} + +func uploadHandler(h handler) { + var data uploadData + data.S = GetStatus(h) + data.S.Upload = true + loadTemplate(h.w, "upload", data) +} + +type uploadData struct { + S Status +} + +func openMultipartEpub(file multipart.File) (*epubgo.Epub, error) { + buff, _ := ioutil.ReadAll(file) + reader := bytes.NewReader(buff) + return epubgo.Load(reader, int64(len(buff))) +} + +func parseFile(epub *epubgo.Epub, db *DB) map[string]interface{} { + book := map[string]interface{}{} + for _, m := range epub.MetadataFields() { + data, err := epub.Metadata(m) + if err != nil { + continue + } + switch m { + case "creator": + book["author"] = parseAuthr(data) + case "description": + book[m] = parseDescription(data) + case "subject": + book[m] = parseSubject(data) + case "date": + book[m] = parseDate(data) + case "language": + book["lang"] = data + case "title", "contributor", "publisher": + book[m] = cleanStr(strings.Join(data, ", ")) + case "identifier": + attr, _ := epub.MetadataAttr(m) + for i, d := range data { + if attr[i]["scheme"] == "ISBN" { + book["isbn"] = d + } + } + default: + book[m] = strings.Join(data, ", ") + } + } + title, _ := book["title"].(string) + book["file"] = nil + cover, coverSmall := GetCover(epub, title, db) + if cover != "" { + book["cover"] = cover + book["coversmall"] = coverSmall + } + book["keywords"] = keywords(book) + return book +} diff --git a/trantor.go b/trantor.go index 615d132..4fb126e 100644 --- a/trantor.go +++ b/trantor.go @@ -1,10 +1,11 @@ package main +import log "github.com/cihub/seelog" + import ( "github.com/gorilla/mux" "io" "labix.org/v2/mgo/bson" - "log" "net/http" "strings" ) @@ -31,7 +32,7 @@ func logoutHandler(h handler) { h.sess.LogOut() h.sess.Notify("Log out!", "Bye bye "+h.sess.User, "success") h.sess.Save(h.w, h.r) - log.Println("User", h.sess.User, "log out") + log.Info("User ", h.sess.User, " log out") http.Redirect(h.w, h.r, "/", http.StatusFound) } @@ -129,18 +130,35 @@ func notFound(h handler) { loadTemplate(h.w, "404", data) } +func updateLogger() error { + logger, err := log.LoggerFromConfigAsFile(LOGGER_CONFIG) + if err != nil { + return err + } + + return log.ReplaceLogger(logger) +} + func main() { + defer log.Flush() + err := updateLogger() + if err != nil { + log.Error("Error loading the logger xml: ", err) + } + log.Info("Start the imperial library of trantor") + db := initDB() defer db.Close() + InitTasks(db) InitStats(db) InitUpload(db) - setUpRouter(db) - panic(http.ListenAndServe(":"+PORT, nil)) + initRouter(db) + log.Error(http.ListenAndServe(":"+PORT, nil)) } -func setUpRouter(db *DB) { +func initRouter(db *DB) { r := mux.NewRouter() var notFoundHandler http.HandlerFunc notFoundHandler = GatherStats(notFound, db) diff --git a/upload.go b/upload.go index fed15ed..8f05f0a 100644 --- a/upload.go +++ b/upload.go @@ -1,10 +1,11 @@ package main +import log "github.com/cihub/seelog" + import ( "bytes" "git.gitorious.org/go-pkg/epubgo.git" "io/ioutil" - "log" "mime/multipart" "strings" ) @@ -35,7 +36,7 @@ func processFile(req uploadRequest, db *DB) { epub, err := openMultipartEpub(req.file) if err != nil { - log.Println("Not valid epub uploaded file", req.filename, ":", err) + log.Warn("Not valid epub uploaded file ", req.filename, ": ", err) return } defer epub.Close() @@ -45,7 +46,7 @@ func processFile(req uploadRequest, db *DB) { req.file.Seek(0, 0) id, size, err := StoreNewFile(title+".epub", req.file, db) if err != nil { - log.Println("Error storing book (", title, "):", err) + log.Error("Error storing book (", title, "): ", err) return } @@ -53,10 +54,10 @@ func processFile(req uploadRequest, db *DB) { book["filesize"] = size err = db.InsertBook(book) if err != nil { - log.Println("Error storing metadata (", title, "):", err) + log.Error("Error storing metadata (", title, "): ", err) return } - log.Println("File uploaded:", req.filename) + log.Info("File uploaded: ", req.filename) } func uploadPostHandler(h handler) { @@ -67,7 +68,7 @@ func uploadPostHandler(h handler) { for _, f := range filesForm { file, err := f.Open() if err != nil { - log.Println("Can not open uploaded file", f.Filename, ":", err) + log.Error("Can not open uploaded file ", f.Filename, ": ", err) h.sess.Notify("Upload problem!", "There was a problem with book "+f.Filename, "error") problem = true continue