Dirty migration tool for gridfs
This commit is contained in:
parent
d33a28a2bd
commit
71c8a8946a
5 changed files with 485 additions and 1 deletions
|
@ -1,7 +1,9 @@
|
|||
Some tools to manage trantor:
|
||||
Some dirty tools to manage trantor:
|
||||
|
||||
- adduser. Used to add users to trantor:
|
||||
$ adduser myNick
|
||||
Password:
|
||||
|
||||
- update. Update the cover of all the books. It might be outdated.
|
||||
|
||||
- togridfs (23/4/2013). Migrate all files and covers to gridfs
|
||||
|
|
32
tools/togridfs/config.go
Normal file
32
tools/togridfs/config.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package main
|
||||
|
||||
const (
|
||||
PORT = "8080"
|
||||
|
||||
DB_IP = "127.0.0.1"
|
||||
DB_NAME = "trantor"
|
||||
META_COLL = "meta"
|
||||
BOOKS_COLL = "books"
|
||||
TAGS_COLL = "tags"
|
||||
USERS_COLL = "users"
|
||||
STATS_COLL = "statistics"
|
||||
FS_BOOKS = "fs_books"
|
||||
FS_IMGS = "fs_imgs"
|
||||
|
||||
PASS_SALT = "ImperialLibSalt"
|
||||
MINUTES_UPDATE_TAGS = 10
|
||||
TAGS_DISPLAY = 50
|
||||
SEARCH_ITEMS_PAGE = 20
|
||||
NEW_ITEMS_PAGE = 50
|
||||
|
||||
TEMPLATE_PATH = "templates/"
|
||||
CSS_PATH = "css/"
|
||||
JS_PATH = "js/"
|
||||
IMG_PATH = "img/"
|
||||
|
||||
IMG_WIDTH_BIG = 300
|
||||
IMG_WIDTH_SMALL = 60
|
||||
IMG_QUALITY = 80
|
||||
|
||||
STATS_CHAN_SIZE = 100
|
||||
)
|
137
tools/togridfs/cover.go
Normal file
137
tools/togridfs/cover.go
Normal file
|
@ -0,0 +1,137 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"git.gitorious.org/go-pkg/epubgo.git"
|
||||
"github.com/nfnt/resize"
|
||||
"image"
|
||||
"image/jpeg"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"labix.org/v2/mgo"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"log"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func GetCover(e *epubgo.Epub, title string) (bson.ObjectId, bson.ObjectId) {
|
||||
imgId, smallId := searchCommonCoverNames(e, title)
|
||||
if imgId != "" {
|
||||
return imgId, smallId
|
||||
}
|
||||
|
||||
/* search for img on the text */
|
||||
exp, _ := regexp.Compile("<ima?g.*[(src)(href)]=[\"']([^\"']*(\\.[^\\.\"']*))[\"']")
|
||||
it, errNext := e.Spine()
|
||||
for errNext == nil {
|
||||
file, err := it.Open()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
txt, err := ioutil.ReadAll(file)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
res := exp.FindSubmatch(txt)
|
||||
if res != nil {
|
||||
href := string(res[1])
|
||||
urlPart := strings.Split(it.Url(), "/")
|
||||
url := strings.Join(urlPart[:len(urlPart)-1], "/")
|
||||
if href[:3] == "../" {
|
||||
href = href[3:]
|
||||
url = strings.Join(urlPart[:len(urlPart)-2], "/")
|
||||
}
|
||||
href = strings.Replace(href, "%20", " ", -1)
|
||||
href = strings.Replace(href, "%27", "'", -1)
|
||||
href = strings.Replace(href, "%28", "(", -1)
|
||||
href = strings.Replace(href, "%29", ")", -1)
|
||||
if url == "" {
|
||||
url = href
|
||||
} else {
|
||||
url = url + "/" + href
|
||||
}
|
||||
|
||||
img, err := e.OpenFile(url)
|
||||
if err == nil {
|
||||
defer img.Close()
|
||||
return storeImg(img, title)
|
||||
}
|
||||
}
|
||||
errNext = it.Next()
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func searchCommonCoverNames(e *epubgo.Epub, title string) (bson.ObjectId, bson.ObjectId) {
|
||||
for _, p := range []string{"cover.jpg", "Images/cover.jpg", "cover.jpeg", "cover1.jpg", "cover1.jpeg"} {
|
||||
img, err := e.OpenFile(p)
|
||||
if err == nil {
|
||||
defer img.Close()
|
||||
return storeImg(img, title)
|
||||
}
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func storeImg(img io.Reader, title string) (bson.ObjectId, bson.ObjectId) {
|
||||
/* open the files */
|
||||
fBig, err := createCoverFile(title)
|
||||
if err != nil {
|
||||
log.Println("Error creating", title, ":", err.Error())
|
||||
return "", ""
|
||||
}
|
||||
defer fBig.Close()
|
||||
|
||||
fSmall, err := createCoverFile(title + "_small")
|
||||
if err != nil {
|
||||
log.Println("Error creating", title+"_small", ":", err.Error())
|
||||
return "", ""
|
||||
}
|
||||
defer fSmall.Close()
|
||||
|
||||
/* resize img */
|
||||
var img2 bytes.Buffer
|
||||
img1 := io.TeeReader(img, &img2)
|
||||
jpgOptions := jpeg.Options{IMG_QUALITY}
|
||||
imgResized, err := resizeImg(img1, IMG_WIDTH_BIG)
|
||||
if err != nil {
|
||||
log.Println("Error resizing big image:", err.Error())
|
||||
return "", ""
|
||||
}
|
||||
err = jpeg.Encode(fBig, imgResized, &jpgOptions)
|
||||
if err != nil {
|
||||
log.Println("Error encoding big image:", err.Error())
|
||||
return "", ""
|
||||
}
|
||||
imgSmallResized, err := resizeImg(&img2, IMG_WIDTH_SMALL)
|
||||
if err != nil {
|
||||
log.Println("Error resizing small image:", err.Error())
|
||||
return "", ""
|
||||
}
|
||||
err = jpeg.Encode(fSmall, imgSmallResized, &jpgOptions)
|
||||
if err != nil {
|
||||
log.Println("Error encoding small image:", err.Error())
|
||||
return "", ""
|
||||
}
|
||||
|
||||
idBig, _ := fBig.Id().(bson.ObjectId)
|
||||
idSmall, _ := fSmall.Id().(bson.ObjectId)
|
||||
return idBig, idSmall
|
||||
}
|
||||
|
||||
func createCoverFile(title string) (*mgo.GridFile, error) {
|
||||
fs := db.GetFS(FS_IMGS)
|
||||
return fs.Create(title + ".jpg")
|
||||
}
|
||||
|
||||
func resizeImg(imgReader io.Reader, width uint) (image.Image, error) {
|
||||
img, _, err := image.Decode(imgReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resize.Resize(width, 0, img, resize.NearestNeighbor), nil
|
||||
}
|
252
tools/togridfs/database.go
Normal file
252
tools/togridfs/database.go
Normal file
|
@ -0,0 +1,252 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"labix.org/v2/mgo"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
META_TYPE_TAGS = "tags updated"
|
||||
)
|
||||
|
||||
var db *DB
|
||||
|
||||
type Book struct {
|
||||
Id string `bson:"_id"`
|
||||
Title string
|
||||
Author []string
|
||||
Contributor string
|
||||
Publisher string
|
||||
Description string
|
||||
Subject []string
|
||||
Date string
|
||||
Lang []string
|
||||
Isbn string
|
||||
Type string
|
||||
Format string
|
||||
Source string
|
||||
Relation string
|
||||
Coverage string
|
||||
Rights string
|
||||
Meta string
|
||||
File bson.ObjectId
|
||||
Cover bson.ObjectId
|
||||
CoverSmall bson.ObjectId
|
||||
Active bool
|
||||
Keywords []string
|
||||
Path string
|
||||
}
|
||||
|
||||
type DB struct {
|
||||
session *mgo.Session
|
||||
meta *mgo.Collection
|
||||
books *mgo.Collection
|
||||
tags *mgo.Collection
|
||||
user *mgo.Collection
|
||||
stats *mgo.Collection
|
||||
}
|
||||
|
||||
func initDB() *DB {
|
||||
var err error
|
||||
d := new(DB)
|
||||
d.session, err = mgo.Dial(DB_IP)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
database := d.session.DB(DB_NAME)
|
||||
d.meta = database.C(META_COLL)
|
||||
d.books = database.C(BOOKS_COLL)
|
||||
d.tags = database.C(TAGS_COLL)
|
||||
d.user = database.C(USERS_COLL)
|
||||
d.stats = database.C(STATS_COLL)
|
||||
return d
|
||||
}
|
||||
|
||||
func (d *DB) Close() {
|
||||
d.session.Close()
|
||||
}
|
||||
|
||||
func md5Pass(pass string) []byte {
|
||||
h := md5.New()
|
||||
hash := h.Sum(([]byte)(PASS_SALT + pass))
|
||||
return hash
|
||||
}
|
||||
|
||||
func (d *DB) SetPassword(user string, pass string) error {
|
||||
hash := md5Pass(pass)
|
||||
return d.user.Update(bson.M{"user": user}, bson.M{"$set": bson.M{"pass": hash}})
|
||||
}
|
||||
|
||||
func (d *DB) UserValid(user string, pass string) bool {
|
||||
hash := md5Pass(pass)
|
||||
n, err := d.user.Find(bson.M{"user": user, "pass": hash}).Count()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return n != 0
|
||||
}
|
||||
|
||||
func (d *DB) InsertStats(stats interface{}) error {
|
||||
return d.stats.Insert(stats)
|
||||
}
|
||||
|
||||
func (d *DB) InsertBook(book interface{}) error {
|
||||
return d.books.Insert(book)
|
||||
}
|
||||
|
||||
func (d *DB) RemoveBook(id bson.ObjectId) error {
|
||||
return d.books.Remove(bson.M{"_id": id})
|
||||
}
|
||||
|
||||
func (d *DB) UpdateBook(id bson.ObjectId, data interface{}, unset ...interface{}) error {
|
||||
if len(unset) > 0 {
|
||||
return d.books.Update(bson.M{"_id": id}, bson.M{"$set": data, "$unset": unset[0]})
|
||||
}
|
||||
return d.books.Update(bson.M{"_id": id}, bson.M{"$set": data})
|
||||
}
|
||||
|
||||
func (d *DB) IncVisit(id bson.ObjectId) error {
|
||||
return d.books.Update(bson.M{"_id": id}, bson.M{"$inc": bson.M{"VisitsCount": 1}})
|
||||
}
|
||||
|
||||
func (d *DB) IncDownload(id bson.ObjectId) error {
|
||||
return d.books.Update(bson.M{"_id": id}, bson.M{"$inc": bson.M{"DownloadCount": 1}})
|
||||
}
|
||||
|
||||
/* optional parameters: length and start index
|
||||
*
|
||||
* Returns: list of books, number found and err
|
||||
*/
|
||||
func (d *DB) GetBooks(query bson.M, r ...int) (books []Book, num int, err error) {
|
||||
var start, length int
|
||||
if len(r) > 0 {
|
||||
length = r[0]
|
||||
if len(r) > 1 {
|
||||
start = r[1]
|
||||
}
|
||||
}
|
||||
q := d.books.Find(query).Sort("-_id")
|
||||
num, err = q.Count()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if start != 0 {
|
||||
q = q.Skip(start)
|
||||
}
|
||||
if length != 0 {
|
||||
q = q.Limit(length)
|
||||
}
|
||||
|
||||
err = q.All(&books)
|
||||
for i, b := range books {
|
||||
books[i].Id = bson.ObjectId(b.Id).Hex()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/* Get the most visited books
|
||||
*/
|
||||
func (d *DB) GetVisitedBooks(num int) (books []Book, err error) {
|
||||
var q *mgo.Query
|
||||
q = d.books.Find(bson.M{"active": true}).Sort("-VisitsCount").Limit(num)
|
||||
err = q.All(&books)
|
||||
for i, b := range books {
|
||||
books[i].Id = bson.ObjectId(b.Id).Hex()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/* Get the most downloaded books
|
||||
*/
|
||||
func (d *DB) GetDownloadedBooks(num int) (books []Book, err error) {
|
||||
var q *mgo.Query
|
||||
q = d.books.Find(bson.M{"active": true}).Sort("-DownloadCount").Limit(num)
|
||||
err = q.All(&books)
|
||||
for i, b := range books {
|
||||
books[i].Id = bson.ObjectId(b.Id).Hex()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/* optional parameters: length and start index
|
||||
*
|
||||
* Returns: list of books, number found and err
|
||||
*/
|
||||
func (d *DB) GetNewBooks(r ...int) (books []Book, num int, err error) {
|
||||
return d.GetBooks(bson.M{"$nor": []bson.M{{"active": true}}}, r...)
|
||||
}
|
||||
|
||||
func (d *DB) BookActive(id bson.ObjectId) bool {
|
||||
var book Book
|
||||
err := d.books.Find(bson.M{"_id": id}).One(&book)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return book.Active
|
||||
}
|
||||
|
||||
func (d *DB) GetFS(prefix string) *mgo.GridFS {
|
||||
return d.session.DB(DB_NAME).GridFS(prefix)
|
||||
}
|
||||
|
||||
func (d *DB) areTagsOutdated() bool {
|
||||
var result struct {
|
||||
Id bson.ObjectId `bson:"_id"`
|
||||
}
|
||||
err := d.meta.Find(bson.M{"type": META_TYPE_TAGS}).One(&result)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
lastUpdate := result.Id.Time()
|
||||
return time.Since(lastUpdate).Minutes() > MINUTES_UPDATE_TAGS
|
||||
}
|
||||
|
||||
func (d *DB) updateTags() error {
|
||||
_, err := d.meta.RemoveAll(bson.M{"type": META_TYPE_TAGS})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var mr mgo.MapReduce
|
||||
mr.Map = "function() { " +
|
||||
"if (this.active) { this.subject.forEach(function(s) { emit(s, 1); }); }" +
|
||||
"}"
|
||||
mr.Reduce = "function(tag, vals) { " +
|
||||
"var count = 0;" +
|
||||
"vals.forEach(function() { count += 1; });" +
|
||||
"return count;" +
|
||||
"}"
|
||||
mr.Out = bson.M{"replace": TAGS_COLL}
|
||||
_, err = d.books.Find(bson.M{"active": true}).MapReduce(&mr, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return d.meta.Insert(bson.M{"type": META_TYPE_TAGS})
|
||||
}
|
||||
|
||||
func (d *DB) GetTags(numTags int) ([]string, error) {
|
||||
if d.areTagsOutdated() {
|
||||
err := d.updateTags()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var result []struct {
|
||||
Tag string "_id"
|
||||
}
|
||||
err := d.tags.Find(nil).Sort("-value").Limit(numTags).All(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := make([]string, len(result))
|
||||
for i, r := range result {
|
||||
tags[i] = r.Tag
|
||||
}
|
||||
return tags, nil
|
||||
}
|
61
tools/togridfs/togridfs.go
Normal file
61
tools/togridfs/togridfs.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"git.gitorious.org/go-pkg/epubgo.git"
|
||||
"io"
|
||||
"labix.org/v2/mgo/bson"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
db = initDB()
|
||||
defer db.Close()
|
||||
books, _, _ := db.GetBooks(bson.M{})
|
||||
fs := db.GetFS(FS_BOOKS)
|
||||
|
||||
for _, book := range books {
|
||||
if book.Path == "" {
|
||||
fmt.Println("don't needed -- ", book.Title)
|
||||
continue
|
||||
}
|
||||
fmt.Println(book.Title)
|
||||
|
||||
path := "books/" + book.Path
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
fmt.Println("os.Open ================", err)
|
||||
continue
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
fw, err := fs.Create(book.Title + ".epub")
|
||||
if err != nil {
|
||||
fmt.Println("gridfs.Create ================", err)
|
||||
continue
|
||||
}
|
||||
defer fw.Close()
|
||||
|
||||
_, err = io.Copy(fw, file)
|
||||
if err != nil {
|
||||
fmt.Println("io.Copy ================", err)
|
||||
continue
|
||||
}
|
||||
id, _ := fw.Id().(bson.ObjectId)
|
||||
|
||||
e, err := epubgo.Open(path)
|
||||
if err != nil {
|
||||
fmt.Println("epubgo.Open ================", err)
|
||||
continue
|
||||
}
|
||||
defer e.Close()
|
||||
|
||||
cover, coverSmall := GetCover(e, book.Title)
|
||||
if cover != "" {
|
||||
db.UpdateBook(bson.ObjectIdHex(book.Id), bson.M{"cover": cover, "coversmall": coverSmall, "file": id}, bson.M{"path": 1})
|
||||
} else {
|
||||
fmt.Println("No cover ================", book.Title)
|
||||
db.UpdateBook(bson.ObjectIdHex(book.Id), bson.M{"file": id}, bson.M{"path": 1})
|
||||
}
|
||||
}
|
||||
}
|
Reference in a new issue