2012-07-30 23:23:38 +02:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"git.gitorious.org/go-pkg/epub.git"
|
|
|
|
"labix.org/v2/mgo"
|
|
|
|
"os"
|
|
|
|
"os/exec"
|
|
|
|
"regexp"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Settings of the book importer.
const (
	// Database: address handed to mgo.Dial, and the database and
	// collection the book records are inserted into.
	IP         = "127.0.0.1"
	DB_NAME    = "trantor"
	BOOKS_COLL = "books"

	// Directories: PATH receives the imported books, NEW_PATH is scanned
	// for books waiting to be imported, COVER_PATH receives the covers.
	PATH       = "books/"
	NEW_PATH   = "new/"
	COVER_PATH = "cover/"

	// Command line prefixes used to scale a cover (RESIZE) and to create
	// its thumbnail (RESIZE_THUMB); the source and destination image paths
	// are appended as the last two arguments.
	RESIZE       = "/usr/bin/convert -resize 300 -quality 60 "
	RESIZE_THUMB = "/usr/bin/convert -resize 60 -quality 60 "
)
|
|
|
|
|
2012-08-15 17:12:15 +02:00
|
|
|
func resize(folder, name, extension string) (string, string) {
|
|
|
|
imgPath := folder + name + extension
|
|
|
|
resize := append(strings.Split(RESIZE, " "), imgPath, imgPath)
|
|
|
|
cmd := exec.Command(resize[0], resize[1:]...)
|
|
|
|
cmd.Run()
|
|
|
|
imgPathSmall := folder + name + "_small" + extension
|
|
|
|
resize = append(strings.Split(RESIZE_THUMB, " "), imgPath, imgPathSmall)
|
|
|
|
cmd = exec.Command(resize[0], resize[1:]...)
|
|
|
|
cmd.Run()
|
|
|
|
return "/" + imgPath, "/" + imgPathSmall
|
|
|
|
}
|
|
|
|
|
2012-07-30 23:23:38 +02:00
|
|
|
func getCover(e *epub.Epub, path string) (string, string) {
|
2012-08-15 17:12:15 +02:00
|
|
|
folder := COVER_PATH + path[:1] + "/"
|
|
|
|
os.Mkdir(folder, os.ModePerm)
|
|
|
|
|
|
|
|
/* Try first common names */
|
|
|
|
imgPath := folder + path + ".jpg"
|
|
|
|
file, _ := os.Create(imgPath)
|
|
|
|
defer file.Close()
|
|
|
|
n, _ := file.Write(e.Data("cover.jpg"))
|
|
|
|
if n != 0 {
|
|
|
|
return resize(folder, path, ".jpg")
|
|
|
|
}
|
|
|
|
n, _ = file.Write(e.Data("cover.jpeg"))
|
|
|
|
if n != 0 {
|
|
|
|
return resize(folder, path, ".jpg")
|
|
|
|
}
|
|
|
|
defer os.Remove(imgPath)
|
|
|
|
|
2012-08-15 14:51:59 +02:00
|
|
|
exp, _ := regexp.Compile("<img.*src=[\"']([^\"']*(\\.[^\\.\"']*))[\"']")
|
2012-07-30 23:23:38 +02:00
|
|
|
it := e.Iterator(epub.EITERATOR_SPINE)
|
|
|
|
defer it.Close()
|
|
|
|
|
|
|
|
var err error = nil
|
|
|
|
txt := it.Curr()
|
|
|
|
for err == nil {
|
|
|
|
res := exp.FindStringSubmatch(txt)
|
|
|
|
if res != nil {
|
2012-08-15 17:12:15 +02:00
|
|
|
urlPart := strings.Split(it.CurrUrl(), "/")
|
|
|
|
url := strings.Join(urlPart[:len(urlPart)-1], "/")
|
|
|
|
if url == "" {
|
|
|
|
url = res[1]
|
|
|
|
} else {
|
|
|
|
url = url + "/" + res[1]
|
|
|
|
}
|
|
|
|
imgPath = folder + path + res[2]
|
2012-07-30 23:23:38 +02:00
|
|
|
f, _ := os.Create(imgPath)
|
2012-08-15 17:12:15 +02:00
|
|
|
defer f.Close()
|
|
|
|
/* try to write it, if there is nothing search for other img */
|
|
|
|
n, _ = f.Write(e.Data(url))
|
|
|
|
if n != 0 {
|
|
|
|
return resize(folder, path, res[2])
|
|
|
|
}
|
|
|
|
panic(url) // FIXME
|
|
|
|
defer os.Remove(imgPath)
|
2012-07-30 23:23:38 +02:00
|
|
|
}
|
|
|
|
txt, err = it.Next()
|
|
|
|
}
|
|
|
|
return "", ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseAuthr cleans up the creator entries of a book and returns one author
// name per entry, in the same order.
//
// Each entry is handled by the first rule that applies:
//   - an entry ending in a parenthesised part, such as "Doe, John (John Doe)",
//     yields the text inside those parentheses;
//   - an entry containing a colon, such as "aut: Jane Roe", yields what
//     follows the first colon and the spaces after it;
//   - any other entry is kept unchanged.
//
// The result is nil when creator is empty.
func parseAuthr(creator []string) []string {
	parenthesised := regexp.MustCompile("^(.*\\( *([^\\)]*) *\\))*$")
	prefixed := regexp.MustCompile("^[^:]*: *(.*)$")

	var authors []string
	for i := range creator {
		entry := creator[i]

		if m := parenthesised.FindStringSubmatch(entry); m != nil {
			authors = append(authors, strings.Join(m[2:], ", "))
			continue
		}
		if m := prefixed.FindStringSubmatch(entry); m != nil {
			authors = append(authors, m[1])
			continue
		}
		authors = append(authors, entry)
	}
	return authors
}
|
|
|
|
|
|
|
|
// parseSubject flattens the subject entries of a book: every entry is split
// on " / " and all the pieces are returned in order as a single list.
// The result is nil when subject is empty.
func parseSubject(subject []string) []string {
	var topics []string
	for i := 0; i < len(subject); i++ {
		parts := strings.Split(subject[i], " / ")
		topics = append(topics, parts...)
	}
	return topics
}
|
|
|
|
|
|
|
|
func keywords(b Book) (k []string) {
|
|
|
|
k = strings.Split(b.Title, " ")
|
|
|
|
for _, a := range b.Author {
|
|
|
|
k = append(k, strings.Split(a, " ")...)
|
|
|
|
}
|
|
|
|
k = append(k, strings.Split(b.Publisher, " ")...)
|
|
|
|
k = append(k, b.Subject...)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func store(coll *mgo.Collection, path string) {
|
|
|
|
var book Book
|
|
|
|
|
2012-08-15 17:12:15 +02:00
|
|
|
fmt.Println(path)
|
2012-08-02 00:42:09 +02:00
|
|
|
e, err := epub.Open(NEW_PATH+path, 0)
|
2012-07-30 23:23:38 +02:00
|
|
|
if err != nil {
|
|
|
|
panic(err) // TODO: do something
|
|
|
|
}
|
|
|
|
defer e.Close()
|
|
|
|
|
|
|
|
book.Title = strings.Join(e.Metadata(epub.EPUB_TITLE), ", ")
|
|
|
|
book.Author = parseAuthr(e.Metadata(epub.EPUB_CREATOR))
|
|
|
|
book.Contributor = strings.Join(e.Metadata(epub.EPUB_CONTRIB), ", ")
|
|
|
|
book.Publisher = strings.Join(e.Metadata(epub.EPUB_PUBLISHER), ", ")
|
|
|
|
book.Description = strings.Join(e.Metadata(epub.EPUB_DESCRIPTION), ", ")
|
|
|
|
book.Subject = parseSubject(e.Metadata(epub.EPUB_SUBJECT))
|
|
|
|
book.Date = strings.Join(e.Metadata(epub.EPUB_DATE), ", ")
|
|
|
|
book.Lang = e.Metadata(epub.EPUB_LANG)
|
|
|
|
book.Type = strings.Join(e.Metadata(epub.EPUB_TYPE), ", ")
|
|
|
|
book.Format = strings.Join(e.Metadata(epub.EPUB_FORMAT), ", ")
|
|
|
|
book.Source = strings.Join(e.Metadata(epub.EPUB_SOURCE), ", ")
|
|
|
|
book.Relation = strings.Join(e.Metadata(epub.EPUB_RELATION), ", ")
|
|
|
|
book.Coverage = strings.Join(e.Metadata(epub.EPUB_COVERAGE), ", ")
|
|
|
|
book.Rights = strings.Join(e.Metadata(epub.EPUB_RIGHTS), ", ")
|
|
|
|
book.Meta = strings.Join(e.Metadata(epub.EPUB_META), ", ")
|
2012-07-31 00:38:06 +02:00
|
|
|
book.Path = PATH + path[:1] + "/" + path
|
2012-07-30 23:23:38 +02:00
|
|
|
book.Cover, book.CoverSmall = getCover(e, path)
|
|
|
|
book.Keywords = keywords(book)
|
|
|
|
coll.Insert(book)
|
2012-07-31 00:38:06 +02:00
|
|
|
|
2012-08-15 15:12:59 +02:00
|
|
|
os.Mkdir(PATH+path[:1], os.ModePerm)
|
2012-08-02 00:42:09 +02:00
|
|
|
cmd := exec.Command("mv", NEW_PATH+path, book.Path)
|
2012-07-31 00:38:06 +02:00
|
|
|
cmd.Run()
|
2012-07-30 23:23:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
session, err := mgo.Dial(IP)
|
|
|
|
if err != nil {
|
|
|
|
panic(err) // TODO: do something
|
|
|
|
}
|
|
|
|
defer session.Close()
|
|
|
|
coll := session.DB(DB_NAME).C(BOOKS_COLL)
|
|
|
|
|
2012-08-02 00:42:09 +02:00
|
|
|
f, err := os.Open(NEW_PATH)
|
2012-07-30 23:23:38 +02:00
|
|
|
if err != nil {
|
2012-08-02 00:42:09 +02:00
|
|
|
fmt.Println(NEW_PATH)
|
2012-07-30 23:23:38 +02:00
|
|
|
panic(err) // TODO: do something
|
|
|
|
}
|
|
|
|
names, err := f.Readdirnames(0)
|
|
|
|
if err != nil {
|
|
|
|
panic(err) // TODO: do something
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, name := range names {
|
|
|
|
store(coll, name)
|
|
|
|
}
|
|
|
|
}
|