diff --git a/README b/README index 9e170e6..a65c336 100644 --- a/README +++ b/README @@ -34,7 +34,7 @@ Yo also need to install go dependences: Now you can install Trantor itself: -# go get -tags prod gitlab.com/trantor/trantor +# go get gitlab.com/trantor/trantor You can run trantor in /srv/www/trantor i.e. For this: @@ -57,8 +57,7 @@ Go to your browser to: http://localhost:8080 Edit config.go if you want to change the port and other configuration, by default is 8080 Now you can compile Trantor: -$ go build -tags prod -(remove '-tags prod' for a faster compilation without language guessing) +$ go build Now you can run it: $ ./trantor diff --git a/language_develop.go b/language_develop.go deleted file mode 100644 index 1858bba..0000000 --- a/language_develop.go +++ /dev/null @@ -1,16 +0,0 @@ -// +build !prod - -// This is a dummy implementation of GuessLang used to make the compilation faster on development -// -// To build trantor with the proper language guessing do: -// $ go build -tags prod - -package main - -import ( - "github.com/meskio/epubgo" -) - -func GuessLang(epub *epubgo.Epub, orig_langs []string) []string { - return orig_langs -} diff --git a/language.go b/parser/language.go similarity index 97% rename from language.go rename to parser/language.go index 87f48e9..2a56a78 100644 --- a/language.go +++ b/parser/language.go @@ -1,6 +1,4 @@ -// +build prod - -package main +package parser import ( "io/ioutil" diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..d26f6b9 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,100 @@ +package parser + +import ( + "regexp" + "strings" + + "git.gitorious.org/go-pkg/epubgo.git" +) + +type MetaData map[string]interface{} + +func EpubMetadata(epub *epubgo.Epub) MetaData { + metadata := MetaData{} + for _, m := range epub.MetadataFields() { + data, err := epub.Metadata(m) + if err != nil { + continue + } + switch m { + case "creator": + metadata["author"] = parseAuthr(data) + case "description": + metadata[m] = parseDescription(data) + case "subject": + metadata[m] = parseSubject(data) + case "date": + metadata[m] = parseDate(data) + case "language": + metadata["lang"] = GuessLang(epub, data) + case "title", "contributor", "publisher": + metadata[m] = cleanStr(strings.Join(data, ", ")) + case "identifier": + attr, _ := epub.MetadataAttr(m) + for i, d := range data { + if attr[i]["scheme"] == "ISBN" { + metadata["isbn"] = d + } + } + default: + metadata[m] = strings.Join(data, ", ") + } + } + return metadata +} + +func cleanStr(str string) string { + str = strings.Replace(str, "'", "'", -1) + exp, _ := regexp.Compile("&[^;]*;") + str = exp.ReplaceAllString(str, "") + exp, _ = regexp.Compile("[ ,]*$") + str = exp.ReplaceAllString(str, "") + return str +} + +func parseAuthr(creator []string) []string { + exp1, _ := regexp.Compile("^(.*\\( *([^\\)]*) *\\))*$") + exp2, _ := regexp.Compile("^[^:]*: *(.*)$") + res := make([]string, len(creator)) + for i, s := range creator { + auth := exp1.FindStringSubmatch(s) + if auth != nil { + res[i] = cleanStr(strings.Join(auth[2:], ", ")) + } else { + auth := exp2.FindStringSubmatch(s) + if auth != nil { + res[i] = cleanStr(auth[1]) + } else { + res[i] = cleanStr(s) + } + } + } + return res +} + +func parseDescription(description []string) string { + str := cleanStr(strings.Join(description, "\n")) + str = strings.Replace(str, "

", "\n", -1) + exp, _ := regexp.Compile("<[^>]*>") + str = exp.ReplaceAllString(str, "") + str = strings.Replace(str, "&", "&", -1) + str = strings.Replace(str, "<", "<", -1) + str = strings.Replace(str, ">", ">", -1) + str = strings.Replace(str, "\\n", "\n", -1) + return str +} + +func parseSubject(subject []string) []string { + var res []string + for _, s := range subject { + res = append(res, strings.Split(s, " / ")...) + } + return res +} + +func parseDate(date []string) string { + if len(date) == 0 { + return "" + } + return strings.Replace(date[0], "Unspecified: ", "", -1) +} diff --git a/upload.go b/upload.go index fe1305a..32e1239 100644 --- a/upload.go +++ b/upload.go @@ -8,11 +8,10 @@ import ( "encoding/base64" "io/ioutil" "mime/multipart" - "regexp" - "strings" "github.com/meskio/epubgo" "gitlab.com/trantor/trantor/database" + "gitlab.com/trantor/trantor/parser" "gitlab.com/trantor/trantor/storage" ) @@ -47,7 +46,11 @@ func processFile(req uploadRequest, db *database.DB, store *storage.Store) { } defer epub.Close() - book, id := parseFile(epub, store) + id := genId() + metadata := parser.EpubMetadata(epub) + metadata["id"] = id + metadata["cover"] = GetCover(epub, id, store) + req.file.Seek(0, 0) size, err := store.Store(id, req.file, EPUB_FILE) if err != nil { @@ -55,8 +58,8 @@ func processFile(req uploadRequest, db *database.DB, store *storage.Store) { return } - book["filesize"] = size - err = db.AddBook(book) + metadata["filesize"] = size + err = db.AddBook(metadata) if err != nil { log.Error("Error storing metadata (", id, "): ", err) return @@ -107,102 +110,8 @@ func openMultipartEpub(file multipart.File) (*epubgo.Epub, error) { return epubgo.Load(reader, int64(len(buff))) } -func parseFile(epub *epubgo.Epub, store *storage.Store) (metadata map[string]interface{}, id string) { - book := map[string]interface{}{} - for _, m := range epub.MetadataFields() { - data, err := epub.Metadata(m) - if err != nil { - continue - } - switch m { - case "creator": - book["author"] = parseAuthr(data) - case "description": - book[m] = parseDescription(data) - case "subject": - book[m] = parseSubject(data) - case "date": - book[m] = parseDate(data) - case "language": - book["lang"] = GuessLang(epub, data) - case "title", "contributor", "publisher": - book[m] = cleanStr(strings.Join(data, ", ")) - case "identifier": - attr, _ := epub.MetadataAttr(m) - for i, d := range data { - if attr[i]["scheme"] == "ISBN" { - book["isbn"] = d - } - } - default: - book[m] = strings.Join(data, ", ") - } - } - - id = genId() - book["id"] = id - book["cover"] = GetCover(epub, id, store) - return book, id -} - func genId() string { b := make([]byte, 12) rand.Read(b) return base64.URLEncoding.EncodeToString(b) } - -func cleanStr(str string) string { - str = strings.Replace(str, "'", "'", -1) - exp, _ := regexp.Compile("&[^;]*;") - str = exp.ReplaceAllString(str, "") - exp, _ = regexp.Compile("[ ,]*$") - str = exp.ReplaceAllString(str, "") - return str -} - -func parseAuthr(creator []string) []string { - exp1, _ := regexp.Compile("^(.*\\( *([^\\)]*) *\\))*$") - exp2, _ := regexp.Compile("^[^:]*: *(.*)$") - res := make([]string, len(creator)) - for i, s := range creator { - auth := exp1.FindStringSubmatch(s) - if auth != nil { - res[i] = cleanStr(strings.Join(auth[2:], ", ")) - } else { - auth := exp2.FindStringSubmatch(s) - if auth != nil { - res[i] = cleanStr(auth[1]) - } else { - res[i] = cleanStr(s) - } - } - } - return res -} - -func parseDescription(description []string) string { - str := cleanStr(strings.Join(description, "\n")) - str = strings.Replace(str, "

", "\n", -1) - exp, _ := regexp.Compile("<[^>]*>") - str = exp.ReplaceAllString(str, "") - str = strings.Replace(str, "&", "&", -1) - str = strings.Replace(str, "<", "<", -1) - str = strings.Replace(str, ">", ">", -1) - str = strings.Replace(str, "\\n", "\n", -1) - return str -} - -func parseSubject(subject []string) []string { - var res []string - for _, s := range subject { - res = append(res, strings.Split(s, " / ")...) - } - return res -} - -func parseDate(date []string) string { - if len(date) == 0 { - return "" - } - return strings.Replace(date[0], "Unspecified: ", "", -1) -}