diff --git a/.gitignore b/.gitignore
index 65bb228..76fdaad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,6 @@ tools/getISBNnDesc/getISBNnDesc
 tools/coverNew/coverNew
 tools/addsize/addsize
 tools/importer/importer
+tools/keywords/keywords
 tags
 .*.swp
diff --git a/tools/README b/tools/README
index 84312cd..18842dd 100644
--- a/tools/README
+++ b/tools/README
@@ -15,3 +15,5 @@ Password:
 - coverNew. Reload the cover from all the new books
 
 - addsize. Add the size of the books to the book metadata
+
+- keywords. Recalculate the keywords of all the books
diff --git a/tools/keywords/config.go b/tools/keywords/config.go
new file mode 100644
--- /dev/null
+++ b/tools/keywords/config.go
@@ -0,0 +1,42 @@
+package main
+
+// Configuration constants. keywords.go only references DB_IP and DB_NAME; the
+// other values are unused by this tool.
+const (
+	PORT = "8080"
+
+	DB_IP     = "127.0.0.1"
+	DB_NAME   = "trantor"
+	META_COLL = "meta"
+	FS_BOOKS  = "fs_books"
+	FS_IMGS   = "fs_imgs"
+
+	PASS_SALT                 = "ImperialLibSalt"
+	MINUTES_UPDATE_TAGS       = 11
+	MINUTES_UPDATE_VISITED    = 41
+	MINUTES_UPDATE_DOWNLOADED = 47
+	MINUTES_UPDATE_HOURLY_V   = 31
+	MINUTES_UPDATE_DAILY_V    = 60*12 + 7
+	MINUTES_UPDATE_MONTHLY_V  = 60*24 + 11
+	MINUTES_UPDATE_HOURLY_D   = 29
+	MINUTES_UPDATE_DAILY_D    = 60*12 + 13
+	MINUTES_UPDATE_MONTHLY_D  = 60*24 + 17
+	MINUTES_UPDATE_LOGGER     = 5
+	BOOKS_FRONT_PAGE          = 6
+	SEARCH_ITEMS_PAGE         = 20
+	NEW_ITEMS_PAGE            = 50
+	NUM_NEWS                  = 10
+	DAYS_NEWS_INDEXPAGE       = 15
+
+	TEMPLATE_PATH = "templates/"
+	CSS_PATH      = "css/"
+	JS_PATH       = "js/"
+	IMG_PATH      = "img/"
+	LOGGER_CONFIG = "logger.xml"
+
+	IMG_WIDTH_BIG   = 300
+	IMG_WIDTH_SMALL = 60
+	IMG_QUALITY     = 80
+
+	CHAN_SIZE = 100
+)
diff --git a/tools/keywords/keywords.go b/tools/keywords/keywords.go
new file mode 100644
--- /dev/null
+++ b/tools/keywords/keywords.go
@@ -0,0 +1,100 @@
+// Command keywords recomputes the "keywords" field of every book in the
+// database from the book's title, authors, publisher and subjects.
+package main
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"unicode"
+
+	"git.gitorious.org/trantor/trantor.git/database"
+	"gopkgs.com/unidecode.v1"
+	"labix.org/v2/mgo/bson"
+)
+
+// main runs the recalculation and exits with status 1 if anything failed, so
+// that scripts invoking the tool can detect the failure.
+func main() {
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+
+// run connects to the database, rebuilds the keywords of every book and stores
+// them. A failure on a single book is reported on stderr and does not stop the
+// remaining books from being processed; it is reflected in the returned error.
+func run() error {
+	db := database.Init(DB_IP, DB_NAME)
+	defer db.Close()
+
+	books, _, err := db.GetBooks(bson.M{}, 0, 0)
+	if err != nil {
+		return fmt.Errorf("listing books: %v", err)
+	}
+
+	failed := 0
+	for _, b := range books {
+		fmt.Println(b.Title)
+
+		// bson.ObjectIdHex panics on a malformed id, so validate it first.
+		if !bson.IsObjectIdHex(b.Id) {
+			fmt.Fprintf(os.Stderr, "skipping %q: invalid id %q\n", b.Title, b.Id)
+			failed++
+			continue
+		}
+
+		fields := map[string]interface{}{
+			"title":     b.Title,
+			"author":    b.Author,
+			"publisher": b.Publisher,
+			"subject":   b.Subject,
+		}
+		update := map[string]interface{}{"keywords": keywords(fields)}
+		if err := db.UpdateBook(bson.ObjectIdHex(b.Id), update); err != nil {
+			fmt.Fprintf(os.Stderr, "updating %q: %v\n", b.Title, err)
+			failed++
+		}
+	}
+
+	if failed > 0 {
+		return fmt.Errorf("%d book(s) could not be updated", failed)
+	}
+	return nil
+}
+
+// keywords returns the search tokens of a book, in order: title, authors,
+// publisher, subjects. It reads the "title" and "publisher" keys as strings and
+// the "author" and "subject" keys as []string; a key that is missing or holds
+// another type contributes no tokens. Duplicates are not removed.
+func keywords(b map[string]interface{}) []string {
+	title, _ := b["title"].(string)
+	k := tokens(title)
+
+	authors, _ := b["author"].([]string)
+	for _, a := range authors {
+		k = append(k, tokens(a)...)
+	}
+
+	publisher, _ := b["publisher"].(string)
+	k = append(k, tokens(publisher)...)
+
+	subjects, _ := b["subject"].([]string)
+	for _, s := range subjects {
+		k = append(k, tokens(s)...)
+	}
+	return k
+}
+
+// tokens passes str through unidecode, lower-cases it and splits it into
+// fields separated by control, punctuation or whitespace characters. Empty
+// fields are dropped, so an empty str yields no tokens.
+func tokens(str string) []string {
+	str = unidecode.Unidecode(str)
+	str = strings.ToLower(str)
+	isSeparator := func(r rune) bool {
+		return unicode.IsControl(r) || unicode.IsPunct(r) || unicode.IsSpace(r)
+	}
+	return strings.FieldsFunc(str, isSeparator)
+}