diff --git a/lib/Library.php b/lib/Library.php index 5d1b38ea..47fab570 100644 --- a/lib/Library.php +++ b/lib/Library.php @@ -190,6 +190,32 @@ class Library{ return $matches; } + /** + * @return array + */ + public static function GetEbooksFromFilesystem(?string $webRoot = WEB_ROOT): array{ + $ebooks = []; + $contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/ebooks/') . ' -name "content.opf" | sort') ?? '')); + + foreach($contentFiles as $path){ + if($path == '') + continue; + + $ebookWwwFilesystemPath = ''; + + try{ + $ebookWwwFilesystemPath = preg_replace('|/content\.opf|ius', '', $path); + + $ebooks[] = new Ebook($ebookWwwFilesystemPath); + } + catch(\Exception $ex){ + // An error in a book isn't fatal; just carry on. + } + } + + return $ebooks; + } + public static function RebuildCache(): void{ // We check a lockfile because this can be a long-running command. // We don't want to queue up a bunch of these in case someone is refreshing the index constantly. diff --git a/scripts/README.md b/scripts/README.md index fea199cd..329f8587 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -6,7 +6,7 @@ To use, call this script with the directory where your ebooks go as its last arg To use, call this script with the directories of the books you want to deploy as its arguments. For example, to deploy all ebooks after using sync-ebooks, run `deploy-ebook-to-www /standardebooks.org/ebooks/*`. To deploy only The Time Machine by H. G. Wells, you would run `deploy-ebook-to-www /standardebooks.org/ebooks/h-g-wells_the-time-machine`. To output progress information, use `-v` or `--verbose`. -The default web root is `/standardebooks.org`. If it is located elsewhere, specify it with the `--webroot` option. For instance, `deploy-ebook-to-www --webroot /var/www/html /path/to/ebook`. Note that there will be php errors if the Git repositories are not in the ebook directory immediately in the web root. Either keep them there or create a symlink. 
+The default web root is `/standardebooks.org/web/www`. If it is located elsewhere, specify it with the `--webroot` option. For instance, `deploy-ebook-to-www --webroot /var/www/html /path/to/ebook`. Note that there will be php errors if the Git repositories are not in the ebook directory immediately in the web root. Either keep them there or create a symlink. The default group is `se`. To use a different one, specify it with the `--group` option. diff --git a/scripts/deploy-ebook-to-www b/scripts/deploy-ebook-to-www index 2f0096f4..289edd05 100755 --- a/scripts/deploy-ebook-to-www +++ b/scripts/deploy-ebook-to-www @@ -9,7 +9,7 @@ USAGE deploy-ebook-to-www [-v,--verbose] [-g,--group GROUP] [--webroot WEBROOT] [--weburl WEBURL] [--no-images] [--no-build] [--no-epubcheck] [--no-recompose] [--no-feeds] [-l,--last-push-hash HASH] DIRECTORY [DIRECTORY...] DIRECTORY is a bare source repository. GROUP is a groupname. Defaults to "se". - WEBROOT is the path to your webroot. Defaults to "/standardebooks.org". + WEBROOT is the path to your webroot. Defaults to "/standardebooks.org/web/www". WEBURL is the URL the website is served on. Defaults to "https://standardebooks.org". The deploy process does four things: @@ -45,7 +45,7 @@ verbose="false" images="true" build="true" group="se" -webRoot="/standardebooks.org/web" +webRoot="/standardebooks.org/web/www" webUrl="https://standardebooks.org" lastPushHash="" epubcheck="true" @@ -134,7 +134,7 @@ if ! [ -f "${scriptsDir}"/generate-feeds ]; then die "\"${scriptsDir}\"/generate-feeds\" is not a file or could not be found." fi -mkdir -p "${webRoot}"/www/images/covers/ +mkdir -p "${webRoot}"/images/covers/ for dir in "$@" do @@ -176,7 +176,7 @@ do if [ "${images}" = "true" ]; then # Always build images if they don't exist, or if they've changed - if [[ !
-f "${webRoot}/www/images/covers/${urlSafeIdentifier}-cover.jpg" ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.jpg ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.svg ]]; then + if [[ ! -f "${webRoot}/images/covers/${urlSafeIdentifier}-cover.jpg" ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.jpg ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.svg ]]; then images="true" else images="false" @@ -202,7 +202,7 @@ do workDir=$(mktemp -d) imgWorkDir=$(mktemp -d) - webDir="${webRoot}/www/ebooks/${webDir}" + webDir="${webRoot}/ebooks/${webDir}" if [ "${images}" = "true" ]; then if [ "${verbose}" = "true" ]; then @@ -296,7 +296,7 @@ do # Recompose the epub into a single file, but put it outside of the epub src for now so we don't stomp on it with the following sections. # We do this first because the tweaks below shouldn't apply to the single-page file - se recompose-epub --xhtml --output "${workDir}"/single-page.xhtml --extra-css-file="${webRoot}/www/css/web.css" "${workDir}" + se recompose-epub --xhtml --output "${workDir}"/single-page.xhtml --extra-css-file="${webRoot}/css/web.css" "${workDir}" # Add a navbar with a link back to the homepage sed --in-place --regexp-extended "s||
|" "${workDir}"/single-page.xhtml @@ -357,7 +357,7 @@ do if [ "${images}" = "true" ]; then # Move the cover images over - mv "${imgWorkDir}/${urlSafeIdentifier}"*.{jpg,avif} "${webRoot}/www/images/covers/" + mv "${imgWorkDir}/${urlSafeIdentifier}"*.{jpg,avif} "${webRoot}/images/covers/" fi # Delete the now-empty work dir (empty except for .git) @@ -366,8 +366,8 @@ do sudo chgrp --preserve-root --recursive "${group}" "${webDir}" sudo chmod --preserve-root --recursive g+ws "${webDir}" - sudo chgrp --preserve-root --recursive "${group}" "${webRoot}/www/images/covers/" - sudo chmod --preserve-root --recursive g+ws "${webRoot}/www/images/covers/" + sudo chgrp --preserve-root --recursive "${group}" "${webRoot}/images/covers/" + sudo chmod --preserve-root --recursive g+ws "${webRoot}/images/covers/" if [ "${verbose}" = "true" ]; then printf "Rebuilding web library cache ... " @@ -390,9 +390,9 @@ if [ "${feeds}" = "true" ]; then "${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}" - sudo chown --recursive se:committers "${webRoot}"/www/{atom,rss,opds}/{*.xml,subjects} - sudo chmod --recursive 664 "${webRoot}"/www/{atom,rss,opds}/{*.xml,subjects/*.xml} - sudo chmod 775 "${webRoot}"/www/{atom,rss,opds}/subjects + sudo chown --recursive se:committers "${webRoot}"/{atom,rss,opds}/{*.xml,subjects} + sudo chmod --recursive 664 "${webRoot}"/{atom,rss,opds}/{*.xml,subjects/*.xml} + sudo chmod 775 "${webRoot}"/{atom,rss,opds}/subjects if [ "${verbose}" = "true" ]; then printf "Done.\n" diff --git a/scripts/generate-feeds b/scripts/generate-feeds index 11eae990..7d2c4e88 100755 --- a/scripts/generate-feeds +++ b/scripts/generate-feeds @@ -20,59 +20,42 @@ function SaveFeed($feed, $force, $now = null){ } } -$longopts = ['webroot:', 'weburl:', 'force']; +$longopts = ['webroot:', 'force']; $options = getopt('', $longopts); -$webRoot = $options['webroot'] ?? '/standardebooks.org/web'; -$webUrl = $options['weburl'] ?? 
'https://standardebooks.org'; +$webRoot = $options['webroot'] ?? WEB_ROOT; $force = isset($options['force']) ? true : false; // If the arg is present, getopts sets it to false!!! -$contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? '')); $allEbooks = []; $newestEbooks = []; $subjects = []; $ebooksBySubject = []; $ebooksPerNewestEbooksFeed = 30; -if(!is_dir(WEB_ROOT . '/feeds/opds/subjects')){ - mkdir(WEB_ROOT . '/feeds/opds/subjects'); +if(!is_dir($webRoot . '/feeds/opds/subjects')){ + mkdir($webRoot . '/feeds/opds/subjects'); } -if(!is_dir(WEB_ROOT . '/feeds/rss/subjects')){ - mkdir(WEB_ROOT . '/feeds/rss/subjects'); +if(!is_dir($webRoot . '/feeds/rss/subjects')){ + mkdir($webRoot . '/feeds/rss/subjects'); } -if(!is_dir(WEB_ROOT . '/feeds/atom/subjects')){ - mkdir(WEB_ROOT . '/feeds/atom/subjects'); +if(!is_dir($webRoot . '/feeds/atom/subjects')){ + mkdir($webRoot . '/feeds/atom/subjects'); } // Iterate over all ebooks to build the various feeds -foreach($contentFiles as $path){ - if($path == '') - continue; +foreach(Library::GetEbooksFromFilesystem($webRoot) as $ebook){ + $allEbooks[$ebook->Updated->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; + $newestEbooks[$ebook->Created->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; - $ebookWwwFilesystemPath = ''; - - try{ - $ebookWwwFilesystemPath = preg_replace('|/content\.opf|ius', '', $path); - - $ebook = new Ebook($ebookWwwFilesystemPath); - - $allEbooks[$ebook->Updated->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; - $newestEbooks[$ebook->Created->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; - - foreach($ebook->Tags as $tag){ - // Add the book's subjects to the main subjects list - if(!in_array($tag->Name, $subjects)){ - $subjects[] = $tag->Name; - } - - // Sort this ebook by subject - $ebooksBySubject[$tag->Name][$ebook->Created->format('Y-m-d\TH:i:s\Z') . ' ' . 
$ebook->Identifier] = $ebook; + foreach($ebook->Tags as $tag){ + // Add the book's subjects to the main subjects list + if(!in_array($tag->Name, $subjects)){ + $subjects[] = $tag->Name; } - } - catch(\Exception $ex){ - print('Failed to generate OPDS entry for `' . $ebookWwwFilesystemPath . '`. Exception: ' . $ex->getMessage()); - continue; + + // Sort this ebook by subject + $ebooksBySubject[$tag->Name][$ebook->Created->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; } } @@ -107,7 +90,7 @@ $opdsRootEntries = [ 'acquisition') ]; -$opdsRoot = new OpdsNavigationFeed('Standard Ebooks', 'The Standard Ebooks catalog.', '/feeds/opds', WEB_ROOT . '/feeds/opds/index.xml', $opdsRootEntries, null); +$opdsRoot = new OpdsNavigationFeed('Standard Ebooks', 'The Standard Ebooks catalog.', '/feeds/opds', $webRoot . '/feeds/opds/index.xml', $opdsRootEntries, null); SaveFeed($opdsRoot, $force, $now); // Create the subjects navigation document @@ -116,56 +99,56 @@ $subjectNavigationEntries = []; foreach($subjects as $subject){ $subjectNavigationEntries[] = new OpdsNavigationEntry($subject, 'Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.', '/feeds/opds/subjects/' . Formatter::MakeUrlSafe($subject), $now, 'subsection', 'navigation'); } -$subjectsFeed = new OpdsNavigationFeed('Standard Ebooks by Subject', 'Browse Standard Ebooks by subject.', '/feeds/opds/subjects', WEB_ROOT . '/feeds/opds/subjects/index.xml', $subjectNavigationEntries, $opdsRoot); +$subjectsFeed = new OpdsNavigationFeed('Standard Ebooks by Subject', 'Browse Standard Ebooks by subject.', '/feeds/opds/subjects', $webRoot . 
'/feeds/opds/subjects/index.xml', $subjectNavigationEntries, $opdsRoot); $subjectsFeed->Subtitle = 'Browse Standard Ebooks by subject.'; SaveFeed($subjectsFeed, $force, $now); // Now generate each individual subject feed foreach($subjectNavigationEntries as $subjectNavigationEntry){ krsort($ebooksBySubject[$subjectNavigationEntry->Title]); - $subjectFeed = new OpdsAcquisitionFeed($subjectNavigationEntry->Title . ' Ebooks', $subjectNavigationEntry->Description, '/feeds/opds/subjects/' . Formatter::MakeUrlSafe($subjectNavigationEntry->Title), WEB_ROOT . '/feeds/opds/subjects/' . Formatter::MakeUrlSafe($subjectNavigationEntry->Title) . '.xml', $ebooksBySubject[$subjectNavigationEntry->Title], $subjectsFeed); + $subjectFeed = new OpdsAcquisitionFeed($subjectNavigationEntry->Title . ' Ebooks', $subjectNavigationEntry->Description, '/feeds/opds/subjects/' . Formatter::MakeUrlSafe($subjectNavigationEntry->Title), $webRoot . '/feeds/opds/subjects/' . Formatter::MakeUrlSafe($subjectNavigationEntry->Title) . '.xml', $ebooksBySubject[$subjectNavigationEntry->Title], $subjectsFeed); SaveFeed($subjectFeed, $force, $now); } // Create the 'all' feed krsort($allEbooks); -$allFeed = new OpdsAcquisitionFeed('All Standard Ebooks', 'All Standard Ebooks, most-recently-updated first. This is a Complete Acquisition Feed as defined in OPDS 1.2 §2.5.', '/feeds/opds/all', WEB_ROOT . '/feeds/opds/all.xml', $allEbooks, $opdsRoot, true); +$allFeed = new OpdsAcquisitionFeed('All Standard Ebooks', 'All Standard Ebooks, most-recently-updated first. This is a Complete Acquisition Feed as defined in OPDS 1.2 §2.5.', '/feeds/opds/all', $webRoot . '/feeds/opds/all.xml', $allEbooks, $opdsRoot, true); SaveFeed($allFeed, $force, $now); // Create the 'newest' feed -$newestFeed = new OpdsAcquisitionFeed('Newest Standard Ebooks', 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.', '/feeds/opds/new-releases', WEB_ROOT . 
'/feeds/opds/new-releases.xml', $newestEbooks, $opdsRoot); +$newestFeed = new OpdsAcquisitionFeed('Newest Standard Ebooks', 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.', '/feeds/opds/new-releases', $webRoot . '/feeds/opds/new-releases.xml', $newestEbooks, $opdsRoot); SaveFeed($newestFeed, $force, $now); // Now create RSS feeds // Create the 'newest' feed -$newestRssFeed = new RssFeed('Standard Ebooks - Newest Ebooks', 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.', '/feeds/rss/new-releases', WEB_ROOT . '/feeds/rss/new-releases.xml', $newestEbooks); +$newestRssFeed = new RssFeed('Standard Ebooks - Newest Ebooks', 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.', '/feeds/rss/new-releases', $webRoot . '/feeds/rss/new-releases.xml', $newestEbooks); SaveFeed($newestRssFeed, $force); // Create the 'all' feed -$allRssFeed = new RssFeed('Standard Ebooks - All Ebooks', 'All Standard Ebooks, most-recently-released first.', '/feeds/rss/all', WEB_ROOT . '/feeds/rss/all.xml', $allEbooks); +$allRssFeed = new RssFeed('Standard Ebooks - All Ebooks', 'All Standard Ebooks, most-recently-released first.', '/feeds/rss/all', $webRoot . '/feeds/rss/all.xml', $allEbooks); SaveFeed($allRssFeed, $force); // Generate each individual subject feed foreach($ebooksBySubject as $subject => $ebooks){ krsort($ebooks); - $subjectRssFeed = new RssFeed('Standard Ebooks - ' . (string)$subject . ' Ebooks', 'Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.', '/feeds/rss/subjects/' . Formatter::MakeUrlSafe((string)$subject), WEB_ROOT . '/feeds/rss/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml', $ebooks); + $subjectRssFeed = new RssFeed('Standard Ebooks - ' . (string)$subject . ' Ebooks', 'Standard Ebooks tagged with “' . strtolower($subject) . 
',” most-recently-released first.', '/feeds/rss/subjects/' . Formatter::MakeUrlSafe((string)$subject), $webRoot . '/feeds/rss/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml', $ebooks); SaveFeed($subjectRssFeed, $force); } // Now create the Atom feeds // Create the 'newest' feed -$newestAtomFeed = new AtomFeed('Standard Ebooks - Newest Ebooks', 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.', '/feeds/atom/new-releases', WEB_ROOT . '/feeds/atom/new-releases.xml', $newestEbooks); +$newestAtomFeed = new AtomFeed('Standard Ebooks - Newest Ebooks', 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.', '/feeds/atom/new-releases', $webRoot . '/feeds/atom/new-releases.xml', $newestEbooks); SaveFeed($newestAtomFeed, $force, $now); // Create the 'all' feed -$allAtomFeed = new AtomFeed('Standard Ebooks - All Ebooks', 'All Standard Ebooks, most-recently-released first.', '/feeds/atom/all', WEB_ROOT . '/feeds/atom/all.xml', $allEbooks); +$allAtomFeed = new AtomFeed('Standard Ebooks - All Ebooks', 'All Standard Ebooks, most-recently-released first.', '/feeds/atom/all', $webRoot . '/feeds/atom/all.xml', $allEbooks); SaveFeed($allAtomFeed, $force, $now); // Generate each individual subject feed foreach($ebooksBySubject as $subject => $ebooks){ krsort($ebooks); - $subjectAtomFeed = new AtomFeed('Standard Ebooks - ' . (string)$subject . ' Ebooks', 'Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.', '/feeds/atom/subjects/' . Formatter::MakeUrlSafe((string)$subject), WEB_ROOT . '/feeds/atom/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml', $ebooks); + $subjectAtomFeed = new AtomFeed('Standard Ebooks - ' . (string)$subject . ' Ebooks', 'Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.', '/feeds/atom/subjects/' . 
Formatter::MakeUrlSafe((string)$subject), $webRoot . '/feeds/atom/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml', $ebooks); SaveFeed($subjectAtomFeed, $force, $now); } ?> diff --git a/scripts/generate-monthly-downloads b/scripts/generate-monthly-downloads index 12c93e28..478e2bc4 100755 --- a/scripts/generate-monthly-downloads +++ b/scripts/generate-monthly-downloads @@ -2,49 +2,32 @@ Created->format('Y-m'); + $updatedTimestamp = $ebook->Updated->getTimestamp(); - $ebookWwwFilesystemPath = ''; - - try{ - $ebookWwwFilesystemPath = preg_replace('|/content\.opf|ius', '', $path); - - $ebook = new Ebook($ebookWwwFilesystemPath); - - $timestamp = $ebook->Created->format('Y-m'); - $updatedTimestamp = $ebook->Updated->getTimestamp(); - - if(!isset($ebooksByMonth[$timestamp])){ - $ebooksByMonth[$timestamp] = []; - $lastUpdatedTimestamps[$timestamp] = $updatedTimestamp; - } - - $ebooksByMonth[$timestamp][] = $ebook; - if($updatedTimestamp > $lastUpdatedTimestamps[$timestamp]){ - $lastUpdatedTimestamps[$timestamp] = $updatedTimestamp; - } + if(!isset($ebooksByMonth[$timestamp])){ + $ebooksByMonth[$timestamp] = []; + $lastUpdatedTimestamps[$timestamp] = $updatedTimestamp; } - catch(\Exception $ex){ - print('Failed to generate download for `' . $ebookWwwFilesystemPath . '`. Exception: ' . $ex->getMessage()); - continue; + + $ebooksByMonth[$timestamp][] = $ebook; + if($updatedTimestamp > $lastUpdatedTimestamps[$timestamp]){ + $lastUpdatedTimestamps[$timestamp] = $updatedTimestamp; } } foreach($ebooksByMonth as $month => $ebooks){ $filename = 'se-ebooks-' . $month . '.zip'; - $filePath = $webRoot . '/www/patrons-circle/downloads/' . $filename; + $filePath = $webRoot . '/patrons-circle/downloads/' . 
$filename; // If the file doesn't exist, or if the content.opf last updated time is newer than the file modification time if(!file_exists($filePath) || filemtime($filePath) < $lastUpdatedTimestamps[$month]){ @@ -61,28 +44,33 @@ foreach($ebooksByMonth as $month => $ebooks){ foreach($ebooks as $ebook){ if($ebook->EpubUrl !== null){ - $ebookFilePath = $webRoot . '/www' . $ebook->EpubUrl; + $ebookFilePath = $webRoot . '/' . $ebook->EpubUrl; $zip->addFile($ebookFilePath, $ebook->UrlSafeIdentifier . '/' . basename($ebookFilePath)); } if($ebook->Azw3Url !== null){ - $ebookFilePath = $webRoot . '/www' . $ebook->Azw3Url; + $ebookFilePath = $webRoot . '/' . $ebook->Azw3Url; $zip->addFile($ebookFilePath, $ebook->UrlSafeIdentifier . '/' . basename($ebookFilePath)); } if($ebook->KepubUrl !== null){ - $ebookFilePath = $webRoot . '/www' . $ebook->KepubUrl; + $ebookFilePath = $webRoot . '/' . $ebook->KepubUrl; $zip->addFile($ebookFilePath, $ebook->UrlSafeIdentifier . '/' . basename($ebookFilePath)); } if($ebook->AdvancedEpubUrl !== null){ - $ebookFilePath = $webRoot . '/www' . $ebook->AdvancedEpubUrl; + $ebookFilePath = $webRoot . '/' . $ebook->AdvancedEpubUrl; $zip->addFile($ebookFilePath, $ebook->UrlSafeIdentifier . '/' . basename($ebookFilePath)); } if($ebook->TextSinglePageUrl !== null){ - $ebookFilePath = $webRoot . '/www' . $ebook->TextSinglePageUrl . '.xhtml'; - $zip->addFile($ebookFilePath, $ebook->UrlSafeIdentifier . '/' . str_replace('single-page', $ebook->UrlSafeIdentifier, basename($ebookFilePath))); + $ebookFilePath = $webRoot . '/' . $ebook->TextSinglePageUrl . '.xhtml'; + + // Strip the navigation header that was added as part of the deploy process + $xhtml = file_get_contents($ebookFilePath); + $xhtml = preg_replace('|
|ius', '', $xhtml); + + $zip->addFromString($ebook->UrlSafeIdentifier . '/' . str_replace('single-page', $ebook->UrlSafeIdentifier, basename($ebookFilePath)), $xhtml); } }