From 45221365b56a7de35721c2d24aae6926b6e4d057 Mon Sep 17 00:00:00 2001 From: Alex Cabal Date: Sun, 10 Jul 2022 13:18:22 -0500 Subject: [PATCH] More work on bulk downloads --- config/apache/standardebooks.org.conf | 4 +- config/apache/standardebooks.test.conf | 5 +- lib/Library.php | 17 ++++-- lib/Tag.php | 2 +- scripts/generate-bulk-downloads | 76 +++++++++++++++++------ templates/BulkDownloadTable.php | 2 +- www/bulk-downloads/authors/index.php | 2 +- www/bulk-downloads/collections/index.php | 2 +- www/bulk-downloads/get.php | 78 +++++++++++++++++------- www/bulk-downloads/months/index.php | 2 +- www/bulk-downloads/subjects/index.php | 2 +- www/css/core.css | 10 ++- www/ebooks/author.php | 6 +- www/ebooks/index.php | 16 ++--- 14 files changed, 156 insertions(+), 68 deletions(-) diff --git a/config/apache/standardebooks.org.conf b/config/apache/standardebooks.org.conf index 95e00c8c..d4f063e3 100644 --- a/config/apache/standardebooks.org.conf +++ b/config/apache/standardebooks.org.conf @@ -245,8 +245,10 @@ Define webroot /standardebooks.org/web RewriteRule ^/images/covers/(.+?)\-[a-z0-9]{8}\-(cover|hero)(@2x)?\.(jpg|avif)$ /images/covers/$1-$2$3.$4 RewriteRule ^/ebooks/([^\./]+?)$ /ebooks/author.php?url-path=$1 [QSA] - RewriteRule ^/tags/([^\./]+?)$ /ebooks/index.php?tags[]=$1 [QSA] + RewriteRule ^/ebooks/([^\./]+?)/downloads$ /bulk-downloads/get.php?author=$1 [QSA] + RewriteRule ^/subjects/([^\./]+?)$ /ebooks/index.php?tags[]=$1 [QSA] RewriteRule ^/collections/([^\./]+?)$ /ebooks/index.php?collection=$1 [QSA] + RewriteRule ^/collections/([^/]+?)/downloads$ /bulk-downloads/get.php?collection=$1 # Prevent this rule from firing if we're getting a distribution file RewriteCond %{REQUEST_FILENAME} !^/ebooks/.+?/downloads/.+$ diff --git a/config/apache/standardebooks.test.conf b/config/apache/standardebooks.test.conf index 5ae91589..8bf6b5d8 100644 --- a/config/apache/standardebooks.test.conf +++ b/config/apache/standardebooks.test.conf @@ -227,9 +227,10 @@ Define 
webroot /standardebooks.org/web RewriteRule ^/images/covers/(.+?)\-[a-z0-9]{8}\-(cover|hero)(@2x)?\.(jpg|avif)$ /images/covers/$1-$2$3.$4 RewriteRule ^/ebooks/([^\./]+?)$ /ebooks/author.php?url-path=$1 [QSA] - RewriteRule ^/tags/([^\./]+?)$ /ebooks/index.php?tags[]=$1 [QSA] + RewriteRule ^/ebooks/([^\./]+?)/downloads$ /bulk-downloads/get.php?author=$1 [QSA] + RewriteRule ^/subjects/([^\./]+?)$ /ebooks/index.php?tags[]=$1 [QSA] RewriteRule ^/collections/([^\./]+?)$ /ebooks/index.php?collection=$1 [QSA] - RewriteRule ^/collections/([^/]+?)/download$ /bulk-downloads/get.php?collection=$1 + RewriteRule ^/collections/([^/]+?)/downloads$ /bulk-downloads/get.php?collection=$1 # Prevent this rule from firing if we're getting a distribution file RewriteCond %{REQUEST_FILENAME} !^/ebooks/.+?/downloads/.+$ diff --git a/lib/Library.php b/lib/Library.php index b6fc46af..817ef5b9 100644 --- a/lib/Library.php +++ b/lib/Library.php @@ -225,7 +225,7 @@ class Library{ return $ebooks; } - private static function FillBulkDownloadObject(string $dir, string $downloadType): stdClass{ + private static function FillBulkDownloadObject(string $dir, string $downloadType, string $urlRoot): stdClass{ $obj = new stdClass(); // The count of ebooks in each file is stored as a filesystem attribute @@ -243,7 +243,12 @@ class Library{ $obj->Label = basename($dir); } - $obj->UrlLabel = Formatter::MakeUrlSafe($obj->Label); + $obj->UrlLabel = exec('attr -g se-url-label ' . escapeshellarg($dir)) ?: null; + if($obj->UrlLabel === null){ + $obj->UrlLabel = Formatter::MakeUrlSafe($obj->Label); + } + + $obj->Url = $urlRoot . '/' . $obj->UrlLabel; $obj->LabelSort = exec('attr -g se-label-sort ' . escapeshellarg($dir)) ?: null; if($obj->LabelSort === null){ @@ -329,7 +334,7 @@ class Library{ rsort($dirs); foreach($dirs as $dir){ - $obj = self::FillBulkDownloadObject($dir, 'months'); + $obj = self::FillBulkDownloadObject($dir, 'months', '/months'); $date = new DateTime($obj->Label . 
'-01'); $year = $date->format('Y'); @@ -346,7 +351,7 @@ class Library{ // Generate bulk downloads by subject foreach(glob(WEB_ROOT . '/bulk-downloads/subjects/*/', GLOB_NOSORT) as $dir){ - $subjects[] = self::FillBulkDownloadObject($dir, 'subjects'); + $subjects[] = self::FillBulkDownloadObject($dir, 'subjects', '/subjects'); } usort($subjects, function($a, $b){ return $a->LabelSort <=> $b->LabelSort; }); @@ -354,7 +359,7 @@ class Library{ // Generate bulk downloads by collection foreach(glob(WEB_ROOT . '/bulk-downloads/collections/*/', GLOB_NOSORT) as $dir){ - $collections[] = self::FillBulkDownloadObject($dir, 'collections'); + $collections[] = self::FillBulkDownloadObject($dir, 'collections', '/collections'); } usort($collections, function($a, $b){ return $a->LabelSort <=> $b->LabelSort; }); @@ -362,7 +367,7 @@ class Library{ // Generate bulk downloads by authors foreach(glob(WEB_ROOT . '/bulk-downloads/authors/*/', GLOB_NOSORT) as $dir){ - $authors[] = self::FillBulkDownloadObject($dir, 'authors'); + $authors[] = self::FillBulkDownloadObject($dir, 'authors', '/ebooks'); } usort($authors, function($a, $b){ return $a->LabelSort <=> $b->LabelSort; }); diff --git a/lib/Tag.php b/lib/Tag.php index 024b3fb4..bf66d475 100644 --- a/lib/Tag.php +++ b/lib/Tag.php @@ -7,6 +7,6 @@ class Tag{ public function __construct(string $name){ $this->Name = $name; $this->UrlName = Formatter::MakeUrlSafe($this->Name); - $this->Url = '/tags/' . $this->UrlName; + $this->Url = '/subjects/' . 
$this->UrlName; } } diff --git a/scripts/generate-bulk-downloads b/scripts/generate-bulk-downloads index ef01c46d..567a91fd 100755 --- a/scripts/generate-bulk-downloads +++ b/scripts/generate-bulk-downloads @@ -13,6 +13,25 @@ $groups = ['collections', 'subjects', 'authors', 'months']; $ebooksByGroup = []; $updatedByGroup = []; +function rrmdir($src){ + // See https://www.php.net/manual/en/function.rmdir.php#117354 + $dir = opendir($src); + while(false !== ($file = readdir($dir))) { + if (($file != '.') && ($file != '..')){ + $full = $src . '/' . $file; + if(is_dir($full)){ + rrmdir($full); + } + else{ + unlink($full); + } + } + } + + closedir($dir); + rmdir($src); +} + function CreateZip(string $filePath, array $ebooks, string $type, string $webRoot): void{ $tempFilename = tempnam(sys_get_temp_dir(), "se-ebooks"); @@ -71,6 +90,7 @@ foreach(Library::GetEbooksFromFilesystem($webRoot) as $ebook){ $obj = new stdClass(); $obj->Label = $timestamp; $obj->LabelSort = $timestamp; + $obj->UrlLabel = Formatter::MakeUrlSafe($obj->Label); $obj->Updated = $updatedTimestamp; $obj->Ebooks = [$ebook]; @@ -89,6 +109,7 @@ foreach(Library::GetEbooksFromFilesystem($webRoot) as $ebook){ $obj = new stdClass(); $obj->Label = $tag->Name; $obj->LabelSort = $tag->Name; + $obj->UrlLabel = Formatter::MakeUrlSafe($obj->Label); $obj->Updated = $updatedTimestamp; $obj->Ebooks = [$ebook]; @@ -108,6 +129,7 @@ foreach(Library::GetEbooksFromFilesystem($webRoot) as $ebook){ $obj = new stdClass(); $obj->Label = $collection->Name; $obj->LabelSort = $collection->GetSortedName(); + $obj->UrlLabel = Formatter::MakeUrlSafe($obj->Label); $obj->Updated = $updatedTimestamp; $obj->Ebooks = [$ebook]; @@ -122,29 +144,46 @@ foreach(Library::GetEbooksFromFilesystem($webRoot) as $ebook){ } // Add to the 'books by author' list - foreach($ebook->Authors as $author){ - if(!isset($ebooksByGroup['authors'][$author->Name])){ - $obj = new stdClass(); - $obj->Label = $author->Name; - $obj->LabelSort = $author->SortName; - 
$obj->Updated = $updatedTimestamp; - $obj->Ebooks = [$ebook]; + // We have to index by UrlName for cases like `Samuel Butler` whose UrlName is `samuel-butler-1612-1680`. + $authorsUrl = preg_replace('|^/ebooks/|', '', $ebook->AuthorsUrl); + if(!isset($ebooksByGroup['authors'][$authorsUrl])){ + $obj = new stdClass(); + $obj->Label = strip_tags($ebook->AuthorsHtml); + $obj->LabelSort = $ebook->Authors[0]->SortName; + $obj->UrlLabel = $authorsUrl; + $obj->Updated = $updatedTimestamp; + $obj->Ebooks = [$ebook]; - $ebooksByGroup['authors'][$author->Name] = $obj; - } - else{ - $ebooksByGroup['authors'][$author->Name]->Ebooks[] = $ebook; - if($updatedTimestamp > $ebooksByGroup['authors'][$author->Name]->Updated){ - $ebooksByGroup['authors'][$author->Name]->Updated = $updatedTimestamp; - } + $ebooksByGroup['authors'][$authorsUrl] = $obj; + } + else{ + $ebooksByGroup['authors'][$authorsUrl]->Ebooks[] = $ebook; + if($updatedTimestamp > $ebooksByGroup['authors'][$authorsUrl]->Updated){ + $ebooksByGroup['authors'][$authorsUrl]->Updated = $updatedTimestamp; } } } foreach($groups as $group){ + // First delete any orphan directories that we don't expect to be here, for example a collection that was later renamed + foreach(glob($webRoot . '/bulk-downloads/' . $group . '/*/') as $dir){ + $expected = false; + foreach($ebooksByGroup[$group] as $collection){ + if($collection->UrlLabel == basename($dir)){ + $expected = true; + break; + } + } + + if(!$expected){ + print('Removing ' . $dir . "\n"); + rrmdir($dir); + } + } + + // Now create the zip files! foreach($ebooksByGroup[$group] as $collection){ - $urlSafeCollection = Formatter::MakeUrlSafe($collection->Label); - $parentDir = $webRoot . '/bulk-downloads/' . $group . '/' . $urlSafeCollection; + $parentDir = $webRoot . '/bulk-downloads/' . $group . '/' . $collection->UrlLabel; if(!is_dir($parentDir)){ mkdir($parentDir, 0775, true); @@ -154,8 +193,11 @@ foreach($groups as $group){ exec('attr -q -s se-label -V ' . 
escapeshellarg($collection->Label) . ' ' . escapeshellarg($parentDir)); exec('attr -q -s se-label-sort -V ' . escapeshellarg($collection->LabelSort) . ' ' . escapeshellarg($parentDir)); + // We also need to save the URL label for author edge cases like `Samuel Butler` -> `samuel-butler-1612-1680` or `Karl Marx and Friedrich Engels` -> `karl-marx_friedrich-engels` + exec('attr -q -s se-url-label -V ' . escapeshellarg($collection->UrlLabel) . ' ' . escapeshellarg($parentDir)); + foreach($types as $type){ - $filePath = $parentDir . '/se-ebooks-' . $urlSafeCollection . '-' . $type . '.zip'; + $filePath = $parentDir . '/se-ebooks-' . $collection->UrlLabel . '-' . $type . '.zip'; // If the file doesn't exist, or if the content.opf last updated time is newer than the file modification time if(!file_exists($filePath) || filemtime($filePath) < $collection->Updated){ diff --git a/templates/BulkDownloadTable.php b/templates/BulkDownloadTable.php index ddaaa3da..93d8eb83 100644 --- a/templates/BulkDownloadTable.php +++ b/templates/BulkDownloadTable.php @@ -10,7 +10,7 @@ - Label) ?> + Label) ?> EbookCount)) ?> UpdatedString) ?> diff --git a/www/bulk-downloads/authors/index.php b/www/bulk-downloads/authors/index.php index cec1ad2a..c5d3c182 100644 --- a/www/bulk-downloads/authors/index.php +++ b/www/bulk-downloads/authors/index.php @@ -29,7 +29,7 @@ catch(Safe\Exceptions\ApcuException $ex){ $forbiddenException]) ?>

Patrons Circle members can download zip files containing all of the ebooks by a given author. You can join the Patrons Circle with a small donation in support of our continuing mission to create free, beautiful digital literature.

-

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook.

+

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook. Read about which file format to download.

If you’re a Patrons Circle member, when prompted enter your email address and leave the password field blank to download these files.

'Author', 'collections' => $authors]); ?> diff --git a/www/bulk-downloads/collections/index.php b/www/bulk-downloads/collections/index.php index f282cdef..be1e93fa 100644 --- a/www/bulk-downloads/collections/index.php +++ b/www/bulk-downloads/collections/index.php @@ -29,7 +29,7 @@ catch(Safe\Exceptions\ApcuException $ex){ $forbiddenException]) ?>

Patrons Circle members can download zip files containing all of the ebooks in a given collection. You can join the Patrons Circle with a small donation in support of our continuing mission to create free, beautiful digital literature.

-

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook.

+

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook. Read about which file format to download.

If you’re a Patrons Circle member, when prompted enter your email address and leave the password field blank to download these files.

'Collection', 'collections' => $collections]); ?> diff --git a/www/bulk-downloads/get.php b/www/bulk-downloads/get.php index 04b267ed..824b529e 100644 --- a/www/bulk-downloads/get.php +++ b/www/bulk-downloads/get.php @@ -3,7 +3,10 @@ require_once('Core.php'); use function Safe\apcu_fetch; -$bulkDownloadCollection = null; +$collection = null; +$collectionUrlName = HttpInput::Str(GET, 'collection', false); +$collection = null; +$authorUrlName = HttpInput::Str(GET, 'author', false); $exception = null; $user = null; @@ -11,29 +14,60 @@ try{ if(isset($_SERVER['PHP_AUTH_USER'])){ $user = User::GetByPatronIdentifier($_SERVER['PHP_AUTH_USER']); } + + if($collectionUrlName !== null){ + $collections = []; + + // Get all collections and then find the specific one we're looking for + try{ + $collections = apcu_fetch('bulk-downloads-collections'); + } + catch(Safe\Exceptions\ApcuException $ex){ + $result = Library::RebuildBulkDownloadsCache(); + $collections = $result['collections']; + } + + foreach($collections as $c){ + if($c->UrlLabel == $collectionUrlName){ + $collection = $c; + break; + } + } + + if($collection === null){ + throw new Exceptions\InvalidCollectionException(); + } + } + + if($authorUrlName !== null){ + $authors = []; + + // Get all authors and then find the specific one we're looking for + try{ + $collections = apcu_fetch('bulk-downloads-authors'); + } + catch(Safe\Exceptions\ApcuException $ex){ + $result = Library::RebuildBulkDownloadsCache(); + $collections = $result['authors']; + } + + foreach($collections as $c){ + if($c->UrlLabel == $authorUrlName){ + $collection = $c; + break; + } + } + + if($collection === null){ + throw new Exceptions\InvalidAuthorException(); + } + } } catch(Exceptions\InvalidUserException $ex){ $exception = new Exceptions\InvalidPatronException(); } - -try{ - - $collection = HttpInput::Str(GET, 'collection', false) ?? 
''; - $collections = []; - - try{ - $collections = apcu_fetch('bulk-downloads-collections'); - } - catch(Safe\Exceptions\ApcuException $ex){ - $result = Library::RebuildBulkDownloadsCache(); - $collections = $result['collections']; - } - - if(!isset($collections[$collection]) || sizeof($collections[$collection]) == 0){ - throw new Exceptions\InvalidCollectionException(); - } - - $bulkDownloadCollection = $collections[$collection]; +catch(Exceptions\InvalidCollectionException $ex){ + Template::Emit404(); } catch(Exceptions\InvalidCollectionException $ex){ Template::Emit404(); @@ -42,7 +76,7 @@ catch(Exceptions\InvalidCollectionException $ex){ ?> 'Download ', 'highlight' => '', 'description' => 'Download zip files containing all of the Standard Ebooks released in a given month.']) ?>
-

Download the Label ?> Collection

+

Download the Label ?> Collection

$exception]) ?>

Patrons Circle members can download zip files containing all of the ebooks in a collection. You can join the Patrons Circle with a small donation in support of our continuing mission to create free, beautiful digital literature.

@@ -51,7 +85,7 @@ catch(Exceptions\InvalidCollectionException $ex){

Select the ebook format in which you’d like to download this collection.

You can also read about which ebook format to download.

- 'Collection', 'collections' => [$bulkDownloadCollection]]); ?> + 'Collection', 'collections' => [$collection]]); ?>
diff --git a/www/bulk-downloads/months/index.php b/www/bulk-downloads/months/index.php index f310acf8..e5226d8d 100644 --- a/www/bulk-downloads/months/index.php +++ b/www/bulk-downloads/months/index.php @@ -29,7 +29,7 @@ catch(Safe\Exceptions\ApcuException $ex){ $forbiddenException]) ?>

Patrons Circle members can download zip files containing all of the ebooks that were released in a given month of Standard Ebooks history. You can join the Patrons Circle with a small donation in support of our continuing mission to create free, beautiful digital literature.

-

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook.

+

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook. Read about which file format to download.

If you’re a Patrons Circle member, when prompted enter your email address and leave the password field blank to download these files.

diff --git a/www/bulk-downloads/subjects/index.php b/www/bulk-downloads/subjects/index.php index 5c927650..3da3e9dd 100644 --- a/www/bulk-downloads/subjects/index.php +++ b/www/bulk-downloads/subjects/index.php @@ -29,7 +29,7 @@ catch(Safe\Exceptions\ApcuException $ex){ $forbiddenException]) ?>

Patrons Circle members can download zip files containing all of the ebooks in a given subject. You can join the Patrons Circle with a small donation in support of our continuing mission to create free, beautiful digital literature.

-

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook.

+

These zip files contain each ebook in every format we offer, and are updated once daily with the latest versions of each ebook. Read about which file format to download.

If you’re a Patrons Circle member, when prompted enter your email address and leave the password field blank to download these files.

'Subject', 'collections' => $subjects]); ?> diff --git a/www/css/core.css b/www/css/core.css index f4d59bc0..0f17b40b 100644 --- a/www/css/core.css +++ b/www/css/core.css @@ -2301,10 +2301,17 @@ article.step-by-step-guide ol ol{ width: 100%; } +h1.is-collection{ + margin-bottom: 1rem; +} + .download-collection{ display: flex; justify-content: center; - margin-bottom: 2rem; + margin-bottom: 4rem; + border-bottom: 1px dashed var(--sub-text); + padding-bottom: 1rem; + font-style: italic; } abbr.acronym{ @@ -3359,6 +3366,7 @@ ul.feed p{ body > header ul li:nth-child(2) ~ li, body > header ul li + li{ margin-top: 1rem; + padding-top: 0; } body > header ul li, diff --git a/www/ebooks/author.php b/www/ebooks/author.php index 4ca91870..411fb038 100644 --- a/www/ebooks/author.php +++ b/www/ebooks/author.php @@ -23,8 +23,12 @@ catch(Exceptions\InvalidAuthorException $ex){ } ?> 'Ebooks by ' . strip_tags($ebooks[0]->AuthorsHtml), 'highlight' => 'ebooks', 'description' => 'All of the Standard Ebooks ebooks by ' . strip_tags($ebooks[0]->AuthorsHtml)]) ?>
-

Ebooks by AuthorsHtml ?>

+ 1){ ?> class="is-collection">Ebooks by AuthorsHtml ?> + 1){ ?> +

Download all ebooks in this collection

+ $ebooks, 'view' => VIEW_GRID]) ?> +

We also have bulk ebook downloads available, as well as ebook catalog feeds for use directly in your ereader app or RSS reader.

diff --git a/www/ebooks/index.php b/www/ebooks/index.php index 8510a567..22825407 100644 --- a/www/ebooks/index.php +++ b/www/ebooks/index.php @@ -65,17 +65,9 @@ try{ $collectionName = preg_replace('/^The /ius', '', $collectionObject->Name); $collectionType = $collectionObject->Type ?? 'collection'; - # This is a kind of .endswith() test - if(substr_compare(mb_strtolower($collectionObject->Name), mb_strtolower($collectionObject->Type), -strlen(mb_strtolower($collectionObject->Type))) !== 0){ - $collectionType = ' ' . $collectionType; - } - else{ - $collectionType = ''; - } - - $pageTitle = 'Browse free ebooks in the ' . Formatter::ToPlainText($collectionName) . $collectionType; + $pageTitle = 'Browse free ebooks in the ' . Formatter::ToPlainText($collectionName) . ' ' . $collectionType; $pageDescription = 'A list of free ebooks in the ' . Formatter::ToPlainText($collectionName) . ' ' . $collectionType; - $pageHeader = 'Free ebooks in the ' . Formatter::ToPlainText($collectionName) . ' ' . $collectionType; + $pageHeader = 'Free Ebooks in the ' . Formatter::ToPlainText($collectionName) . ' ' . ucfirst($collectionType); } else{ throw new Exceptions\InvalidCollectionException(); @@ -126,7 +118,7 @@ catch(Exceptions\InvalidCollectionException $ex){ } ?> $pageTitle, 'highlight' => 'ebooks', 'description' => $pageDescription]) ?>
-

+ 1){ ?> class="is-collection"> @@ -136,7 +128,7 @@ catch(Exceptions\InvalidCollectionException $ex){ $query, 'tags' => $tags, 'sort' => $sort, 'view' => $view, 'perPage' => $perPage]) ?> 1){ ?> -

Download entire collection

+

Download all ebooks in this collection

No ebooks matched your filters. You can try different filters, or browse all of our ebooks.