Make MARC roles an enum and restructure how contributors are fetched from the DB to reduce queries

This commit is contained in:
Alex Cabal 2024-11-11 12:01:53 -06:00
parent daf8e16ef4
commit d6a2bdcbc8
8 changed files with 91 additions and 75 deletions

View file

@ -1,3 +1,12 @@
# Work around some naughty hotlinkers
RewriteCond %{HTTP_REFERER} !^$
RewriteCond %{HTTP_REFERER} ^https?://.*medialibrary.it [NC]
RewriteRule ^(/ebooks/.+/downloads/|/images/covers/) /images/do-not-hotlink.jpg [R=301,L,NC]
# Rewrite cover image URLs that contain a cache-busting hash to the root image
# We do this because some sites like Google cache the cover image path, so changing it results in lots of 404s
RewriteRule ^/images/covers/(.+?)\-[a-z0-9]{8}\-(cover|hero)(@2x)?\.(jpg|avif)$ /images/covers/$1-$2$3.$4
# Rewrite ebook downloads
RewriteRule ^/ebooks/(.+?)/download$ /ebooks/download.php?url-path=$1 [QSA]

View file

@ -7,15 +7,6 @@ RewriteRule ^/tools$ https://github.com/standardebooks/tools [R=302,L]
# Redirect latest version of the manual
RewriteRule ^/manual/latest(.*) /manual/index.php?url=$1 [L]
# Work around some naughty hotlinkers
RewriteCond %{HTTP_REFERER} !^$
RewriteCond %{HTTP_REFERER} ^https?://.*medialibrary.it [NC]
RewriteRule ^(/ebooks/.+/downloads/|/images/covers/) /images/do-not-hotlink.jpg [R=301,L,NC]
# Redirect cover images with caching sha's to the root image
# We do this because some sites like Google cache the cover image path, so changing it results in lots of 404s
RewriteRule ^/images/covers/(.+?)\-[a-z0-9]{8}\-(cover|hero)(@2x)?\.(jpg|avif)$ /images/covers/$1-$2$3.$4
# Rewrite rules for bulk downloads
RewriteRule ^/bulk-downloads/(.+\.zip)$ /bulk-downloads/download.php?path=$1
RewriteRule ^/bulk-downloads/([^/\.]+)$ /bulk-downloads/collection.php?class=$1

View file

@ -4,7 +4,7 @@ CREATE TABLE IF NOT EXISTS `Contributors` (
`UrlName` varchar(255) NOT NULL,
`SortName` varchar(255) NULL,
`WikipediaUrl` varchar(255) NULL,
`MarcRole` varchar(10) NULL,
`MarcRole` ENUM('aut', 'ctb', 'ill', 'trl') NOT NULL,
`FullName` varchar(255) NULL,
`NacoafUrl` varchar(255) NULL,
`SortOrder` tinyint(3) unsigned NOT NULL,

View file

@ -7,7 +7,7 @@ class Contributor{
public string $UrlName;
public ?string $SortName = null;
public ?string $WikipediaUrl = null;
public ?string $MarcRole = null;
public Enums\MarcRole $MarcRole;
public ?string $FullName = null;
public ?string $NacoafUrl = null;
public int $SortOrder;
@ -32,19 +32,14 @@ class Contributor{
if($this->Name == ''){
$error->Add(new Exceptions\ContributorNameRequiredException());
}
$this->UrlName = Formatter::MakeUrlSafe($this->Name);
}
else{
$error->Add(new Exceptions\ContributorNameRequiredException());
}
if(isset($this->UrlName)){
$this->UrlName = trim($this->UrlName);
if($this->UrlName == ''){
$error->Add(new Exceptions\ContributorUrlNameRequiredException());
}
}
else{
if(!isset($this->UrlName)){
$error->Add(new Exceptions\ContributorUrlNameRequiredException());
}
@ -73,11 +68,6 @@ class Contributor{
}
}
$this->MarcRole = trim($this->MarcRole ?? '');
if($this->MarcRole == ''){
$this->MarcRole = null;
}
$this->NacoafUrl = trim($this->NacoafUrl ?? '');
if($this->NacoafUrl == ''){
$this->NacoafUrl = null;

View file

@ -199,18 +199,55 @@ class Ebook{
return $this->_Sources;
}
/**
 * Populate every per-role contributor list of this ebook: authors, translators, illustrators, and other contributors.
 *
 * All of the ebook's contributor rows are read with one database query and then sorted into the per-role lists, rather than issuing a separate query for each role.
 */
protected function GetAllContributors(): void{
	$rows = Db::Query('
SELECT *
from Contributors
where EbookId = ?
order by MarcRole asc, SortOrder asc
', [$this->EbookId], Contributor::class);

	// Start every list out empty, so that a role with no rows still ends up as an initialized, empty array.
	$this->_Contributors = [];
	$this->_Illustrators = [];
	$this->_Translators = [];
	$this->_Authors = [];

	foreach($rows as $person){
		$role = $person->MarcRole;

		// Rows whose role matches none of the four cases are left out of every list.
		if($role === Enums\MarcRole::Author){
			$this->_Authors[] = $person;
		}
		elseif($role === Enums\MarcRole::Translator){
			$this->_Translators[] = $person;
		}
		elseif($role === Enums\MarcRole::Illustrator){
			$this->_Illustrators[] = $person;
		}
		elseif($role === Enums\MarcRole::Contributor){
			$this->_Contributors[] = $person;
		}
	}
}
/**
* @return array<Contributor>
*/
protected function GetAuthors(): array{
if(!isset($this->_Authors)){
$this->_Authors = Db::Query('
SELECT *
from Contributors
where EbookId = ?
and MarcRole = ?
order by SortOrder asc
', [$this->EbookId, 'aut'], Contributor::class);
$this->GetAllContributors();
}
return $this->_Authors;
@ -221,13 +258,7 @@ class Ebook{
*/
protected function GetIllustrators(): array{
if(!isset($this->_Illustrators)){
$this->_Illustrators = Db::Query('
SELECT *
from Contributors
where EbookId = ?
and MarcRole = ?
order by SortOrder asc
', [$this->EbookId, 'ill'], Contributor::class);
$this->GetAllContributors();
}
return $this->_Illustrators;
@ -238,13 +269,7 @@ class Ebook{
*/
protected function GetTranslators(): array{
if(!isset($this->_Translators)){
$this->_Translators = Db::Query('
SELECT *
from Contributors
where EbookId = ?
and MarcRole = ?
order by SortOrder asc
', [$this->EbookId, 'trl'], Contributor::class);
$this->GetAllContributors();
}
return $this->_Translators;
@ -255,13 +280,7 @@ class Ebook{
*/
protected function GetContributors(): array{
if(!isset($this->_Contributors)){
$this->_Contributors = Db::Query('
SELECT *
from Contributors
where EbookId = ?
and MarcRole = ?
order by SortOrder asc
', [$this->EbookId, 'ctb'], Contributor::class);
$this->GetAllContributors();
}
return $this->_Contributors;
@ -317,13 +336,9 @@ class Ebook{
return $this->_UrlSafeIdentifier;
}
private function GetLatestCommitHash(): string{
return substr(sha1($this->GitCommits[0]->Hash), 0, 8);
}
protected function GetHeroImageUrl(): string{
if(!isset($this->_HeroImageUrl)){
$this->_HeroImageUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-hero.jpg';
$this->_HeroImageUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-hero.jpg';
}
return $this->_HeroImageUrl;
@ -332,7 +347,7 @@ class Ebook{
protected function GetHeroImageAvifUrl(): ?string{
if(!isset($this->_HeroImageAvifUrl)){
if(file_exists(WEB_ROOT . '/images/covers/' . $this->UrlSafeIdentifier . '-hero.avif')){
$this->_HeroImageAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-hero.avif';
$this->_HeroImageAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-hero.avif';
}
}
@ -341,7 +356,7 @@ class Ebook{
protected function GetHeroImage2xUrl(): string{
if(!isset($this->_HeroImage2xUrl)){
$this->_HeroImage2xUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-hero@2x.jpg';
$this->_HeroImage2xUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-hero@2x.jpg';
}
return $this->_HeroImage2xUrl;
@ -350,7 +365,7 @@ class Ebook{
protected function GetHeroImage2xAvifUrl(): ?string{
if(!isset($this->_HeroImage2xAvifUrl)){
if(file_exists(WEB_ROOT . '/images/covers/' . $this->UrlSafeIdentifier . '-hero@2x.avif')){
$this->_HeroImage2xAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-hero@2x.avif';
$this->_HeroImage2xAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-hero@2x.avif';
}
}
@ -359,7 +374,7 @@ class Ebook{
protected function GetCoverImageUrl(): string{
if(!isset($this->_CoverImageUrl)){
$this->_CoverImageUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-cover.jpg';
$this->_CoverImageUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-cover.jpg';
}
return $this->_CoverImageUrl;
@ -368,7 +383,7 @@ class Ebook{
protected function GetCoverImageAvifUrl(): ?string{
if(!isset($this->_CoverImageAvifUrl)){
if(file_exists(WEB_ROOT . '/images/covers/' . $this->UrlSafeIdentifier . '-cover.avif')){
$this->_CoverImageAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-cover.avif';
$this->_CoverImageAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-cover.avif';
}
}
@ -377,7 +392,7 @@ class Ebook{
protected function GetCoverImage2xUrl(): string{
if(!isset($this->_CoverImage2xUrl)){
$this->_CoverImage2xUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-cover@2x.jpg';
$this->_CoverImage2xUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-cover@2x.jpg';
}
return $this->_CoverImage2xUrl;
@ -386,7 +401,7 @@ class Ebook{
protected function GetCoverImage2xAvifUrl(): ?string{
if(!isset($this->_CoverImage2xAvifUrl)){
if(file_exists(WEB_ROOT . '/images/covers/' . $this->UrlSafeIdentifier . '-cover@2x.avif')){
$this->_CoverImage2xAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $this->GetLatestCommitHash() . '-cover@2x.avif';
$this->_CoverImage2xAvifUrl = '/images/covers/' . $this->UrlSafeIdentifier . '-' . substr(sha1($this->Updated->format(Enums\DateTimeFormat::UnixTimestamp->value)), 0, 8) . '-cover@2x.avif';
}
}
@ -823,7 +838,7 @@ class Ebook{
$contributor->SortName = $fileAs;
$contributor->FullName = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:name.person.full-name"][@refines="#' . $id . '"]'));
$contributor->WikipediaUrl = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:url.encyclopedia.wikipedia"][@refines="#' . $id . '"]'));
$contributor->MarcRole = 'aut';
$contributor->MarcRole = Enums\MarcRole::Author;
$contributor->NacoafUrl = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:url.authority.nacoaf"][@refines="#' . $id . '"]'));
$authors[] = $contributor;
@ -850,7 +865,7 @@ class Ebook{
$c->SortName = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="file-as"][@refines="#' . $id . '"]'));
$c->FullName = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:name.person.full-name"][@refines="#' . $id . '"]'));
$c->WikipediaUrl = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:url.encyclopedia.wikipedia"][@refines="#' . $id . '"]'));
$c->MarcRole = $role;
$c->MarcRole = Enums\MarcRole::tryFrom((string)$role) ?? Enums\MarcRole::Contributor;
$c->NacoafUrl = Ebook::NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:url.authority.nacoaf"][@refines="#' . $id . '"]'));
// A display-sequence of 0 indicates that we don't want to process this contributor.
@ -1437,10 +1452,10 @@ class Ebook{
foreach($contributors as $contributor){
$role = 'schema:contributor';
switch($contributor->MarcRole){
case 'trl':
case Enums\MarcRole::Translator:
$role = 'schema:translator';
break;
case 'ill':
case Enums\MarcRole::Illustrator:
$role = 'schema:illustrator';
break;
}
@ -1497,10 +1512,10 @@ class Ebook{
foreach($this->Translators as $contributor){
$role = 'schema:contributor';
switch($contributor->MarcRole){
case 'trl':
case Enums\MarcRole::Translator:
$role = 'schema:translator';
break;
case 'ill':
case Enums\MarcRole::Illustrator:
$role = 'schema:illustrator';
break;
}

9
lib/Enums/MarcRole.php Normal file
View file

@ -0,0 +1,9 @@
<?
namespace Enums;
/**
 * The role a contributor plays in an ebook, backed by the three-letter code string for that role.
 *
 * NOTE(review): The backing values look like MARC relator codes, and they mirror the values allowed by the `MarcRole` ENUM column of the `Contributors` table; confirm both before adding or renaming a case.
 */
enum MarcRole: string{
// The writer of the work.
case Author = 'aut';
// Any other contributor; also used as the fallback when a role code read from ebook metadata is not one of these cases.
case Contributor = 'ctb';
// The illustrator of the work.
case Illustrator = 'ill';
// The translator of the work.
case Translator = 'trl';
}

View file

@ -354,7 +354,7 @@ do
find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li><li><a href=\"${bookUrl}/text\">Table of contents</a></li></ul></nav></header>|"
# Add a chapter navigation footer to each page.
"${scriptsDir}"/inject-chapter-navigation-footer "${workDir}" "${bookUrl}"
#"${scriptsDir}"/inject-chapter-navigation-footer "${workDir}" "${bookUrl}"
# Adjust sponsored links in the colophon.
sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/src/epub/text/colophon.xhtml

View file

@ -23,8 +23,10 @@ $types = ['epub', 'epub-advanced', 'azw3', 'kepub', 'xhtml'];
$groups = ['collections', 'subjects', 'authors', 'months'];
$ebooksByGroup = ['collections' => [], 'subjects' => [], 'authors' => [], 'months' => []];
/**
* @see https://www.php.net/manual/en/function.rmdir.php#117354
*/
function rrmdir(string $src): void{
// See <https://www.php.net/manual/en/function.rmdir.php#117354>.
$dir = opendir($src);
while(false !== ($file = readdir($dir))){
if (($file != '.') && ($file != '..')){
@ -94,8 +96,8 @@ function CreateZip(string $filePath, array $ebooks, string $type, string $webRoo
$zip->close();
// We have to do a copy, then unlink because rename() can't rename across file systems.
// If the bulk downloads are symlinked to a storage volume, the rename() won't work.
// We have to do a copy, then unlink because `rename()` can't rename across file systems.
// If the bulk downloads are symlinked to a storage volume, then `rename()` won't work.
copy($tempFilename, $filePath);
unlink($tempFilename);
@ -167,7 +169,7 @@ foreach(Ebook::GetAll() as $ebook){
}
// Add to the 'books by author' list.
// We have to index by UrlName for cases like `Samuel Butler` whose UrlName is `samuel-butler-1612-1680`.
// We have to index by `UrlName` for cases like `Samuel Butler` whose `UrlName` is `samuel-butler-1612-1680`.
$authorsUrl = preg_replace('|^/ebooks/|', '', $ebook->AuthorsUrl);
if(!isset($ebooksByGroup['authors'][$authorsUrl])){
$obj = new stdClass();