From 133f93cdce4ddc3cec8031c783609d9962b1a99c Mon Sep 17 00:00:00 2001 From: Alex Cabal Date: Thu, 25 Jun 2020 12:56:14 -0500 Subject: [PATCH] Add subjects OPDS feeds, and switch to a more object-oriented approach to generating the OPDS feeds --- .gitignore | 1 + lib/OpdsAcquisitionFeed.php | 24 ++++++++++ lib/OpdsFeed.php | 45 +++++++++---------- lib/OpdsNavigationEntry.php | 20 +++++++++ lib/OpdsNavigationFeed.php | 22 +++++++++ scripts/deploy-ebook-to-www | 4 +- scripts/generate-opds.php | 38 ++++++++++++++-- ...OpdsEntry.php => OpdsAcquisitionEntry.php} | 18 ++++---- .../{OpdsFeed.php => OpdsAcquisitionFeed.php} | 15 ++++--- templates/OpdsNavigationFeed.php | 36 +++++++++++++++ www/opds/search.php | 15 ++++--- 11 files changed, 187 insertions(+), 51 deletions(-) create mode 100644 lib/OpdsAcquisitionFeed.php create mode 100644 lib/OpdsNavigationEntry.php create mode 100644 lib/OpdsNavigationFeed.php rename templates/{OpdsEntry.php => OpdsAcquisitionEntry.php} (66%) rename templates/{OpdsFeed.php => OpdsAcquisitionFeed.php} (56%) create mode 100644 templates/OpdsNavigationFeed.php diff --git a/.gitignore b/.gitignore index fed117e7..760ee273 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ ebooks/* www/ebooks/* www/images/covers/* www/opds/*.xml +www/opds/subjects www/rss/*.xml vendor/ composer.lock diff --git a/lib/OpdsAcquisitionFeed.php b/lib/OpdsAcquisitionFeed.php new file mode 100644 index 00000000..43ea50c3 --- /dev/null +++ b/lib/OpdsAcquisitionFeed.php @@ -0,0 +1,24 @@ +Ebooks = $ebooks; + $this->IsCrawlable = $isCrawlable; + } + + public function Save(string $path): void{ + $updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z'); + + $feed = Template::OpdsAcquisitionFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->ParentUrl, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]); + + $this->SaveIfChanged($path, $feed, $updatedTimestamp); + } +} diff --git 
a/lib/OpdsFeed.php b/lib/OpdsFeed.php index 6d1f47ec..fc547952 100644 --- a/lib/OpdsFeed.php +++ b/lib/OpdsFeed.php @@ -8,18 +8,16 @@ class OpdsFeed{ public $Id; public $Url; public $Title; - public $Ebooks = []; - public $IsCrawlable; + public $ParentUrl; - public function __construct(string $url, string $title, array $ebooks, bool $isCrawlable = false){ + public function __construct(string $url, string $title, ?string $parentUrl){ $this->Url = $url; - $this->Id = $url; + $this->Id = SITE_URL . $url; $this->Title = $title; - $this->Ebooks = $ebooks; - $this->IsCrawlable = $isCrawlable; + $this->ParentUrl = $parentUrl; } - private function Sha1Entries(string $xmlString): string{ + protected function Sha1Entries(string $xmlString): string{ try{ $xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $xmlString)); $xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/'); @@ -28,6 +26,13 @@ class OpdsFeed{ $output = ''; foreach($entries as $entry){ + // Remove any <updated> elements, we don't want to compare against those. + // This makes it easier to, for example, generate a new subjects index, + // while updating it at the same time. + foreach($xml->xpath('/feed/entry/updated') as $element){ + unset($element[0]); + } + $output .= $entry->asXml(); } @@ -39,31 +44,25 @@ class OpdsFeed{ } } - public function Save(string $filename): void{ - $updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z'); - - $feed = Template::OpdsFeed(['id' => $this->Url, 'url' => $this->Url, 'title' => $this->Title, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]); - + protected function SaveIfChanged(string $path, string $feed, string $updatedTimestamp): void{ $tempFilename = tempnam('/tmp/', 'se-opds-'); file_put_contents($tempFilename, $feed); exec('se clean ' . escapeshellarg($tempFilename)); // Did we actually update the feed?
If so, write to file and update the index - if(!is_file($filename)){ - // File doesn't exist, write it out - rename($tempFilename, $filename); - } - elseif($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($filename))){ - // Files don't match, save the file and update the index feed with the last updated timestamp - $xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents(WEB_ROOT . '/opds/index.xml'))); - $xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/'); - $xml->registerXPathNamespace('schema', 'http://schema.org/'); + if(!is_file($path) || ($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($path)))){ + // Files don't match, save the file and update the parent navigation feed with the last updated timestamp + $parentFilepath = WEB_ROOT . str_replace(SITE_URL, '', $this->ParentUrl); + if(!is_file($parentFilepath)){ + $parentFilepath .= '/index.xml'; + } + $xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($parentFilepath))); $feedEntry = ($xml->xpath('/feed/entry[id="' . $this->Id . '"]/updated') ?? [])[0]; $feedEntry[0] = $updatedTimestamp; - file_put_contents(WEB_ROOT . '/opds/index.xml', str_replace(" ns=", " xmlns=", $xml->asXml() ?? '')); + file_put_contents($parentFilepath, str_replace(" ns=", " xmlns=", $xml->asXml() ?? '')); - rename($tempFilename, $filename); + rename($tempFilename, $path); } } } diff --git a/lib/OpdsNavigationEntry.php b/lib/OpdsNavigationEntry.php new file mode 100644 index 00000000..736a4613 --- /dev/null +++ b/lib/OpdsNavigationEntry.php @@ -0,0 +1,20 @@ +Id = SITE_URL . 
$url; + $this->Url = $url; + $this->Rel = $rel; + $this->Type = $type; + $this->Updated = $updated; + $this->Title = $title; + $this->Description = $description; + } +} diff --git a/lib/OpdsNavigationFeed.php b/lib/OpdsNavigationFeed.php new file mode 100644 index 00000000..56f60b88 --- /dev/null +++ b/lib/OpdsNavigationFeed.php @@ -0,0 +1,22 @@ +Entries = $entries; + } + + public function Save(string $path): void{ + $updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z'); + + $feed = Template::OpdsNavigationFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->ParentUrl, 'updatedTimestamp' => $updatedTimestamp, 'entries' => $this->Entries]); + + $this->SaveIfChanged($path, $feed, $updatedTimestamp); + } +} diff --git a/scripts/deploy-ebook-to-www b/scripts/deploy-ebook-to-www index cf3643e6..eb8398ea 100755 --- a/scripts/deploy-ebook-to-www +++ b/scripts/deploy-ebook-to-www @@ -229,8 +229,8 @@ fi php "${scriptsDir}/generate-opds.php" --webroot "${webRoot}" --weburl "${webUrl}" -sudo chown se:committers /standardebooks.org/web/www/opds/*.xml -sudo chmod 664 /standardebooks.org/web/www/opds/*.xml +sudo chown --recursive se:committers /standardebooks.org/web/www/opds/* +sudo chmod --recursive 664 /standardebooks.org/web/www/opds/* if [ "${verbose}" = "true" ]; then printf "Done.\n" diff --git a/scripts/generate-opds.php b/scripts/generate-opds.php index 60b7b768..c919948e 100644 --- a/scripts/generate-opds.php +++ b/scripts/generate-opds.php @@ -13,7 +13,10 @@ $webUrl = $options["weburl"] ?? "https://standardebooks.org"; $contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? 
'')); $allEbooks = []; $newestEbooks = []; +$subjects = []; +$ebooksBySubject = []; +// Iterate over all ebooks to build the various feeds foreach($contentFiles as $path){ if($path == '') continue; @@ -23,15 +26,44 @@ foreach($contentFiles as $path){ $allEbooks[$ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; $newestEbooks[$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; + + foreach($ebook->Tags as $tag){ + // Add the book's subjects to the main subjects list + if(!in_array($tag->Name, $subjects)){ + $subjects[] = $tag->Name; + } + + // Sort this ebook by subject + $ebooksBySubject[$tag->Name][$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook; + } } +// Create the subjects navigation document +sort($subjects); +$subjectNavigationEntries = []; +foreach($subjects as $subject){ + // We leave the updated timestamp blank, as it will be filled in when we generate the individual feeds + $subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', null, $subject, 'Browse Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.'); +} +$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', '/opds', $subjectNavigationEntries); +$subjectsFeed->Save(WEB_ROOT . '/opds/subjects/index.xml'); + +// Now generate each individual subject feed +foreach($ebooksBySubject as $subject => $ebooks){ + krsort($ebooks); + $subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe($subject), $subject, '/opds/subjects', $ebooks); + $subjectFeed->Save(WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe($subject) . '.xml'); +} + +// Create the 'all' feed krsort($allEbooks); -$allFeed = new OpdsFeed(SITE_URL .
'/opds/all', 'All Standard Ebooks', $allEbooks, true); +$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', '/opds', $allEbooks, true); $allFeed->Save(WEB_ROOT . '/opds/all.xml'); +// Create the 'newest' feed krsort($newestEbooks); $newestEbooks = array_slice($newestEbooks, 0, 30); -$newestFeed = new OpdsFeed(SITE_URL . '/opds/newest', 'Newest 30 Standard Ebooks', $newestEbooks); -$newestFeed->Save(WEB_ROOT . '/opds/newest.xml'); +$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest 30 Standard Ebooks', '/opds', $newestEbooks); +$newestFeed->Save(WEB_ROOT . '/opds/new-releases.xml'); ?> diff --git a/templates/OpdsEntry.php b/templates/OpdsAcquisitionEntry.php similarity index 66% rename from templates/OpdsEntry.php rename to templates/OpdsAcquisitionEntry.php index 79fd08ad..b6745542 100644 --- a/templates/OpdsEntry.php +++ b/templates/OpdsAcquisitionEntry.php @@ -1,26 +1,26 @@ Url ?> - <?= $ebook->Title ?> + <?= htmlspecialchars($ebook->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?> Authors as $author){ ?> - Name ?> - WikipediaUrl !== null){ ?>WikipediaUrl ?> - FullName !== null){ ?>FullName ?> - NacoafUrl !== null){ ?>NacoafUrl ?> + Name, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + WikipediaUrl !== null){ ?>WikipediaUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + FullName !== null){ ?>FullName, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + NacoafUrl !== null){ ?>NacoafUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?> Timestamp->format('Y-m-d\TH:i:s\Z') ?> ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') ?> - Language ?> + Language, ENT_QUOTES|ENT_XML1, 'utf-8') ?> Standard Ebooks Sources as $source){ ?> - Url ?> + Url, ENT_QUOTES|ENT_XML1, 'utf-8') ?> Public domain in the United States; original content released to the public domain via the Creative Commons CC0 1.0 Universal Public Domain Dedication - Description, ENT_QUOTES, 'UTF-8') ?> + Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?> LongDescription ?> LocTags as $subject){ ?> - + diff --git a/templates/OpdsFeed.php 
b/templates/OpdsAcquisitionFeed.php similarity index 56% rename from templates/OpdsFeed.php rename to templates/OpdsAcquisitionFeed.php index d5f7700c..7fc415ca 100644 --- a/templates/OpdsFeed.php +++ b/templates/OpdsAcquisitionFeed.php @@ -2,7 +2,7 @@ /* Notes: -- *All* OPDS feeds must contain a rel="crawlable" link pointing to the /opds/all feed +- *All* OPDS feeds must contain a rel="http://opds-spec.org/crawlable" link pointing to the /opds/all feed - The element is required to note this as a "Complete Acquisition Feeds"; see https://specs.opds.io/opds-1.2#25-complete-acquisition-feeds @@ -17,12 +17,13 @@ print("\n"); xmlns:fh="http://purl.org/syndication/history/1.0"> - - - - <?= $title ?> + + + + + <?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?> Free and liberated ebooks, carefully produced for the true book lover. - /images/logo.png + /images/logo.png @@ -30,6 +31,6 @@ print("\n"); - $ebook]) ?> + $ebook]) ?> diff --git a/templates/OpdsNavigationFeed.php b/templates/OpdsNavigationFeed.php new file mode 100644 index 00000000..6d0d41fa --- /dev/null +++ b/templates/OpdsNavigationFeed.php @@ -0,0 +1,36 @@ + element is required to note this as a "Complete Acquisition Feeds"; see https://specs.opds.io/opds-1.2#25-complete-acquisition-feeds + +*/ +print("\n"); +?> + + + + + + + + <?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + Free and liberated ebooks, carefully produced for the true book lover. 
+ /images/logo.png + + + Standard Ebooks + + + + + <?= htmlspecialchars($entry->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + + Updated !== null){ ?>Updated->format('Y-m-d\TH:i:s\Z') ?> + Id, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?> + + + diff --git a/www/opds/search.php b/www/opds/search.php index 1345f730..c1d785ad 100644 --- a/www/opds/search.php +++ b/www/opds/search.php @@ -20,22 +20,23 @@ catch(\Exception $ex){ header('Content-type: text/xml'); print("\n"); ?> - - https://standardebooks.org/opds/all + + https://standardebooks.org/opds/all?query= - - - + + + Standard Ebooks OPDS Search Results Free and liberated ebooks, carefully produced for the true book lover. - https://standardebooks.org/images/logo.png + /images/logo.png Format('Y-m-d\TH:i:s\Z') ?> Standard Ebooks https://standardebooks.org + - $ebook]) ?> + $ebook]) ?>