Add subjects OPDS feeds, and switch to a more object-oriented approach to generating the OPDS feeds

2025-07-05 14:20:29 -04:00 · 2020-06-25 12:56:14 -05:00 · 2020-06-25 12:56:14 -05:00 · 133f93cdce
commit 133f93cdce
parent a42de8ef4d
11 changed files with 187 additions and 51 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,6 +2,7 @@ ebooks/*
 www/ebooks/*
 www/images/covers/*
 www/opds/*.xml
+www/opds/subjects
 www/rss/*.xml
 vendor/
 composer.lock
--- a/lib/OpdsAcquisitionFeed.php
+++ b/lib/OpdsAcquisitionFeed.php
@ -0,0 +1,24 @@
+<?
+use function Safe\file_get_contents;
+use function Safe\file_put_contents;
+use function Safe\rename;
+use function Safe\tempnam;
+
+class OpdsAcquisitionFeed extends OpdsFeed{
+	public $Ebooks = [];
+	public $IsCrawlable;
+
+	public function __construct(string $url, string $title, ?string $parentUrl, array $ebooks, bool $isCrawlable = false){
+		parent::__construct($url, $title, $parentUrl);
+		$this->Ebooks = $ebooks;
+		$this->IsCrawlable = $isCrawlable;
+	}
+
+	public function Save(string $path): void{
+		$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
+
+		$feed = Template::OpdsAcquisitionFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->ParentUrl, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]);
+
+		$this->SaveIfChanged($path, $feed, $updatedTimestamp);
+	}
+}
--- a/lib/OpdsFeed.php
+++ b/lib/OpdsFeed.php
@ -8,18 +8,16 @@ class OpdsFeed{
 	public $Id;
 	public $Url;
 	public $Title;
-	public $Ebooks = [];
-	public $IsCrawlable;
+	public $ParentUrl;

-	public function __construct(string $url, string $title, array $ebooks, bool $isCrawlable = false){
+	public function __construct(string $url, string $title, ?string $parentUrl){
 		$this->Url = $url;
-		$this->Id = $url;
+		$this->Id = SITE_URL . $url;
 		$this->Title = $title;
-		$this->Ebooks = $ebooks;
-		$this->IsCrawlable = $isCrawlable;
+		$this->ParentUrl = $parentUrl;
 	}

-	private function Sha1Entries(string $xmlString): string{
+	protected function Sha1Entries(string $xmlString): string{
 		try{
 			$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $xmlString));
 			$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
@ -28,6 +26,13 @@ class OpdsFeed{

 			$output = '';
 			foreach($entries as $entry){
+				// Remove any <updated> elements; we don't want to compare against those.
+				// This makes it easier to, for example, generate a new subjects index
+				// while updating it at the same time.
+				foreach($xml->xpath('/feed/entry/updated') as $element){
+					unset($element[0]);
+				}
+
 				$output .= $entry->asXml();
 			}

@ -39,31 +44,25 @@ class OpdsFeed{
 		}
 	}

-	public function Save(string $filename): void{
-		$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
-
-		$feed = Template::OpdsFeed(['id' => $this->Url, 'url' => $this->Url, 'title' => $this->Title, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]);
-
+	protected function SaveIfChanged(string $path, string $feed, string $updatedTimestamp): void{
 		$tempFilename = tempnam('/tmp/', 'se-opds-');
 		file_put_contents($tempFilename, $feed);
 		exec('se clean ' . escapeshellarg($tempFilename));

 		// Did we actually update the feed? If so, write to file and update the index
-		if(!is_file($filename)){
-			// File doesn't exist, write it out
-			rename($tempFilename, $filename);
-		}
-		elseif($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($filename))){
-			// Files don't match, save the file and update the index feed with the last updated timestamp
-			$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents(WEB_ROOT . '/opds/index.xml')));
-			$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
-			$xml->registerXPathNamespace('schema', 'http://schema.org/');
+		if(!is_file($path) || ($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($path)))){
+			// The file doesn't exist yet or its entries don't match; save the file and update the parent navigation feed with the last updated timestamp
+			$parentFilepath = WEB_ROOT . str_replace(SITE_URL, '', $this->ParentUrl);
+			if(!is_file($parentFilepath)){
+				$parentFilepath .= '/index.xml';
+			}
+			$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($parentFilepath)));

 			$feedEntry = ($xml->xpath('/feed/entry[id="' . $this->Id . '"]/updated') ?? [])[0];
 			$feedEntry[0] = $updatedTimestamp;
-			file_put_contents(WEB_ROOT . '/opds/index.xml', str_replace(" ns=", " xmlns=", $xml->asXml() ?? ''));
+			file_put_contents($parentFilepath, str_replace(" ns=", " xmlns=", $xml->asXml() ?? ''));

-			rename($tempFilename, $filename);
+			rename($tempFilename, $path);
 		}
 	}
 }
--- a/lib/OpdsNavigationEntry.php
+++ b/lib/OpdsNavigationEntry.php
@ -0,0 +1,20 @@
+<?
+class OpdsNavigationEntry{
+	public $Id;
+	public $Url;
+	public $Rel;
+	public $Type;
+	public $Updated;
+	public $Description;
+	public $Title;
+
+	public function __construct(string $url, string $rel, string $type, ?DateTime $updated, string $title, string $description){
+		$this->Id = SITE_URL . $url;
+		$this->Url = $url;
+		$this->Rel = $rel;
+		$this->Type = $type;
+		$this->Updated = $updated;
+		$this->Title = $title;
+		$this->Description = $description;
+	}
+}
--- a/lib/OpdsNavigationFeed.php
+++ b/lib/OpdsNavigationFeed.php
@ -0,0 +1,22 @@
+<?
+use function Safe\file_get_contents;
+use function Safe\file_put_contents;
+use function Safe\rename;
+use function Safe\tempnam;
+
+class OpdsNavigationFeed extends OpdsFeed{
+	public $Entries = [];
+
+	public function __construct(string $url, string $title, ?string $parentUrl, array $entries){
+		parent::__construct($url, $title, $parentUrl);
+		$this->Entries = $entries;
+	}
+
+	public function Save(string $path): void{
+		$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
+
+		$feed = Template::OpdsNavigationFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->ParentUrl, 'updatedTimestamp' => $updatedTimestamp, 'entries' => $this->Entries]);
+
+		$this->SaveIfChanged($path, $feed, $updatedTimestamp);
+	}
+}
--- a/scripts/deploy-ebook-to-www
+++ b/scripts/deploy-ebook-to-www
@ -229,8 +229,8 @@ fi

 php "${scriptsDir}/generate-opds.php" --webroot "${webRoot}" --weburl "${webUrl}"

-sudo chown se:committers /standardebooks.org/web/www/opds/*.xml
-sudo chmod 664 /standardebooks.org/web/www/opds/*.xml
+sudo chown --recursive se:committers /standardebooks.org/web/www/opds/*
+sudo chmod --recursive 664 /standardebooks.org/web/www/opds/*

 if [ "${verbose}" = "true" ]; then
 	printf "Done.\n"
--- a/scripts/generate-opds.php
+++ b/scripts/generate-opds.php
@ -13,7 +13,10 @@ $webUrl = $options["weburl"] ?? "https://standardebooks.org";
 $contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? ''));
 $allEbooks = [];
 $newestEbooks = [];
+$subjects = [];
+$ebooksBySubject = [];

+// Iterate over all ebooks to build the various feeds
 foreach($contentFiles as $path){
 	if($path == '')
 		continue;
@ -23,15 +26,44 @@ foreach($contentFiles as $path){

 	$allEbooks[$ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
 	$newestEbooks[$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
+
+	foreach($ebook->Tags as $tag){
+		// Add the book's subjects to the main subjects list
+		if(!in_array($tag->Name, $subjects)){
+			$subjects[] = $tag->Name;
+		}
+
+		// Sort this ebook by subject
+		$ebooksBySubject[$tag->Name][$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
+	}
 }

+// Create the subjects navigation document
+sort($subjects);
+$subjectNavigationEntries = [];
+foreach($subjects as $subject){
+	// We leave the updated timestamp blank, as it will be filled in when we generate the individual feeds
+	$subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', null, $subject, 'Browse Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.');
+}
+$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', '/opds', $subjectNavigationEntries);
+$subjectsFeed->Save(WEB_ROOT . '/opds/subjects/index.xml');
+
+// Now generate each individual subject feed
+foreach($ebooksBySubject as $subject => $ebooks){
+	krsort($ebooks);
+	$subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe($subject), $subject, '/opds/subjects', $ebooks);
+	$subjectFeed->Save(WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe($subject) . '.xml');
+}
+
+// Create the 'all' feed
 krsort($allEbooks);
-$allFeed = new OpdsFeed(SITE_URL . '/opds/all', 'All Standard Ebooks', $allEbooks, true);
+$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', '/opds', $allEbooks, true);
 $allFeed->Save(WEB_ROOT . '/opds/all.xml');

+// Create the 'newest' feed
 krsort($newestEbooks);
 $newestEbooks = array_slice($newestEbooks, 0, 30);
-$newestFeed = new OpdsFeed(SITE_URL . '/opds/newest', 'Newest 30 Standard Ebooks', $newestEbooks);
-$newestFeed->Save(WEB_ROOT . '/opds/newest.xml');
+$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest 30 Standard Ebooks', '/opds', $newestEbooks);
+$newestFeed->Save(WEB_ROOT . '/opds/new-releases.xml');

 ?>
--- a/templates/OpdsAcquisitionEntry.php
+++ b/templates/OpdsAcquisitionEntry.php
@ -1,26 +1,26 @@
 <entry>
 	<id><?= SITE_URL . $ebook->Url ?></id>
-	<title><?= $ebook->Title ?></title>
+	<title><?= htmlspecialchars($ebook->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
 	<? foreach($ebook->Authors as $author){ ?>
 		<author>
-			<name><?= $author->Name ?></name>
-			<? if($author->WikipediaUrl !== null){ ?><uri><?= $author->WikipediaUrl ?></uri><? } ?>
-			<? if($author->FullName !== null){ ?><schema:alternateName><?= $author->FullName ?></schema:alternateName><? } ?>
-			<? if($author->NacoafUrl !== null){ ?><schema:sameAs><?= $author->NacoafUrl ?></schema:sameAs><? } ?>
+			<name><?= htmlspecialchars($author->Name, ENT_QUOTES|ENT_XML1, 'utf-8') ?></name>
+			<? if($author->WikipediaUrl !== null){ ?><uri><?= htmlspecialchars($author->WikipediaUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></uri><? } ?>
+			<? if($author->FullName !== null){ ?><schema:alternateName><?= htmlspecialchars($author->FullName, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:alternateName><? } ?>
+			<? if($author->NacoafUrl !== null){ ?><schema:sameAs><?= htmlspecialchars($author->NacoafUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:sameAs><? } ?>
 		</author>
 	<? } ?>
 	<dc:issued><?= $ebook->Timestamp->format('Y-m-d\TH:i:s\Z') ?></dc:issued>
 	<updated><?= $ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') ?></updated>
-	<dc:language><?= $ebook->Language ?></dc:language>
+	<dc:language><?= htmlspecialchars($ebook->Language, ENT_QUOTES|ENT_XML1, 'utf-8') ?></dc:language>
 	<dc:publisher>Standard Ebooks</dc:publisher>
 	<? foreach($ebook->Sources as $source){ ?>
-	<dc:source><?= $source->Url ?></dc:source>
+	<dc:source><?= htmlspecialchars($source->Url, ENT_QUOTES|ENT_XML1, 'utf-8') ?></dc:source>
 	<? } ?>
 	<rights>Public domain in the United States; original content released to the public domain via the Creative Commons CC0 1.0 Universal Public Domain Dedication</rights>
-	<summary type="text"><?= htmlspecialchars($ebook->Description, ENT_QUOTES, 'UTF-8') ?></summary>
+	<summary type="text"><?= htmlspecialchars($ebook->Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?></summary>
 	<content type="text/html"><?= $ebook->LongDescription ?></content>
 	<? foreach($ebook->LocTags as $subject){ ?>
-	<category scheme="http://purl.org/dc/terms/LCSH" term="<?= htmlspecialchars($subject, ENT_QUOTES, 'UTF-8') ?>"/>
+	<category scheme="http://purl.org/dc/terms/LCSH" term="<?= htmlspecialchars($subject, ENT_QUOTES|ENT_XML1, 'utf-8') ?>"/>
 	<? } ?>
 	<link href="<?= $ebook->Url ?>/dist/cover.jpg" rel="http://opds-spec.org/image" type="image/jpeg"/>
 	<link href="<?= $ebook->Url ?>/dist/cover-thumbnail.jpg" rel="http://opds-spec.org/image/thumbnail" type="image/jpeg"/>
--- a/templates/OpdsAcquisitionFeed.php
+++ b/templates/OpdsAcquisitionFeed.php
@ -2,7 +2,7 @@

 /* Notes:

- *All* OPDS feeds must contain a rel="crawlable" link pointing to the /opds/all feed
+- *All* OPDS feeds must contain a rel="http://opds-spec.org/crawlable" link pointing to the /opds/all feed

 - The <fh:complete/> element is required to note this as a "Complete Acquisition Feeds"; see https://specs.opds.io/opds-1.2#25-complete-acquisition-feeds

@ -17,12 +17,13 @@ print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
 <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/"<? if($isCrawlable){ ?> xmlns:fh="http://purl.org/syndication/history/1.0"<? } ?>>
 	<id><?= $id ?></id>
 	<link href="<?= $url ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
-	<link href="<?= SITE_URL ?>/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
-	<link href="<?= SITE_URL ?>/opds/all" rel="crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
-	<link href="<?= SITE_URL ?>/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
-	<title><?= $title ?></title>
+	<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
+	<link href="<?= $parentUrl ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
+	<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
+	<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
+	<title><?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
 	<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
-	<icon><?= SITE_URL ?>/images/logo.png</icon>
+	<icon>/images/logo.png</icon>
 	<updated><?= $updatedTimestamp ?></updated>
 	<? if($isCrawlable){ ?><fh:complete/><? } ?>
 	<author>
@ -30,6 +31,6 @@ print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
 		<uri><?= SITE_URL ?></uri>
 	</author>
 	<? foreach($entries as $ebook){ ?>
-		<?= Template::OpdsEntry(['ebook' => $ebook]) ?>
+		<?= Template::OpdsAcquisitionEntry(['ebook' => $ebook]) ?>
 	<? } ?>
 </feed>
--- a/templates/OpdsNavigationFeed.php
+++ b/templates/OpdsNavigationFeed.php
@ -0,0 +1,36 @@
+<?
+
+/* Notes:
+
+- Expects these template variables: $id, $url, $title, $parentUrl (may be null), $updatedTimestamp, and $entries (objects exposing Title, Url, Rel, Type, Updated, Id, and Description).
+
+- *All* OPDS feeds must contain a rel="http://opds-spec.org/crawlable" link pointing to the /opds/all feed
+
+- This is a navigation feed, so its rel="self" link is typed kind=navigation. The <fh:complete/> element only marks "Complete Acquisition Feeds" and is not emitted here; see https://specs.opds.io/opds-1.2#25-complete-acquisition-feeds
+
+- An entry's <updated> element is left empty when $entry->Updated is null.
+
+*/
+print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
+?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/">
+	<id><?= $id ?></id>
+	<link href="<?= $url ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
+	<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
+	<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
+	<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
+	<? if($parentUrl !== null){ ?><link href="<?= $parentUrl ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/><? } ?>
+	<title><?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
+	<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
+	<icon>/images/logo.png</icon>
+	<updated><?= $updatedTimestamp ?></updated>
+	<author>
+		<name>Standard Ebooks</name>
+		<uri><?= SITE_URL ?></uri>
+	</author>
+	<? foreach($entries as $entry){ ?>
+		<entry>
+			<title><?= htmlspecialchars($entry->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
+			<link href="<?= $entry->Url ?>" rel="<?= $entry->Rel ?>" type="application/atom+xml;profile=opds-catalog;kind=<?= $entry->Type ?>"/>
+			<updated><? if($entry->Updated !== null){ ?><?= $entry->Updated->format('Y-m-d\TH:i:s\Z') ?><? } ?></updated>
+			<id><?= htmlspecialchars($entry->Id, ENT_QUOTES|ENT_XML1, 'utf-8') ?></id>
+			<content type="text"><?= htmlspecialchars($entry->Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?></content>
+		</entry>
+	<? } ?>
+</feed>
--- a/www/opds/search.php
+++ b/www/opds/search.php
@ -20,22 +20,23 @@ catch(\Exception $ex){
 header('Content-type: text/xml');
 print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
 ?>
-<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/" xmlns:fh="http://purl.org/syndication/history/1.0">
-	<id>https://standardebooks.org/opds/all</id>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
+	<id>https://standardebooks.org/opds/all?query=<?= urlencode($query) ?></id>
 	<link href="/opds/all?query=<?= urlencode($query) ?>" rel="self" type="application/atom+xml;profile=opds-catalog"/>
 	<link href="/ebooks/ebooks?query=doyle" rel="alternate" type="text/html"/>
-	<link href="https://standardebooks.org/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
-	<link href="https://standardebooks.org/opds/all" rel="crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
-	<link href="https://standardebooks.org/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
+	<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
+	<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
+	<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
 	<title>Standard Ebooks OPDS Search Results</title>
 	<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
-	<icon>https://standardebooks.org/images/logo.png</icon>
+	<icon>/images/logo.png</icon>
 	<updated><?= $now->Format('Y-m-d\TH:i:s\Z') ?></updated>
 	<author>
 		<name>Standard Ebooks</name>
 		<uri>https://standardebooks.org</uri>
 	</author>
+	<opensearch:totalResults><?= sizeof($ebooks) ?></opensearch:totalResults>
 <? foreach($ebooks as $ebook){ ?>
-	<?= Template::OpdsEntry(['ebook' => $ebook]) ?>
+	<?= Template::OpdsAcquisitionEntry(['ebook' => $ebook]) ?>
 <? } ?>
 </feed>