Add subjects OPDS feeds, and switch to a more object-oriented approach to generating the OPDS feeds

This commit is contained in:
Alex Cabal 2020-06-25 12:56:14 -05:00
parent a42de8ef4d
commit 133f93cdce
11 changed files with 187 additions and 51 deletions

1
.gitignore vendored
View file

@ -2,6 +2,7 @@ ebooks/*
www/ebooks/*
www/images/covers/*
www/opds/*.xml
www/opds/subjects
www/rss/*.xml
vendor/
composer.lock

View file

@ -0,0 +1,24 @@
<?
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\rename;
use function Safe\tempnam;
/**
 * An OPDS feed whose entries are ebooks, rendered through the
 * OpdsAcquisitionFeed template.
 */
class OpdsAcquisitionFeed extends OpdsFeed{
	/** Ebooks handed to the template as the feed's `entries`. */
	public $Ebooks = [];

	/** Handed to the template as `isCrawlable`. */
	public $IsCrawlable;

	/**
	 * @param string $url Feed URL, forwarded to the parent constructor.
	 * @param string $title Feed title, forwarded to the parent constructor.
	 * @param string|null $parentUrl URL of the parent feed, forwarded to the parent constructor.
	 * @param array $ebooks Ebooks to list in this feed.
	 * @param bool $isCrawlable Value passed to the template as `isCrawlable`; defaults to false.
	 */
	public function __construct(string $url, string $title, ?string $parentUrl, array $ebooks, bool $isCrawlable = false){
		parent::__construct($url, $title, $parentUrl);

		$this->IsCrawlable = $isCrawlable;
		$this->Ebooks = $ebooks;
	}

	/**
	 * Renders the feed with the current UTC time as its updated timestamp and
	 * hands the result to the inherited SaveIfChanged().
	 *
	 * @param string $path Filesystem path the feed should be written to.
	 */
	public function Save(string $path): void{
		$generatedAt = gmdate('Y-m-d\TH:i:s\Z');

		$templateVariables = [
			'id' => $this->Id,
			'url' => $this->Url,
			'title' => $this->Title,
			'parentUrl' => $this->ParentUrl,
			'updatedTimestamp' => $generatedAt,
			'isCrawlable' => $this->IsCrawlable,
			'entries' => $this->Ebooks
		];

		$this->SaveIfChanged($path, Template::OpdsAcquisitionFeed($templateVariables), $generatedAt);
	}
}

View file

@ -8,18 +8,16 @@ class OpdsFeed{
public $Id;
public $Url;
public $Title;
public $Ebooks = [];
public $IsCrawlable;
public $ParentUrl;
public function __construct(string $url, string $title, array $ebooks, bool $isCrawlable = false){
public function __construct(string $url, string $title, ?string $parentUrl){
$this->Url = $url;
$this->Id = $url;
$this->Id = SITE_URL . $url;
$this->Title = $title;
$this->Ebooks = $ebooks;
$this->IsCrawlable = $isCrawlable;
$this->ParentUrl = $parentUrl;
}
private function Sha1Entries(string $xmlString): string{
protected function Sha1Entries(string $xmlString): string{
try{
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $xmlString));
$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
@ -28,6 +26,13 @@ class OpdsFeed{
$output = '';
foreach($entries as $entry){
// Remove any <updated> elements, we don't want to compare against those.
// This makes it easier to, for example, generate a new subjects index
// while updating it at the same time.
foreach($xml->xpath('/feed/entry/updated') as $element){
unset($element[0]);
}
$output .= $entry->asXml();
}
@ -39,31 +44,25 @@ class OpdsFeed{
}
}
public function Save(string $filename): void{
$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
$feed = Template::OpdsFeed(['id' => $this->Url, 'url' => $this->Url, 'title' => $this->Title, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]);
protected function SaveIfChanged(string $path, string $feed, string $updatedTimestamp): void{
$tempFilename = tempnam('/tmp/', 'se-opds-');
file_put_contents($tempFilename, $feed);
exec('se clean ' . escapeshellarg($tempFilename));
// Did we actually update the feed? If so, write to file and update the index
if(!is_file($filename)){
// File doesn't exist, write it out
rename($tempFilename, $filename);
}
elseif($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($filename))){
// Files don't match, save the file and update the index feed with the last updated timestamp
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents(WEB_ROOT . '/opds/index.xml')));
$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
$xml->registerXPathNamespace('schema', 'http://schema.org/');
if(!is_file($path) || ($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($path)))){
// Files don't match, save the file and update the parent navigation feed with the last updated timestamp
$parentFilepath = WEB_ROOT . str_replace(SITE_URL, '', $this->ParentUrl);
if(!is_file($parentFilepath)){
$parentFilepath .= '/index.xml';
}
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($parentFilepath)));
$feedEntry = ($xml->xpath('/feed/entry[id="' . $this->Id . '"]/updated') ?? [])[0];
$feedEntry[0] = $updatedTimestamp;
file_put_contents(WEB_ROOT . '/opds/index.xml', str_replace(" ns=", " xmlns=", $xml->asXml() ?? ''));
file_put_contents($parentFilepath, str_replace(" ns=", " xmlns=", $xml->asXml() ?? ''));
rename($tempFilename, $filename);
rename($tempFilename, $path);
}
}
}

View file

@ -0,0 +1,20 @@
<?
/**
 * A single entry (link to another feed) listed inside an OPDS navigation feed.
 */
class OpdsNavigationEntry{
	/** Absolute identifier: SITE_URL prepended to the entry's URL. */
	public $Id;
	/** URL of the linked feed, exactly as passed to the constructor. */
	public $Url;
	/** Link relation of the entry, e.g. 'subsection'. */
	public $Rel;
	/** Feed kind of the link target, e.g. 'navigation'. */
	public $Type;
	/** Last-updated time of the target feed, or null when not known yet. */
	public $Updated;
	public $Description;
	public $Title;

	/**
	 * @param string $url URL of the linked feed; also used to build the entry's Id.
	 * @param string $rel Link relation.
	 * @param string $type Feed kind of the link target.
	 * @param DateTimeInterface|null $updated Last-updated time, or null to leave it unset.
	 *        Any DateTimeInterface is accepted, so both DateTime and DateTimeImmutable work.
	 * @param string $title Human-readable title of the entry.
	 * @param string $description Human-readable description of the entry.
	 */
	public function __construct(string $url, string $rel, string $type, ?DateTimeInterface $updated, string $title, string $description){
		$this->Id = SITE_URL . $url;
		$this->Url = $url;
		$this->Rel = $rel;
		$this->Type = $type;
		$this->Updated = $updated;
		$this->Title = $title;
		$this->Description = $description;
	}
}

View file

@ -0,0 +1,22 @@
<?
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\rename;
use function Safe\tempnam;
/**
 * An OPDS feed whose entries link to other feeds, rendered through the
 * OpdsNavigationFeed template.
 */
class OpdsNavigationFeed extends OpdsFeed{
	/** Entries handed to the template as the feed's `entries`. */
	public $Entries = [];

	/**
	 * @param string $url Feed URL, forwarded to the parent constructor.
	 * @param string $title Feed title, forwarded to the parent constructor.
	 * @param string|null $parentUrl URL of the parent feed, forwarded to the parent constructor.
	 * @param array $entries Navigation entries to list in this feed.
	 */
	public function __construct(string $url, string $title, ?string $parentUrl, array $entries){
		parent::__construct($url, $title, $parentUrl);

		$this->Entries = $entries;
	}

	/**
	 * Renders the feed with the current UTC time as its updated timestamp and
	 * hands the result to the inherited SaveIfChanged().
	 *
	 * @param string $path Filesystem path the feed should be written to.
	 */
	public function Save(string $path): void{
		$generatedAt = gmdate('Y-m-d\TH:i:s\Z');

		$templateVariables = [
			'id' => $this->Id,
			'url' => $this->Url,
			'title' => $this->Title,
			'parentUrl' => $this->ParentUrl,
			'updatedTimestamp' => $generatedAt,
			'entries' => $this->Entries
		];

		$this->SaveIfChanged($path, Template::OpdsNavigationFeed($templateVariables), $generatedAt);
	}
}

View file

@ -229,8 +229,8 @@ fi
php "${scriptsDir}/generate-opds.php" --webroot "${webRoot}" --weburl "${webUrl}"
sudo chown se:committers /standardebooks.org/web/www/opds/*.xml
sudo chmod 664 /standardebooks.org/web/www/opds/*.xml
sudo chown --recursive se:committers /standardebooks.org/web/www/opds/*
# The glob now matches the subjects/ directory as well as the feed files.
# A plain 664 would strip the search (x) bit from directories and make their
# contents unreachable, so clear everything to rw-rw-r-- and then re-add x
# for directories only (X applies to directories, or to files that still
# have an execute bit, which none do after the first two clauses).
sudo chmod --recursive ug=rw,o=r,a+X /standardebooks.org/web/www/opds/*
if [ "${verbose}" = "true" ]; then
printf "Done.\n"

View file

@ -13,7 +13,10 @@ $webUrl = $options["weburl"] ?? "https://standardebooks.org";
$contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? ''));
$allEbooks = [];
$newestEbooks = [];
$subjects = [];
$ebooksBySubject = [];
// Iterate over all ebooks to build the various feeds
foreach($contentFiles as $path){
if($path == '')
continue;
@ -23,15 +26,44 @@ foreach($contentFiles as $path){
$allEbooks[$ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
$newestEbooks[$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
foreach($ebook->Tags as $tag){
// Add the book's subjects to the main subjects list
if(!in_array($tag->Name, $subjects)){
$subjects[] = $tag->Name;
}
// Sort this ebook by subject
$ebooksBySubject[$tag->Name][$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
}
}
// Create the subjects navigation document
sort($subjects);
$subjectNavigationEntries = [];
foreach($subjects as $subject){
// We leave the updated timestamp blank, as it will be filled in when we generate the individual feeds
$subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', null, $subject, 'Browse Standard Ebooks tagged with “' . strtolower($subject) . ',” most-recently-released first.');
}
$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', '/opds', $subjectNavigationEntries);
$subjectsFeed->Save(WEB_ROOT . '/opds/subjects/index.xml');
// Now generate each individual subject feed
foreach($ebooksBySubject as $subject => $ebooks){
krsort($ebooks);
$subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe($subject), $subject, '/opds/subjects', $ebooks);
$subjectFeed->Save(WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe($subject) . '.xml');
}
// Create the 'all' feed
krsort($allEbooks);
$allFeed = new OpdsFeed(SITE_URL . '/opds/all', 'All Standard Ebooks', $allEbooks, true);
$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', '/opds', $allEbooks, true);
$allFeed->Save(WEB_ROOT . '/opds/all.xml');
// Create the 'newest' feed
krsort($newestEbooks);
$newestEbooks = array_slice($newestEbooks, 0, 30);
$newestFeed = new OpdsFeed(SITE_URL . '/opds/newest', 'Newest 30 Standard Ebooks', $newestEbooks);
$newestFeed->Save(WEB_ROOT . '/opds/newest.xml');
$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest 30 Standard Ebooks', '/opds', $newestEbooks);
$newestFeed->Save(WEB_ROOT . '/opds/new-releases.xml');
?>

View file

@ -1,26 +1,26 @@
<entry>
<id><?= SITE_URL . $ebook->Url ?></id>
<title><?= $ebook->Title ?></title>
<title><?= htmlspecialchars($ebook->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
<? foreach($ebook->Authors as $author){ ?>
<author>
<name><?= $author->Name ?></name>
<? if($author->WikipediaUrl !== null){ ?><uri><?= $author->WikipediaUrl ?></uri><? } ?>
<? if($author->FullName !== null){ ?><schema:alternateName><?= $author->FullName ?></schema:alternateName><? } ?>
<? if($author->NacoafUrl !== null){ ?><schema:sameAs><?= $author->NacoafUrl ?></schema:sameAs><? } ?>
<name><?= htmlspecialchars($author->Name, ENT_QUOTES|ENT_XML1, 'utf-8') ?></name>
<? if($author->WikipediaUrl !== null){ ?><uri><?= htmlspecialchars($author->WikipediaUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></uri><? } ?>
<? if($author->FullName !== null){ ?><schema:alternateName><?= htmlspecialchars($author->FullName, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:alternateName><? } ?>
<? if($author->NacoafUrl !== null){ ?><schema:sameAs><?= htmlspecialchars($author->NacoafUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:sameAs><? } ?>
</author>
<? } ?>
<dc:issued><?= $ebook->Timestamp->format('Y-m-d\TH:i:s\Z') ?></dc:issued>
<updated><?= $ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') ?></updated>
<dc:language><?= $ebook->Language ?></dc:language>
<dc:language><?= htmlspecialchars($ebook->Language, ENT_QUOTES|ENT_XML1, 'utf-8') ?></dc:language>
<dc:publisher>Standard Ebooks</dc:publisher>
<? foreach($ebook->Sources as $source){ ?>
<dc:source><?= $source->Url ?></dc:source>
<dc:source><?= htmlspecialchars($source->Url, ENT_QUOTES|ENT_XML1, 'utf-8') ?></dc:source>
<? } ?>
<rights>Public domain in the United States; original content released to the public domain via the Creative Commons CC0 1.0 Universal Public Domain Dedication</rights>
<summary type="text"><?= htmlspecialchars($ebook->Description, ENT_QUOTES, 'UTF-8') ?></summary>
<summary type="text"><?= htmlspecialchars($ebook->Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?></summary>
<content type="text/html"><?= $ebook->LongDescription ?></content>
<? foreach($ebook->LocTags as $subject){ ?>
<category scheme="http://purl.org/dc/terms/LCSH" term="<?= htmlspecialchars($subject, ENT_QUOTES, 'UTF-8') ?>"/>
<category scheme="http://purl.org/dc/terms/LCSH" term="<?= htmlspecialchars($subject, ENT_QUOTES|ENT_XML1, 'utf-8') ?>"/>
<? } ?>
<link href="<?= $ebook->Url ?>/dist/cover.jpg" rel="http://opds-spec.org/image" type="image/jpeg"/>
<link href="<?= $ebook->Url ?>/dist/cover-thumbnail.jpg" rel="http://opds-spec.org/image/thumbnail" type="image/jpeg"/>

View file

@ -2,7 +2,7 @@
/* Notes:
- *All* OPDS feeds must contain a rel="crawlable" link pointing to the /opds/all feed
- *All* OPDS feeds must contain a rel="http://opds-spec.org/crawlable" link pointing to the /opds/all feed
- The <fh:complete/> element is required to note this as a "Complete Acquisition Feeds"; see https://specs.opds.io/opds-1.2#25-complete-acquisition-feeds
@ -17,12 +17,13 @@ print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/"<? if($isCrawlable){ ?> xmlns:fh="http://purl.org/syndication/history/1.0"<? } ?>>
<id><?= $id ?></id>
<link href="<?= $url ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="<?= SITE_URL ?>/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= SITE_URL ?>/opds/all" rel="crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="<?= SITE_URL ?>/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
<title><?= $title ?></title>
<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= $parentUrl ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
<title><?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
<icon><?= SITE_URL ?>/images/logo.png</icon>
<icon>/images/logo.png</icon>
<updated><?= $updatedTimestamp ?></updated>
<? if($isCrawlable){ ?><fh:complete/><? } ?>
<author>
@ -30,6 +31,6 @@ print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
<uri><?= SITE_URL ?></uri>
</author>
<? foreach($entries as $ebook){ ?>
<?= Template::OpdsEntry(['ebook' => $ebook]) ?>
<?= Template::OpdsAcquisitionEntry(['ebook' => $ebook]) ?>
<? } ?>
</feed>

View file

@ -0,0 +1,36 @@
<?
/* Notes:
- *All* OPDS feeds must contain a rel="http://opds-spec.org/crawlable" link pointing to the /opds/all feed
- This template renders a navigation feed, so its rel="self" link is typed kind=navigation; each entry's link kind comes from $entry->Type
- Variables read here: $id, $url, $title, $parentUrl (may be null), $updatedTimestamp, and $entries, whose items expose Title, Url, Rel, Type, Updated, Id and Description
- An entry whose Updated is null is rendered with an empty updated element
*/
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/">
	<id><?= htmlspecialchars($id, ENT_QUOTES|ENT_XML1, 'utf-8') ?></id>
	<link href="<?= htmlspecialchars($url, ENT_QUOTES|ENT_XML1, 'utf-8') ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
	<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
	<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
	<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
	<? if($parentUrl !== null){ ?><link href="<?= htmlspecialchars($parentUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/><? } ?>
	<title><?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
	<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
	<icon>/images/logo.png</icon>
	<updated><?= $updatedTimestamp ?></updated>
	<author>
		<name>Standard Ebooks</name>
		<uri><?= SITE_URL ?></uri>
	</author>
	<? foreach($entries as $entry){ ?>
	<entry>
		<title><?= htmlspecialchars($entry->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
		<link href="<?= htmlspecialchars($entry->Url, ENT_QUOTES|ENT_XML1, 'utf-8') ?>" rel="<?= htmlspecialchars($entry->Rel, ENT_QUOTES|ENT_XML1, 'utf-8') ?>" type="application/atom+xml;profile=opds-catalog;kind=<?= htmlspecialchars($entry->Type, ENT_QUOTES|ENT_XML1, 'utf-8') ?>"/>
		<updated><? if($entry->Updated !== null){ ?><?= $entry->Updated->format('Y-m-d\TH:i:s\Z') ?><? } ?></updated>
		<id><?= htmlspecialchars($entry->Id, ENT_QUOTES|ENT_XML1, 'utf-8') ?></id>
		<content type="text"><?= htmlspecialchars($entry->Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?></content>
	</entry>
	<? } ?>
</feed>

View file

@ -20,22 +20,23 @@ catch(\Exception $ex){
header('Content-type: text/xml');
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/" xmlns:fh="http://purl.org/syndication/history/1.0">
<id>https://standardebooks.org/opds/all</id>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<id>https://standardebooks.org/opds/all?query=<?= urlencode($query) ?></id>
<link href="/opds/all?query=<?= urlencode($query) ?>" rel="self" type="application/atom+xml;profile=opds-catalog"/>
<? /* The HTML alternate must carry the same search term as this feed, not a fixed sample query. NOTE(review): the /ebooks/ebooks path is kept as-is; confirm it is the intended HTML search URL. */ ?>
<link href="/ebooks/ebooks?query=<?= urlencode($query) ?>" rel="alternate" type="text/html"/>
<link href="https://standardebooks.org/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="https://standardebooks.org/opds/all" rel="crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="https://standardebooks.org/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml" />
<title>Standard Ebooks OPDS Search Results</title>
<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
<icon>https://standardebooks.org/images/logo.png</icon>
<icon>/images/logo.png</icon>
<updated><?= $now->Format('Y-m-d\TH:i:s\Z') ?></updated>
<author>
<name>Standard Ebooks</name>
<uri>https://standardebooks.org</uri>
</author>
<opensearch:totalResults><?= sizeof($ebooks) ?></opensearch:totalResults>
<? foreach($ebooks as $ebook){ ?>
<?= Template::OpdsEntry(['ebook' => $ebook]) ?>
<?= Template::OpdsAcquisitionEntry(['ebook' => $ebook]) ?>
<? } ?>
</feed>