Flesh out OPDS generation backend to be more robust and to support generic Atom feeds

This commit is contained in:
Alex Cabal 2022-06-20 21:55:34 -05:00
parent 35188195f1
commit f6df03cfca
23 changed files with 1549 additions and 267 deletions

View file

@ -108,9 +108,10 @@ Define webroot /standardebooks.org/web
Header set Content-Type "text/xml"
</location>
# text/xml allows the page to be displayed in a browser. application/rss+xml will cause it to be downloaded.
# application/xml allows the page to be displayed in a browser and the encoding to be
# determined from the document and not the HTTP headers. application/rss+xml will cause it to be downloaded.
<Location ~ ^/rss/.*$>
Header set Content-Type "text/xml"
Header set Content-Type "application/xml"
</Location>
# Enable HTTP CORS so that browser-based readers like Readium can access opds and ebooks

View file

@ -107,9 +107,10 @@ Define webroot /standardebooks.org/web
Header set Content-Type "text/xml"
</location>
# text/xml allows the page to be displayed in a browser. application/rss+xml will cause it to be downloaded.
# application/xml allows the page to be displayed in a browser and the encoding to be
# determined from the document and not the HTTP headers. application/rss+xml will cause it to be downloaded.
<Location ~ ^/rss/.*$>
Header set Content-Type "text/xml"
Header set Content-Type "application/xml"
</Location>
# Enable HTTP CORS so that browser-based readers like Readium can access opds and ebooks

54
lib/AtomFeed.php Normal file
View file

@ -0,0 +1,54 @@
<?
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\preg_replace;
use function Safe\rename;
use function Safe\tempnam;
use function Safe\unlink;
/**
 * An Atom feed whose saved copy is considered stale only when the hash of its
 * `<entry>` elements differs from the hash of the freshly generated feed.
 */
class AtomFeed extends Feed{
	/** @var string The feed URL prefixed with the `https://standardebooks.org` origin. */
	public $Id;

	/**
	 * @param string $url Feed URL; also appended to the site origin to build `$Id`.
	 * @param string $title Forwarded to the parent constructor.
	 * @param string $path Forwarded to the parent constructor.
	 * @param array $entries Forwarded to the parent constructor.
	 */
	public function __construct(string $url, string $title, string $path, array $entries){
		parent::__construct($url, $title, $path, $entries);
		$this->Id = 'https://standardebooks.org' . $url;
	}

	/**
	 * Hash the `/feed/entry` elements of an Atom document, ignoring all whitespace.
	 *
	 * @param string $xmlString The complete feed document.
	 * @return string The SHA-1 hex digest of the concatenated entries, or an empty string if the XML can't be processed.
	 */
	private function Sha1Entries(string $xmlString): string{
		try{
			// Rewrite the default namespace declaration into a plain attribute so that
			// the un-prefixed XPath expressions below can match the Atom elements.
			$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $xmlString));
			$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
			$xml->registerXPathNamespace('schema', 'http://schema.org/');

			// Remove the feed-level <updated> element, we don't want to compare against it.
			// Note that <updated> elements inside entries are left in place and remain part of the hash.
			foreach($xml->xpath('/feed/updated') ?: [] as $element){
				unset($element[0]);
			}

			$output = '';
			foreach($xml->xpath('/feed/entry') ?: [] as $entry){
				$output .= $entry->asXml();
			}

			return sha1(preg_replace('/\s/ius', '', $output));
		}
		catch(Exception $ex){
			// Invalid XML
			return '';
		}
	}

	/**
	 * Render the feed through the AtomFeed template and clean it, caching the result in `$XmlString`.
	 *
	 * @return string The cleaned feed XML.
	 */
	protected function GetXmlString(): string{
		if($this->XmlString === null){
			$feed = Template::AtomFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'entries' => $this->Entries]);

			$this->XmlString = $this->CleanXmlString($feed);
		}

		return $this->XmlString;
	}

	/**
	 * Decide whether the generated feed differs from the file at `$path`.
	 *
	 * @param string $path The file to compare against.
	 * @return bool True if `$path` is not a file, or if its entries hash differently from the generated feed's entries.
	 */
	protected function HasChanged(string $path): bool{
		if(!is_file($path)){
			return true;
		}

		// Compare strictly: with a loose comparison PHP treats numeric-looking strings
		// (for example digests of the form `0e` followed only by digits) as numbers,
		// so two different digests could be reported as equal.
		return $this->Sha1Entries($this->GetXmlString()) !== $this->Sha1Entries(file_get_contents($path));
	}
}

46
lib/Feed.php Normal file
View file

@ -0,0 +1,46 @@
<?
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\tempnam;
use function Safe\unlink;
/**
 * Base class for XML feeds that are rendered to a string and written to a file.
 * Subclasses provide the actual document by overriding `GetXmlString()`.
 */
class Feed{
	/** @var string The feed URL passed to the constructor. */
	public $Url;
	/** @var string The feed title passed to the constructor. */
	public $Title;
	/** @var array The entries passed to the constructor. */
	public $Entries = [];
	/** @var ?string The file that `Save()` writes to. */
	public $Path = null;
	/** @var ?string If not null, the href of an `xml-stylesheet` declaration added by `CleanXmlString()`. */
	public $Stylesheet = null;
	/** @var ?string Cache slot that subclasses use for the rendered XML. */
	protected $XmlString = null;

	/**
	 * @param string $url Stored in `$Url`.
	 * @param string $title Stored in `$Title`.
	 * @param string $path Stored in `$Path`; the file written by `Save()`.
	 * @param array $entries Stored in `$Entries`.
	 */
	public function __construct(string $url, string $title, string $path, array $entries){
		$this->Url = $url;
		$this->Title = $title;
		$this->Path = $path;
		$this->Entries = $entries;
	}

	/**
	 * Run `se clean` over the XML via a temporary file, then add the stylesheet declaration if one is set.
	 *
	 * @param string $xmlString The XML to clean.
	 * @return string The contents of the temporary file after `se clean` has run, with the
	 *                `xml-stylesheet` declaration inserted after the XML declaration when `$Stylesheet` is not null.
	 */
	protected function CleanXmlString(string $xmlString): string{
		$tempFilename = tempnam('/tmp/', 'se-');

		try{
			file_put_contents($tempFilename, $xmlString);
			exec('se clean ' . escapeshellarg($tempFilename) . ' 2>&1', $commandOutput); // Capture the result in case there's an error, otherwise it prints to stdout
			$output = file_get_contents($tempFilename);
		}
		finally{
			// Always remove the temporary file, even if writing or reading it failed
			unlink($tempFilename);
		}

		if($this->Stylesheet !== null){
			$output = str_replace("<?xml version=\"1.0\" encoding=\"utf-8\"?>", "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<?xml-stylesheet href=\"" . $this->Stylesheet . "\" type=\"text/xsl\"?>", $output);
		}

		return $output;
	}

	/**
	 * @return string The feed XML; the base implementation returns an empty string.
	 */
	protected function GetXmlString(): string{
		// Virtual function, meant to be implemented by subclass
		return '';
	}

	/**
	 * Write the feed XML to `$Path`.
	 */
	public function Save(): void{
		$feed = $this->GetXmlString();

		file_put_contents($this->Path, $feed);
	}
}

View file

@ -35,4 +35,8 @@ class Formatter{
/**
 * Trim the text and escape it for output as HTML.
 *
 * @param ?string $text The raw text; null is treated as an empty string.
 * @return string The trimmed text with HTML special characters, including both quote styles, escaped.
 */
public static function ToPlainText(?string $text): string{
	// Passing null to trim() is deprecated as of PHP 8.1, so coalesce to an empty string first
	return htmlspecialchars(trim($text ?? ''), ENT_QUOTES, 'UTF-8');
}
/**
 * Trim the text and escape it for output as XML.
 *
 * @param ?string $text The raw text; null is treated as an empty string.
 * @return string The trimmed text with XML special characters, including both quote styles, escaped.
 */
public static function ToPlainXmlText(?string $text): string{
	// Passing null to trim() is deprecated as of PHP 8.1, so coalesce to an empty string first
	return htmlspecialchars(trim($text ?? ''), ENT_QUOTES|ENT_XML1, 'UTF-8');
}
}

View file

@ -1,5 +1,6 @@
<?
use function Safe\apcu_fetch;
use function Safe\ksort;
use function Safe\natsort;
use function Safe\preg_replace;
use function Safe\sleep;

View file

@ -1,25 +1,24 @@
<?
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\gmdate;
use function Safe\rename;
use function Safe\tempnam;
use Safe\DateTime;
class OpdsAcquisitionFeed extends OpdsFeed{
public $Ebooks = [];
public $IsCrawlable;
public function __construct(string $url, string $title, ?string $parentUrl, array $ebooks, bool $isCrawlable = false){
parent::__construct($url, $title, $parentUrl);
$this->Ebooks = $ebooks;
public function __construct(string $url, string $title, string $path, array $entries, ?OpdsNavigationFeed $parent, bool $isCrawlable = false){
parent::__construct($url, $title, $path, $entries, $parent);
$this->IsCrawlable = $isCrawlable;
}
public function Save(string $path): void{
$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
protected function GetXmlString(): string{
if($this->XmlString === null){
$this->XmlString = $this->CleanXmlString(Template::OpdsAcquisitionFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->Parent ? $this->Parent->Url : null, 'updatedTimestamp' => $this->Updated, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Entries]));
}
$feed = Template::OpdsAcquisitionFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->ParentUrl, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]);
return $this->XmlString;
}
$this->SaveIfChanged($path, $feed, $updatedTimestamp);
public function Save(): void{
$this->Updated = new DateTime();
$this->SaveIfChanged();
}
}

View file

@ -1,87 +1,45 @@
<?
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\preg_replace;
use function Safe\rename;
use function Safe\tempnam;
use function Safe\unlink;
class OpdsFeed{
public $Id;
public $Url;
public $Title;
public $ParentUrl;
class OpdsFeed extends AtomFeed{
public $Updated = null;
public $Parent = null; // OpdsNavigationFeed class
public function __construct(string $url, string $title, ?string $parentUrl){
$this->Url = $url;
$this->Id = SITE_URL . $url;
$this->Title = $title;
$this->ParentUrl = $parentUrl;
public function __construct(string $url, string $title, string $path, array $entries, ?OpdsNavigationFeed $parent){
parent::__construct($url, $title, $path, $entries);
$this->Parent = $parent;
$this->Stylesheet = '/opds/style';
}
protected function Sha1Entries(string $xmlString): string{
try{
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $xmlString));
$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
$xml->registerXPathNamespace('schema', 'http://schema.org/');
protected function SaveUpdatedTimestamp(string $entryId, DateTime $updatedTimestamp): void{
// Only save the updated timestamp for the given entry ID in this file
// Remove any <updated> elements, we don't want to compare against those.
foreach($xml->xpath('//updated') ?: [] as $element){
unset($element[0]);
foreach($this->Entries as $entry){
if($entry->Id == $entryId){
$entry->Updated = $updatedTimestamp;
}
$output = '';
foreach($xml->xpath('/feed/entry') ?: [] as $entry){
$output .= $entry->asXml();
}
return sha1(preg_replace('/\s/ius', '', $output));
}
catch(Exception $ex){
// Invalid XML
return '';
$this->XmlString = null;
file_put_contents($this->Path, $this->GetXmlString());
// Do we have any parents of our own to update?
if($this->Parent !== null){
$this->Parent->SaveUpdatedTimestamp($this->Id, $updatedTimestamp);
}
}
protected function SaveIfChanged(string $path, string $feed, string $updatedTimestamp): void{
$tempFilename = tempnam('/tmp/', 'se-opds-');
file_put_contents($tempFilename, $feed);
exec('se clean ' . escapeshellarg($tempFilename) . ' 2>&1', $output); // Capture the result in case there's an error, otherwise it prints to stdout
$feed = file_get_contents($tempFilename);
protected function SaveIfChanged(): void{
// Did we actually update the feed? If so, write to file and update the index
if(!is_file($path) || ($this->Sha1Entries($feed) != $this->Sha1Entries(file_get_contents($path)))){
if($this->HasChanged($this->Path)){
// Files don't match, save the file and update the parent navigation feed with the last updated timestamp
$parentFilepath = WEB_ROOT . str_replace(SITE_URL, '', $this->ParentUrl);
if(!is_file($parentFilepath)){
$parentFilepath .= '/index.xml';
}
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($parentFilepath)));
$feedEntries = $xml->xpath('/feed/entry[id="' . $this->Id . '"]');
if(!$feedEntries){
$feedEntries = [];
if($this->Parent !== null){
$this->Parent->SaveUpdatedTimestamp($this->Id, $this->Updated);
}
if(sizeof($feedEntries) > 0){
$feedEntries[0]->{'updated'} = $updatedTimestamp;
}
$xmlString = $xml->asXml();
if($xmlString === false){
$xmlString = '';
}
file_put_contents($parentFilepath, str_replace(" ns=", " xmlns=", $xmlString));
// If we include this stylsheet declaration in the OPDS template, `se clean` will remove it and also
// add a bunch of empty namespaces in the output. So, add it programatically here instead.
file_put_contents($tempFilename, str_replace("?>", "?>\n<?xml-stylesheet href=\"/opds/style\" type=\"text/xsl\"?>", file_get_contents($tempFilename)));
rename($tempFilename, $path);
}
else{
unlink($tempFilename);
// Save our own file
parent::Save();
}
}
}

View file

@ -9,7 +9,7 @@ class OpdsNavigationEntry{
public $Title;
public function __construct(string $url, string $rel, string $type, ?DateTime $updated, string $title, string $description){
$this->Id = SITE_URL . $url;
$this->Id = 'https://standardebooks.org' . $url;
$this->Url = $url;
$this->Rel = $rel;
$this->Type = $type;

View file

@ -1,23 +1,46 @@
<?
use Safe\DateTime;
use function Safe\file_get_contents;
use function Safe\file_put_contents;
use function Safe\gmdate;
use function Safe\rename;
use function Safe\tempnam;
class OpdsNavigationFeed extends OpdsFeed{
public $Entries = [];
public function __construct(string $url, string $title, string $path, array $entries, ?OpdsNavigationFeed $parent){
parent::__construct($url, $title, $path, $entries, $parent);
public function __construct(string $url, string $title, ?string $parentUrl, array $entries){
parent::__construct($url, $title, $parentUrl);
$this->Entries = $entries;
// If the file already exists, try to fill in the existing updated timestamps from the file.
// That way, if the file has changed, we only update the changed entry,
// and not every single entry. This is only relevant to navigation feeds,
// because their *entries* along with their root updated timestamp change if their entries have an update.
// For acquisition feeds, only the root updated timestamp changes, so this is not a concern.
if(file_exists($this->Path)){
try{
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($this->Path)));
foreach($xml->xpath('//entry') ?: [] as $existingEntry){
foreach($this->Entries as $entry){
if($entry->Id == $existingEntry->id){
$entry->Updated = new DateTime($existingEntry->updated);
}
}
}
}
catch(Exception $ex){
// XML parsing failure
}
}
}
public function Save(string $path): void{
$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
protected function GetXmlString(): string{
if($this->XmlString === null){
$this->XmlString = $this->CleanXmlString(Template::OpdsNavigationFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->Parent ? $this->Parent->Url : null, 'updatedTimestamp' => $this->Updated, 'entries' => $this->Entries]));
}
$feed = Template::OpdsNavigationFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'parentUrl' => $this->ParentUrl, 'updatedTimestamp' => $updatedTimestamp, 'entries' => $this->Entries]);
return $this->XmlString;
}
$this->SaveIfChanged($path, $feed, $updatedTimestamp);
public function Save(): void{
$this->Updated = new DateTime();
$this->SaveIfChanged();
}
}

20
lib/RssFeed.php Normal file
View file

@ -0,0 +1,20 @@
<?
/**
 * An RSS feed with a channel description, styled with the `/rss/style` stylesheet.
 */
class RssFeed extends Feed{
	/** @var string The description passed to the constructor and handed to the RSS template. */
	public $Description;

	/**
	 * @param string $url Forwarded to the parent constructor.
	 * @param string $title Forwarded to the parent constructor.
	 * @param string $path Forwarded to the parent constructor.
	 * @param string $description Stored in `$Description`.
	 * @param array $entries Forwarded to the parent constructor.
	 */
	public function __construct(string $url, string $title, string $path, string $description, array $entries){
		parent::__construct($url, $title, $path, $entries);

		$this->Stylesheet = '/rss/style';
		$this->Description = $description;
	}

	/**
	 * Render the feed through the RssFeed template and clean it, caching the result in `$XmlString`.
	 *
	 * @return string The cleaned feed XML.
	 */
	protected function GetXmlString(): string{
		// Already rendered, reuse the cached copy
		if($this->XmlString !== null){
			return $this->XmlString;
		}

		$templateVariables = [
			'url' => $this->Url,
			'description' => $this->Description,
			'title' => $this->Title,
			'entries' => $this->Entries,
			'updatedTimestamp' => (new DateTime())->format('r')
		];

		$renderedFeed = Template::RssFeed($templateVariables);
		$this->XmlString = $this->CleanXmlString($renderedFeed);

		return $this->XmlString;
	}
}

View file

@ -121,12 +121,8 @@ if ! [ -x "${scriptsDir}"/reset-php-fpm-opcache ]; then
die "\"${scriptsDir}\"/reset-php-fpm-opcache is not an executable file."
fi
if ! [ -f "${scriptsDir}"/generate-opds ]; then
die "\"${scriptsDir}\"/generate-opds\" is not a file or could not be found."
fi
if ! [ -f "${scriptsDir}"/generate-rss ]; then
die "\"${scriptsDir}\"/generate-rss\" is not a file or could not be found."
if ! [ -f "${scriptsDir}"/generate-feeds ]; then
die "\"${scriptsDir}\"/generate-feeds\" is not a file or could not be found."
fi
mkdir -p "${webRoot}"/www/images/covers/
@ -382,7 +378,7 @@ if [ "${verbose}" = "true" ]; then
printf "Rebuilding OPDS catalog ... "
fi
"${scriptsDir}/generate-opds" --webroot "${webRoot}" --weburl "${webUrl}"
"${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}"
sudo chown --recursive se:committers "${webRoot}/www/opds/"*
sudo chmod --recursive 664 "${webRoot}/www/opds/"*.xml
@ -400,13 +396,6 @@ if [ "${verbose}" = "true" ]; then
printf "Rebuilding new releases RSS feed ... "
fi
output=$("${scriptsDir}/generate-rss" --webroot "${webRoot}" --weburl "${webUrl}")
# Check the return code; if the script failed (for example invalid XML in content.opf), don't overwrite the existing feed with a blank file
if [ $? = 0 ]; then
echo "${output}" > "${webRoot}/www/rss/new-releases.xml"
fi
if [ "${verbose}" = "true" ]; then
printf "Done.\n"
fi

View file

@ -17,6 +17,7 @@ $allEbooks = [];
$newestEbooks = [];
$subjects = [];
$ebooksBySubject = [];
$ebooksPerNewestEbooksFeed = 30;
// Iterate over all ebooks to build the various feeds
foreach($contentFiles as $path){
@ -49,6 +50,36 @@ foreach($contentFiles as $path){
}
}
// A single timestamp, passed as the `updated` argument of each navigation entry built in this script
$now = new DateTime();
// Create OPDS feeds
// Entries of the root navigation feed. The OpdsNavigationEntry arguments are, in order:
// URL, link rel, feed kind, updated timestamp, title, description.
$opdsRootEntries = [
new OpdsNavigationEntry(
'/opds/new-releases',
'http://opds-spec.org/sort/new',
'acquisition',
$now,
'Newest ' . number_format($ebooksPerNewestEbooksFeed) . ' Standard Ebooks',
'A list of the ' . number_format($ebooksPerNewestEbooksFeed) . ' newest Standard Ebooks, most-recently-released first.'),
new OpdsNavigationEntry(
'/opds/subjects',
'subsection',
'navigation',
$now,
'Standard Ebooks by Subject',
'Browse Standard Ebooks by subject.'),
new OpdsNavigationEntry(
'/opds/all',
'http://opds-spec.org/crawlable',
'acquisition',
$now,
'All Standard Ebooks',
'A list of all Standard Ebooks, most-recently-updated first. This is a Complete Acquisition Feed as defined in OPDS 1.2 §2.5.')
];
// The root feed has no parent feed, so null is passed as the final argument
$opdsRoot = new OpdsNavigationFeed('/opds', 'Standard Ebooks', WEB_ROOT . '/opds/index.xml', $opdsRootEntries, null);
$opdsRoot->Save();
// Create the subjects navigation document
sort($subjects);
$subjectNavigationEntries = [];
@ -60,27 +91,32 @@ foreach($subjects as $subject){
$summary .= ' tagged with “' . strtolower($subject) . ',” most-recently-released first.';
// We leave the updated timestamp blank, as it will be filled in when we generate the individual feeds
$subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', null, $subject, $summary);
$subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', $now, $subject, $summary);
}
$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', '/opds', $subjectNavigationEntries);
$subjectsFeed->Save(WEB_ROOT . '/opds/subjects/index.xml');
$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', WEB_ROOT . '/opds/subjects/index.xml', $subjectNavigationEntries, $opdsRoot);
$subjectsFeed->Save();
// Now generate each individual subject feed
foreach($ebooksBySubject as $subject => $ebooks){
krsort($ebooks);
$subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject), (string)$subject, '/opds/subjects', $ebooks);
$subjectFeed->Save(WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml');
$subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject), (string)$subject, WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml', $ebooks, $subjectsFeed);
$subjectFeed->Save();
}
// Create the 'all' feed
krsort($allEbooks);
$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', '/opds', $allEbooks, true);
$allFeed->Save(WEB_ROOT . '/opds/all.xml');
$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', WEB_ROOT . '/opds/all.xml', $allEbooks, $opdsRoot, true);
$allFeed->Save();
// Create the 'newest' feed
krsort($newestEbooks);
$newestEbooks = array_slice($newestEbooks, 0, 30);
$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest 30 Standard Ebooks', '/opds', $newestEbooks);
$newestFeed->Save(WEB_ROOT . '/opds/new-releases.xml');
$newestEbooks = array_slice($newestEbooks, 0, $ebooksPerNewestEbooksFeed);
$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest ' . number_format($ebooksPerNewestEbooksFeed) . ' Standard Ebooks', WEB_ROOT . '/opds/new-releases.xml', $newestEbooks, $opdsRoot);
$newestFeed->Save();
// Now create RSS feeds
// Create the 'newest' feed
$newestFeed = new RssFeed('/rss/new-releases', 'Newest ' . number_format($ebooksPerNewestEbooksFeed) . ' Standard Ebooks', WEB_ROOT . '/rss/new-releases.xml', 'A list of the ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks ebook releases, most-recently-released first.', $newestEbooks);
$newestFeed->Save();
?>

View file

@ -1,90 +0,0 @@
#!/usr/bin/php
<?
// Prints an RSS 2.0 feed of the most recently released ebooks to standard output.
//
// Options:
//   --webroot  Directory whose `www/ebooks/` subdirectory is searched for `content.opf` files.
//              Defaults to `/standardebooks.org/web`.
//   --weburl   Base URL used for the links in the feed. Defaults to `https://standardebooks.org`.

require_once('/standardebooks.org/web/lib/Core.php');

use function Safe\file_get_contents;
use function Safe\getopt;
use function Safe\gmdate;
use function Safe\krsort;
use function Safe\preg_replace;
use function Safe\strtotime;

$longopts = ["webroot:", "weburl:"];
$options = getopt("", $longopts);
$webRoot = $options["webroot"] ?? "/standardebooks.org/web";
$webUrl = $options["weburl"] ?? "https://standardebooks.org";

$rssLength = 30;

$contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? ''));

$sortedContentFiles = array();

foreach($contentFiles as $path){
	if($path === ''){
		continue;
	}

	// Rewrite the default namespace declaration into a plain attribute so that
	// the un-prefixed XPath expressions below can match the OPF elements.
	$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($path) ?: ''));
	$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

	$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
	$publishedTimestamp = strtotime((string)array_shift($temp));

	// Key on the zero-padded timestamp *and* the path, so that two ebooks released
	// in the same second don't overwrite each other in the array.
	$sortedContentFiles[sprintf('%012d %s', $publishedTimestamp, $path)] = $xml;
}

// The keys are zero-padded, so a descending string sort is newest-first
krsort($sortedContentFiles, SORT_STRING);

$sortedContentFiles = array_slice($sortedContentFiles, 0, $rssLength);

// XSL stylesheet mime type must be `text/xsl` otherwise Chrome doesn't read it
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<?xml-stylesheet href=\"/rss/style\" type=\"text/xsl\"?>\n");
?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>Standard Ebooks - New Releases</title>
		<link><?= $webUrl ?></link>
		<description>A list of the <?= number_format($rssLength) ?> latest Standard Ebooks ebook releases, most-recently-released first.</description>
		<language>en-US</language>
		<copyright>https://creativecommons.org/publicdomain/zero/1.0/</copyright>
		<lastBuildDate><?= gmdate('D, d M Y H:i:s +0000') ?></lastBuildDate>
		<docs>http://blogs.law.harvard.edu/tech/rss</docs>
		<atom:link href="<?= $webUrl ?>/rss/new-releases" rel="self" type="application/rss+xml"/>
		<image>
			<url><?= $webUrl ?>/images/logo-rss.png</url>
			<title>Standard Ebooks - New Releases</title>
			<description>The Standard Ebooks logo</description>
			<link><?= $webUrl ?></link>
			<height>144</height>
			<width>144</width>
		</image>
		<? foreach($sortedContentFiles as $xml){
			$temp = $xml->xpath('/package/metadata/dc:identifier') ?: [];
			$url = preg_replace('/^url:/ius', '', (string)array_shift($temp));
			$url = preg_replace('/^https:\/\/standardebooks.org/ius', $webUrl, $url);

			$temp = $xml->xpath('/package/metadata/dc:title') ?: [];
			$title = (string)(array_shift($temp) ?? '');

			$temp = $xml->xpath('/package/metadata/dc:creator') ?: [];
			$title .= ', by ' . (string)(array_shift($temp) ?? '');

			$temp = $xml->xpath('/package/metadata/dc:description') ?: [];
			$description = (string)(array_shift($temp) ?? '');

			$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
			$published = gmdate('D, d M Y H:i:s +0000', strtotime((string)(array_shift($temp) ?? '')) ?: 0);

			$seSubjects = $xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [];

			// The values read from the OPF file are decoded text, so every one of them
			// must be escaped again before being printed into the XML below.
		?><item>
			<title><?= htmlspecialchars($title, ENT_QUOTES, 'UTF-8') ?></title>
			<link><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></link>
			<description><?= htmlspecialchars($description, ENT_QUOTES, 'UTF-8') ?></description>
			<pubDate><?= $published ?></pubDate>
			<guid><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></guid>
			<? foreach($seSubjects as $seSubject){ ?>
			<category domain="standardebooks.org"><?= htmlspecialchars((string)$seSubject, ENT_QUOTES, 'UTF-8') ?></category>
			<? } ?>
		</item>
		<? } ?>
	</channel>
</rss>

View file

@ -1,37 +1,37 @@
<entry>
<id><?= SITE_URL . $ebook->Url ?></id>
<title><?= htmlspecialchars($ebook->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
<title><?= Formatter::ToPlainXmlText($ebook->Title) ?></title>
<? foreach($ebook->Authors as $author){ ?>
<author>
<name><?= htmlspecialchars($author->Name, ENT_QUOTES|ENT_XML1, 'utf-8') ?></name>
<name><?= Formatter::ToPlainXmlText($author->Name) ?></name>
<uri><?= SITE_URL . htmlspecialchars($ebook->AuthorsUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></uri>
<? if($author->FullName !== null){ ?><schema:alternateName><?= htmlspecialchars($author->FullName, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:alternateName><? } ?>
<? if($author->WikipediaUrl !== null){ ?><schema:sameAs><?= htmlspecialchars($author->WikipediaUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:sameAs><? } ?>
<? if($author->NacoafUrl !== null){ ?><schema:sameAs><?= htmlspecialchars($author->NacoafUrl, ENT_QUOTES|ENT_XML1, 'utf-8') ?></schema:sameAs><? } ?>
<? if($author->FullName !== null){ ?><schema:alternateName><?= Formatter::ToPlainXmlText($author->FullName) ?></schema:alternateName><? } ?>
<? if($author->WikipediaUrl !== null){ ?><schema:sameAs><?= Formatter::ToPlainXmlText($author->WikipediaUrl) ?></schema:sameAs><? } ?>
<? if($author->NacoafUrl !== null){ ?><schema:sameAs><?= Formatter::ToPlainXmlText($author->NacoafUrl) ?></schema:sameAs><? } ?>
</author>
<? } ?>
<dc:issued><?= $ebook->Timestamp->format('Y-m-d\TH:i:s\Z') ?></dc:issued>
<updated><?= $ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') ?></updated>
<dc:language><?= htmlspecialchars($ebook->Language, ENT_QUOTES|ENT_XML1, 'utf-8') ?></dc:language>
<dc:language><?= Formatter::ToPlainXmlText($ebook->Language) ?></dc:language>
<dc:publisher>Standard Ebooks</dc:publisher>
<? foreach($ebook->Sources as $source){ ?>
<dc:source><?= htmlspecialchars($source->Url, ENT_QUOTES|ENT_XML1, 'utf-8') ?></dc:source>
<dc:source><?= Formatter::ToPlainXmlText($source->Url) ?></dc:source>
<? } ?>
<rights>Public domain in the United States. Users located outside of the United States must check their local laws before using this ebook. Original content released to the public domain via the Creative Commons CC0 1.0 Universal Public Domain Dedication.</rights>
<summary type="text"><?= htmlspecialchars($ebook->Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?></summary>
<summary type="text"><?= Formatter::ToPlainXmlText($ebook->Description) ?></summary>
<content type="text/html"><?= $ebook->LongDescription ?></content>
<? foreach($ebook->LocTags as $subject){ ?>
<category scheme="http://purl.org/dc/terms/LCSH" term="<?= htmlspecialchars($subject, ENT_QUOTES|ENT_XML1, 'utf-8') ?>"/>
<category scheme="http://purl.org/dc/terms/LCSH" term="<?= Formatter::ToPlainXmlText($subject) ?>"/>
<? } ?>
<? foreach($ebook->Tags as $subject){ ?>
<category scheme="https://standardebooks.org/vocab/subjects" term="<?= htmlspecialchars($subject->Name, ENT_QUOTES|ENT_XML1, 'utf-8') ?>"/>
<category scheme="https://standardebooks.org/vocab/subjects" term="<?= Formatter::ToPlainXmlText($subject->Name) ?>"/>
<? } ?>
<link href="<?= $ebook->Url ?>/downloads/cover.jpg" rel="http://opds-spec.org/image" type="image/jpeg"/>
<link href="<?= $ebook->Url ?>/downloads/cover-thumbnail.jpg" rel="http://opds-spec.org/image/thumbnail" type="image/jpeg"/>
<link href="<?= $ebook->Url ?>" rel="related" title="This ebooks page at Standard Ebooks" type="text/html"/>
<link href="<?= $ebook->EpubUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="Recommended compatible epub" type="application/epub+zip" />
<link href="<?= $ebook->AdvancedEpubUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="Advanced epub" type="application/epub+zip" />
<link href="<?= $ebook->KepubUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="Kobo Kepub epub" type="application/kepub+zip" />
<link href="<?= $ebook->Azw3Url ?>" rel="http://opds-spec.org/acquisition/open-access" title="Amazon Kindle azw3" type="application/x-mobipocket-ebook" />
<link href="<?= $ebook->TextSinglePageUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="XHTML" type="application/xhtml+xml" />
<link href="<?= SITE_URL . $ebook->Url ?>/downloads/cover.jpg" rel="http://opds-spec.org/image" type="image/jpeg"/>
<link href="<?= SITE_URL . $ebook->Url ?>/downloads/cover-thumbnail.jpg" rel="http://opds-spec.org/image/thumbnail" type="image/jpeg"/>
<link href="<?= SITE_URL . $ebook->Url ?>" rel="related" title="This ebooks page at Standard Ebooks" type="text/html"/>
<link href="<?= SITE_URL . $ebook->EpubUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="Recommended compatible epub" type="application/epub+zip" />
<link href="<?= SITE_URL . $ebook->AdvancedEpubUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="Advanced epub" type="application/epub+zip" />
<link href="<?= SITE_URL . $ebook->KepubUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="Kobo Kepub epub" type="application/kepub+zip" />
<link href="<?= SITE_URL . $ebook->Azw3Url ?>" rel="http://opds-spec.org/acquisition/open-access" title="Amazon Kindle azw3" type="application/x-mobipocket-ebook" />
<link href="<?= SITE_URL . $ebook->TextSinglePageUrl ?>" rel="http://opds-spec.org/acquisition/open-access" title="XHTML" type="application/xhtml+xml" />
</entry>

View file

@ -10,22 +10,22 @@
$isCrawlable = $isCrawlable ?? false;
// Note that the XSL stylesheet gets stripped during `se clean` when we generate the OPDS feed.
// Note that the XSL stylesheet gets stripped during `se clean` when we generate the feed.
// `se clean` will also start adding empty namespaces everywhere if we include the stylesheet declaration first.
// We have to add it programmatically when saving the OPDS file.
// We have to add it programmatically when saving the feed file.
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/"<? if($isCrawlable){ ?> xmlns:fh="http://purl.org/syndication/history/1.0"<? } ?>>
<id><?= $id ?></id>
<link href="<?= SITE_URL . $url ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<id><?= Formatter::ToPlainXmlText($id) ?></id>
<link href="<?= SITE_URL . htmlspecialchars($url, ENT_QUOTES|ENT_XML1, 'utf-8') ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="<?= SITE_URL ?>/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= SITE_URL ?><?= $parentUrl ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= SITE_URL ?><?= Formatter::ToPlainXmlText($parentUrl) ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= SITE_URL ?>/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="<?= SITE_URL ?>/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml"/>
<title><?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
<title><?= Formatter::ToPlainXmlText($title) ?></title>
<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
<icon>/images/logo.png</icon>
<updated><?= $updatedTimestamp ?></updated>
<icon><?= SITE_URL ?>/images/logo.png</icon>
<updated><?= $updatedTimestamp->format('Y-m-d\TH:i:s\Z') ?></updated>
<? if($isCrawlable){ ?><fh:complete/><? } ?>
<author>
<name>Standard Ebooks</name>

View file

@ -1,40 +1,31 @@
<?
/* Notes:
- *All* OPDS feeds must contain a rel="http://opds-spec.org/crawlable" link pointing to the /opds/all feed
- The <fh:complete/> element is required to note this as a "Complete Acquisition Feeds"; see https://specs.opds.io/opds-1.2#25-complete-acquisition-feeds
*/
// Note that the XSL stylesheet gets stripped during `se clean` when we generate the OPDS feed.
// Note that the XSL stylesheet gets stripped during `se clean` when we generate the feed.
// `se clean` will also start adding empty namespaces everywhere if we include the stylesheet declaration first.
// We have to add it programmatically when saving the OPDS file.
// We have to add it programmatically when saving the feed file.
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
<id><?= $id ?></id>
<link href="<?= SITE_URL . $url ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<id><?= Formatter::ToPlainXmlText($id) ?></id>
<link href="<?= SITE_URL . htmlspecialchars($url, ENT_QUOTES|ENT_XML1, 'utf-8') ?>" rel="self" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= SITE_URL ?>/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="<?= SITE_URL ?>/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="<?= SITE_URL ?>/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml"/>
<? if($parentUrl !== null){ ?><link href="<?= SITE_URL ?><?= $parentUrl ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/><? } ?>
<title><?= htmlspecialchars($title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
<? if($parentUrl !== null){ ?><link href="<?= SITE_URL ?><?= Formatter::ToPlainXmlText($parentUrl) ?>" rel="up" type="application/atom+xml;profile=opds-catalog;kind=navigation"/><? } ?>
<title><?= Formatter::ToPlainXmlText($title) ?></title>
<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
<icon>/images/logo.png</icon>
<updated><?= $updatedTimestamp ?></updated>
<icon><?= SITE_URL ?>/images/logo.png</icon>
<updated><?= $updatedTimestamp->format('Y-m-d\TH:i:s\Z') ?></updated>
<author>
<name>Standard Ebooks</name>
<uri><?= SITE_URL ?></uri>
</author>
<? foreach($entries as $entry){ ?>
<entry>
<title><?= htmlspecialchars($entry->Title, ENT_QUOTES|ENT_XML1, 'utf-8') ?></title>
<link href="<?= $entry->Url ?>" rel="<?= $entry->Rel ?>" type="application/atom+xml;profile=opds-catalog;kind=<?= $entry->Type ?>"/>
<title><?= Formatter::ToPlainXmlText($entry->Title) ?></title>
<link href="<?= SITE_URL . Formatter::ToPlainXmlText($entry->Url) ?>" rel="<?= Formatter::ToPlainXmlText($entry->Rel) ?>" type="application/atom+xml;profile=opds-catalog;kind=<?= $entry->Type ?>"/>
<updated><? if($entry->Updated !== null){ ?><?= $entry->Updated->format('Y-m-d\TH:i:s\Z') ?><? } ?></updated>
<id><?= htmlspecialchars($entry->Id, ENT_QUOTES|ENT_XML1, 'utf-8') ?></id>
<content type="text"><?= htmlspecialchars($entry->Description, ENT_QUOTES|ENT_XML1, 'utf-8') ?></content>
<id><?= Formatter::ToPlainXmlText($entry->Id) ?></id>
<content type="text"><?= Formatter::ToPlainXmlText($entry->Description) ?></content>
</entry>
<? } ?>
</feed>

42
templates/RssFeed.php Normal file
View file

@ -0,0 +1,42 @@
<?
// Template that renders an RSS 2.0 feed document.
// Expects these variables to be defined: $title, $description, $url, $updatedTimestamp, and $entries.
// Each entry is read for Title, Url, Description, Timestamp, Identifier, Tags (each with a Name), and a nullable EpubUrl.
// $updatedTimestamp is echoed as-is, so it must already be a string.
// NOTE(review): RSS 2.0 expects an RFC 822 date in <lastBuildDate>; confirm the caller formats it that way.
// Item <guid> values are the entry identifier with any leading `url:` prefix (case-insensitive) removed.
// The <enclosure> length is the size in bytes of the epub file found on disk at WEB_ROOT . EpubUrl.
// NOTE(review): Safe\DateTime does not appear to be referenced in this template; confirm whether the import is still needed.
use Safe\DateTime;
// Note that the XSL stylesheet gets stripped during `se clean` when we generate the feed.
// `se clean` will also start adding empty namespaces everywhere if we include the stylesheet declaration first.
// We have to add it programmatically when saving the feed file.
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title><?= Formatter::ToPlainXmlText($title) ?></title>
<link><?= SITE_URL ?></link>
<description><?= Formatter::ToPlainXmlText($description) ?></description>
<language>en-US</language>
<copyright>https://creativecommons.org/publicdomain/zero/1.0/</copyright>
<lastBuildDate><?= $updatedTimestamp ?></lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<atom:link href="<?= SITE_URL . Formatter::ToPlainXmlText($url) ?>" rel="self" type="application/rss+xml"/>
<image>
<url><?= SITE_URL ?>/images/logo-rss.png</url>
<title><?= Formatter::ToPlainXmlText($title) ?></title> <? /* must be identical to channel title */ ?>
<description>The Standard Ebooks logo</description>
<link><?= SITE_URL ?></link>
<height>144</height>
<width>144</width>
</image>
<? foreach($entries as $entry){ ?>
<item>
<title><?= Formatter::ToPlainXmlText($entry->Title) ?></title>
<link><?= SITE_URL . Formatter::ToPlainXmlText($entry->Url) ?></link>
<description><?= Formatter::ToPlainXmlText($entry->Description) ?></description>
<pubDate><?= $entry->Timestamp->format('r') ?></pubDate>
<guid><?= Formatter::ToPlainXmlText(preg_replace('/^url:/ius', '', $entry->Identifier)) ?></guid>
<? foreach($entry->Tags as $tag){ ?>
<category domain="https://standardebooks.org/vocab/subjects"><?= Formatter::ToPlainXmlText($tag->Name) ?></category>
<? } ?>
<? if($entry->EpubUrl !== null){ ?>
<enclosure url="<?= SITE_URL . Formatter::ToPlainXmlText($entry->EpubUrl) ?>" length="<?= filesize(WEB_ROOT . $entry->EpubUrl) ?>" type="application/epub+zip" /> <? /* Only one <enclosure> is allowed */ ?>
<? } ?>
</item>
<? } ?>
</channel>
</rss>

1189
www/atom/new-releases.xml Normal file

File diff suppressed because it is too large Load diff

View file

@ -1190,12 +1190,14 @@ main.ebooks > aside.alert + ol{
margin-top: 4rem;
}
.rss .download,
.opds .download{
font-weight: bold;
margin-top: 1rem;
}
.rss .download + ul,
.rss .download + ul li,
.opds .download + ul,
.opds .download + ul > li{
margin-top: 0;

View file

@ -15,7 +15,7 @@ require_once('Core.php');
<p>We offer several feeds that you can use to get notified about new ebooks, or to browse and download from our catalog directly in your ereader.</p>
<section id="rss-feeds">
<h2>RSS feeds</h2>
<p>Currently there’s only one RSS feed available.</p>
<p>RSS feeds can be read by one of the many <a href="https://en.wikipedia.org/wiki/Comparison_of_feed_aggregators">RSS clients</a> available for download, like <a href="https://www.thunderbird.net/en-US/">Thunderbird</a>.</p>
<ul class="feed">
<li>
<p><a href="/rss/new-releases">New releases</a> (RSS 2.0)</p>
@ -26,7 +26,7 @@ require_once('Core.php');
</section>
<section id="opds-feeds">
<h2>OPDS feeds</h2>
<p><a href="https://en.wikipedia.org/wiki/Open_Publication_Distribution_System">OPDS feeds</a> are designed for use with ereading systems like <a href="http://koreader.rocks/">KOreader</a> or <a href="https://calibre-ebook.com">Calibre</a>, or with ereaders like <a href="https://johnfactotum.github.io/foliate/">Foliate</a>. They allow you to search, browse, and download from our catalog, directly in your ereader.</p>
<p><a href="https://en.wikipedia.org/wiki/Open_Publication_Distribution_System">OPDS feeds</a> are designed for use with ereading systems like <a href="http://koreader.rocks/">KOreader</a> or <a href="https://calibre-ebook.com">Calibre</a>, or with ereaders like <a href="https://johnfactotum.github.io/foliate/">Foliate</a>. They allow you to search, browse, and download from our catalog, directly in your ereader. They’re also perfect for organizations who wish to download and process our catalog efficiently.</p>
<ul class="feed">
<li>
<p><a href="/opds">The Standard Ebooks OPDS feed</a> (OPDS 1.2)</p>

View file

@ -2,7 +2,6 @@
require_once('Core.php');
use Safe\DateTime;
$now = new DateTime('now', new DateTimeZone('UTC'));
$ebooks = [];
try{
@ -21,15 +20,15 @@ print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<id>https://standardebooks.org/opds/all?query=<?= urlencode($query) ?></id>
<link href="/opds/all?query=<?= urlencode($query) ?>" rel="self" type="application/atom+xml;profile=opds-catalog"/>
<link href="/ebooks/ebooks?query=doyle" rel="alternate" type="text/html"/>
<link href="/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml"/>
<link href="https://standardebooks.org/opds/all?query=<?= urlencode($query) ?>" rel="self" type="application/atom+xml;profile=opds-catalog"/>
<link href="https://standardebooks.org/ebooks/ebooks?query=<?= urlencode($query) ?>" rel="alternate" type="text/html"/>
<link href="https://standardebooks.org/opds" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link href="https://standardebooks.org/opds/all" rel="http://opds-spec.org/crawlable" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<link href="https://standardebooks.org/ebooks/opensearch" rel="search" type="application/opensearchdescription+xml"/>
<title>Standard Ebooks OPDS Search Results</title>
<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
<icon>/images/logo.png</icon>
<updated><?= $now->Format('Y-m-d\TH:i:s\Z') ?></updated>
<updated><?= (new Datetime())->Format('Y-m-d\TH:i:s\Z') ?></updated>
<author>
<name>Standard Ebooks</name>
<uri>https://standardebooks.org</uri>

View file

@ -32,6 +32,23 @@ print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n")
<p>
<xsl:value-of select="description"/>
</p>
<xsl:if test="enclosure">
<p class="download">Read</p>
<ul>
<xsl:for-each select="enclosure">
<li>
<p>
<a>
<xsl:attribute name="href">
<xsl:value-of select="@url"/>
</xsl:attribute>
Download compatible epub
</a>
</p>
</li>
</xsl:for-each>
</ul>
</xsl:if>
</li>
</xsl:for-each>
</ol>