Refactor generate-opds script and create a new 'newest 30' OPDS feed

This commit is contained in:
Alex Cabal 2020-06-24 19:12:38 -05:00
parent 7a043c3e85
commit cae117951b
2 changed files with 57 additions and 42 deletions

42
lib/OpdsFeed.php Normal file
View file

@ -0,0 +1,42 @@
<?
use function Safe\file_put_contents;
use function Safe\rename;
use function Safe\tempnam;
/**
 * A single OPDS feed: holds the feed's metadata and entries, renders them through
 * the OpdsFeed template, and writes the result to disk.
 */
class OpdsFeed{
	/** @var string Feed identifier; the constructor sets it to the feed URL. */
	public $Id;

	/** @var string URL of the feed. */
	public $Url;

	/** @var string Title of the feed. */
	public $Title;

	/** @var array Entries handed to the feed template as `entries`. */
	public $Ebooks = [];

	/** @var bool Passed to the feed template as `isCrawlable`. */
	public $IsCrawlable;

	/**
	 * @param string $url URL of the feed; also used as the feed's ID.
	 * @param string $title Title of the feed.
	 * @param array $ebooks Entries to include in the feed.
	 * @param bool $isCrawlable Passed through to the template as `isCrawlable`.
	 */
	public function __construct(string $url, string $title, array $ebooks, bool $isCrawlable = false){
		$this->Url = $url;
		$this->Id = $url;
		$this->Title = $title;
		$this->Ebooks = $ebooks;
		$this->IsCrawlable = $isCrawlable;
	}

	/**
	 * Render the feed, write it to $filename, and stamp the feed's entry in the
	 * OPDS index with the time of this save.
	 *
	 * The feed is rendered into a temporary file, run through `se clean`, and only
	 * then moved into place, so $filename never holds a half-written feed.
	 *
	 * @param string $filename Destination path of the rendered feed.
	 *
	 * @throws RuntimeException If the OPDS index can't be read or serialized.
	 */
	public function Save(string $filename): void{
		$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');

		$feed = Template::OpdsFeed(['id' => $this->Id, 'url' => $this->Url, 'title' => $this->Title, 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => $this->IsCrawlable, 'entries' => $this->Ebooks]);

		$tempFilename = tempnam('/tmp/', 'se-opds-');

		try{
			file_put_contents($tempFilename, $feed);
			exec('se clean ' . escapeshellarg($tempFilename));
			rename($tempFilename, $filename);
		}
		finally{
			// After a successful rename the temp file is gone; this only removes
			// leftovers when one of the steps above threw.
			if(file_exists($tempFilename)){
				unlink($tempFilename);
			}
		}

		$this->UpdateIndexTimestamp($updatedTimestamp);
	}

	/**
	 * Set the `<updated>` value of this feed's entry in the OPDS index file.
	 *
	 * If the index has no entry whose `<id>` matches this feed, the index is left untouched.
	 *
	 * @param string $updatedTimestamp Timestamp to write into the entry.
	 *
	 * @throws RuntimeException If the index can't be read or serialized.
	 */
	private function UpdateIndexTimestamp(string $updatedTimestamp): void{
		$indexFilename = WEB_ROOT . '/opds/index.xml';

		$indexSource = file_get_contents($indexFilename);
		if($indexSource === false){
			throw new RuntimeException('Couldn\'t read OPDS index: ' . $indexFilename);
		}

		// Rename the default namespace declaration so the XPath query below can
		// address elements without a prefix; it's restored when we serialize.
		$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $indexSource));
		$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
		$xml->registerXPathNamespace('schema', 'http://schema.org/');

		$updatedElements = $xml->xpath('/feed/entry[id="' . $this->Id . '"]/updated');
		if(!is_array($updatedElements) || sizeof($updatedElements) == 0){
			// This feed isn't listed in the index, so there's nothing to stamp
			return;
		}

		// Assigning to offset 0 of the element replaces its text content
		$updatedElement = $updatedElements[0];
		$updatedElement[0] = $updatedTimestamp;

		$indexXml = $xml->asXml();
		if(!is_string($indexXml)){
			throw new RuntimeException('Couldn\'t serialize OPDS index: ' . $indexFilename);
		}

		file_put_contents($indexFilename, str_replace(" ns=", " xmlns=", $indexXml));
		exec('se clean ' . escapeshellarg($indexFilename));
	}
}

View file

@ -1,4 +1,8 @@
<? <?
use function Safe\krsort;
use function Safe\getopt;
use function Safe\preg_replace;
$longopts = array("webroot:", "weburl:"); $longopts = array("webroot:", "weburl:");
$options = getopt("", $longopts); $options = getopt("", $longopts);
$webRoot = $options["webroot"] ?? "/standardebooks.org/web"; $webRoot = $options["webroot"] ?? "/standardebooks.org/web";
@ -6,12 +10,9 @@ $webUrl = $options["weburl"] ?? "https://standardebooks.org";
require_once($webRoot . '/lib/Core.php'); require_once($webRoot . '/lib/Core.php');
$updatedTimestamp = gmdate('Y-m-d\TH:i:s\Z');
$contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? '')); $contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? ''));
$sortedContentFiles = []; $allEbooks = [];
$newestEbooks = [];
$allFeedEbooks = '';
foreach($contentFiles as $path){ foreach($contentFiles as $path){
if($path == '') if($path == '')
@ -20,45 +21,17 @@ foreach($contentFiles as $path){
$ebookWwwFilesystemPath = preg_replace('|/src/.+|ius', '', $path) ?? ''; $ebookWwwFilesystemPath = preg_replace('|/src/.+|ius', '', $path) ?? '';
$ebook = new Ebook($ebookWwwFilesystemPath); $ebook = new Ebook($ebookWwwFilesystemPath);
$sortedContentFiles[$ebook->ModifiedTimestamp->format('Y-m-dTH:i:sZ') . ' ' . $ebook->Identifier] = $ebook; $allEbooks[$ebook->ModifiedTimestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
$newestEbooks[$ebook->Timestamp->format('Y-m-d\TH:i:s\Z') . ' ' . $ebook->Identifier] = $ebook;
} }
krsort($sortedContentFiles); krsort($allEbooks);
$allFeed = new OpdsFeed(SITE_URL . '/opds/all', 'All Standard Ebooks', $allEbooks, true);
$allFeed->Save(WEB_ROOT . '/opds/all.xml');
$url = SITE_URL . '/opds/all'; krsort($newestEbooks);
$newestEbooks = array_slice($newestEbooks, 0, 30);
$feed = Template::OpdsFeed(['id' => $url, 'url' => $url, 'title' => 'All Standard Ebooks', 'updatedTimestamp' => $updatedTimestamp, 'isCrawlable' => true, 'entries' => $sortedContentFiles]); $newestFeed = new OpdsFeed(SITE_URL . '/opds/newest', 'Newest 30 Standard Ebooks', $newestEbooks);
$newestFeed->Save(WEB_ROOT . '/opds/newest.xml');
$tempFilename = tempnam('/tmp/', 'se-opds-');
file_put_contents($tempFilename, $feed);
exec('se clean ' . escapeshellarg($tempFilename));
// If the feed has changed compared to the version currently on disk, copy our new version over
// and update the updated timestamp in the master opds index.
try{
if(filesize($webRoot . '/www/opds/all.xml') !== filesize($tempFilename)){
$oldFeed = file_get_contents($webRoot . '/www/opds/all.xml');
$newFeed = file_get_contents($tempFilename);
if($oldFeed != $newFeed){
file_put_contents($webRoot . '/www/opds/all.xml', $newFeed);
// Update the index feed with the last updated timestamp
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($webRoot . '/www/opds/index.xml')));
$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
$xml->registerXPathNamespace('schema', 'http://schema.org/');
$allUpdated = $xml->xpath('/feed/entry[id="https://standardebooks.org/opds/all"]/updated')[0];
$allUpdated[0] = $updatedTimestamp;
file_put_contents($webRoot . '/www/opds/index.xml', str_replace(" ns=", " xmlns=", $xml->asXml()));
exec('se clean ' . escapeshellarg($webRoot) . '/www/opds/index.xml');
}
}
}
catch(Exception $ex){
rename($tempFilename, $webRoot . '/www/opds/all.xml');
}
unlink($tempFilename);
?> ?>