mirror of
https://github.com/standardebooks/web.git
synced 2025-07-12 09:32:24 -04:00
Flesh out OPDS generation backend to be more robust and to support generic Atom feeds
This commit is contained in:
parent
35188195f1
commit
f6df03cfca
23 changed files with 1549 additions and 267 deletions
|
@ -121,12 +121,8 @@ if ! [ -x "${scriptsDir}"/reset-php-fpm-opcache ]; then
|
|||
die "\"${scriptsDir}\"/reset-php-fpm-opcache is not an executable file."
|
||||
fi
|
||||
|
||||
# Abort early when the OPDS generator script is missing from the scripts directory.
# The whole path is wrapped in one pair of quotation marks so the message reads cleanly.
if ! [ -f "${scriptsDir}/generate-opds" ]; then
	die "\"${scriptsDir}/generate-opds\" is not a file or could not be found."
fi
|
||||
|
||||
if ! [ -f "${scriptsDir}"/generate-rss ]; then
|
||||
die "\"${scriptsDir}\"/generate-rss\" is not a file or could not be found."
|
||||
# Abort early when the feed generator script is missing from the scripts directory.
# The whole path is wrapped in one pair of quotation marks so the message reads cleanly.
if ! [ -f "${scriptsDir}/generate-feeds" ]; then
	die "\"${scriptsDir}/generate-feeds\" is not a file or could not be found."
fi
|
||||
|
||||
mkdir -p "${webRoot}"/www/images/covers/
|
||||
|
@ -382,7 +378,7 @@ if [ "${verbose}" = "true" ]; then
|
|||
printf "Rebuilding OPDS catalog ... "
|
||||
fi
|
||||
|
||||
"${scriptsDir}/generate-opds" --webroot "${webRoot}" --weburl "${webUrl}"
|
||||
"${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}"
|
||||
|
||||
sudo chown --recursive se:committers "${webRoot}/www/opds/"*
|
||||
sudo chmod --recursive 664 "${webRoot}/www/opds/"*.xml
|
||||
|
@ -400,13 +396,6 @@ if [ "${verbose}" = "true" ]; then
|
|||
printf "Rebuilding new releases RSS feed ... "
|
||||
fi
|
||||
|
||||
# Capture the generated feed in memory first, so a failed run cannot truncate the published file.
output=$("${scriptsDir}/generate-rss" --webroot "${webRoot}" --weburl "${webUrl}")
rssExitCode=$?

# Publish only when the generator exited cleanly; on failure (for example invalid XML in content.opf) the existing feed is left untouched.
if [ "${rssExitCode}" -eq 0 ]; then
	echo "${output}" > "${webRoot}/www/rss/new-releases.xml"
fi
|
||||
|
||||
if [ "${verbose}" = "true" ]; then
|
||||
printf "Done.\n"
|
||||
fi
|
||||
|
|
|
@ -17,6 +17,7 @@ $allEbooks = [];
|
|||
$newestEbooks = [];
|
||||
$subjects = [];
|
||||
$ebooksBySubject = [];
|
||||
$ebooksPerNewestEbooksFeed = 30;
|
||||
|
||||
// Iterate over all ebooks to build the various feeds
|
||||
foreach($contentFiles as $path){
|
||||
|
@ -49,6 +50,36 @@ foreach($contentFiles as $path){
|
|||
}
|
||||
}
|
||||
|
||||
// One timestamp, taken at generation time, is passed to every root navigation entry.
$now = new DateTime();

// Create OPDS feeds
$opdsRootEntries = [];

// Entry pointing at the newest-releases acquisition feed.
$opdsRootEntries[] = new OpdsNavigationEntry(
	'/opds/new-releases',
	'http://opds-spec.org/sort/new',
	'acquisition',
	$now,
	'Newest ' . number_format($ebooksPerNewestEbooksFeed) . ' Standard Ebooks',
	'A list of the ' . number_format($ebooksPerNewestEbooksFeed) . ' newest Standard Ebooks, most-recently-released first.'
);

// Entry pointing at the by-subject navigation feed.
$opdsRootEntries[] = new OpdsNavigationEntry(
	'/opds/subjects',
	'subsection',
	'navigation',
	$now,
	'Standard Ebooks by Subject',
	'Browse Standard Ebooks by subject.'
);

// Entry pointing at the complete, crawlable acquisition feed.
$opdsRootEntries[] = new OpdsNavigationEntry(
	'/opds/all',
	'http://opds-spec.org/crawlable',
	'acquisition',
	$now,
	'All Standard Ebooks',
	'A list of all Standard Ebooks, most-recently-updated first. This is a Complete Acquisition Feed as defined in OPDS 1.2 §2.5.'
);

// NOTE(review): the final argument is presumably the parent feed, hence null for the root; confirm against OpdsNavigationFeed.
$opdsRoot = new OpdsNavigationFeed(
	'/opds',
	'Standard Ebooks',
	WEB_ROOT . '/opds/index.xml',
	$opdsRootEntries,
	null
);
$opdsRoot->Save();
|
||||
|
||||
// Create the subjects navigation document
|
||||
sort($subjects);
|
||||
$subjectNavigationEntries = [];
|
||||
|
@ -60,27 +91,32 @@ foreach($subjects as $subject){
|
|||
$summary .= ' tagged with “' . strtolower($subject) . ',” most-recently-released first.';
|
||||
|
||||
// We leave the updated timestamp blank, as it will be filled in when we generate the individual feeds
|
||||
$subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', null, $subject, $summary);
|
||||
$subjectNavigationEntries[] = new OpdsNavigationEntry('/opds/subjects/' . Formatter::MakeUrlSafe($subject), 'subsection', 'navigation', $now, $subject, $summary);
|
||||
}
|
||||
$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', '/opds', $subjectNavigationEntries);
|
||||
$subjectsFeed->Save(WEB_ROOT . '/opds/subjects/index.xml');
|
||||
$subjectsFeed = new OpdsNavigationFeed('/opds/subjects', 'Standard Ebooks by Subject', WEB_ROOT . '/opds/subjects/index.xml', $subjectNavigationEntries, $opdsRoot);
|
||||
$subjectsFeed->Save();
|
||||
|
||||
// Now generate each individual subject feed
|
||||
foreach($ebooksBySubject as $subject => $ebooks){
|
||||
krsort($ebooks);
|
||||
$subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject), (string)$subject, '/opds/subjects', $ebooks);
|
||||
$subjectFeed->Save(WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml');
|
||||
$subjectFeed = new OpdsAcquisitionFeed('/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject), (string)$subject, WEB_ROOT . '/opds/subjects/' . Formatter::MakeUrlSafe((string)$subject) . '.xml', $ebooks, $subjectsFeed);
|
||||
$subjectFeed->Save();
|
||||
}
|
||||
|
||||
// Create the 'all' feed
|
||||
krsort($allEbooks);
|
||||
$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', '/opds', $allEbooks, true);
|
||||
$allFeed->Save(WEB_ROOT . '/opds/all.xml');
|
||||
$allFeed = new OpdsAcquisitionFeed('/opds/all', 'All Standard Ebooks', WEB_ROOT . '/opds/all.xml', $allEbooks, $opdsRoot, true);
|
||||
$allFeed->Save();
|
||||
|
||||
// Create the 'newest' feed
|
||||
krsort($newestEbooks);
|
||||
$newestEbooks = array_slice($newestEbooks, 0, 30);
|
||||
$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest 30 Standard Ebooks', '/opds', $newestEbooks);
|
||||
$newestFeed->Save(WEB_ROOT . '/opds/new-releases.xml');
|
||||
$newestEbooks = array_slice($newestEbooks, 0, $ebooksPerNewestEbooksFeed);
|
||||
$newestFeed = new OpdsAcquisitionFeed('/opds/new-releases', 'Newest ' . number_format($ebooksPerNewestEbooksFeed) . ' Standard Ebooks', WEB_ROOT . '/opds/new-releases.xml', $newestEbooks, $opdsRoot);
|
||||
$newestFeed->Save();
|
||||
|
||||
// Now create RSS feeds
|
||||
|
||||
// Create the 'newest' feed
|
||||
$newestFeed = new RssFeed('/rss/new-releases', 'Newest ' . number_format($ebooksPerNewestEbooksFeed) . ' Standard Ebooks', WEB_ROOT . '/rss/new-releases.xml', 'A list of the ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks ebook releases, most-recently-released first.', $newestEbooks);
|
||||
$newestFeed->Save();
|
||||
?>
|
|
@ -1,90 +0,0 @@
|
|||
#!/usr/bin/php
<?
/*
 * Prints an RSS 2.0 feed of the most recently released ebooks to standard output.
 *
 * Options:
 *   --webroot  Directory whose www/ebooks/ subtree is searched for content.opf files.
 *              Defaults to /standardebooks.org/web.
 *   --weburl   Base URL used for the links written into the feed.
 *              Defaults to https://standardebooks.org.
 *
 * Every value interpolated into the XML is escaped, so titles, URLs, or subjects
 * containing characters such as "&" cannot produce a malformed feed.
 */
require_once('/standardebooks.org/web/lib/Core.php');

use function Safe\file_get_contents;
use function Safe\getopt;
use function Safe\gmdate;
use function Safe\krsort;
use function Safe\preg_replace;
use function Safe\strtotime;

$longopts = ["webroot:", "weburl:"];
$options = getopt("", $longopts);
$webRoot = $options["webroot"] ?? "/standardebooks.org/web";
$webUrl = $options["weburl"] ?? "https://standardebooks.org";

$rssLength = 30;
$contentFiles = explode("\n", trim(shell_exec('find ' . escapeshellarg($webRoot . '/www/ebooks/') . ' -name "content.opf" | sort') ?? ''));

// Ebooks are grouped by publication timestamp. Keying a flat array by timestamp
// would silently drop all but one of any ebooks released at the same second.
$ebooksByTimestamp = [];

foreach($contentFiles as $path){
	if($path === ''){
		continue;
	}

	// Renaming the default namespace declaration lets the XPath queries below address elements without a prefix.
	$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($path) ?: ''));
	$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

	$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
	$publishedTimestamp = strtotime((string)array_shift($temp));

	$ebooksByTimestamp[$publishedTimestamp][] = $xml;
}

// Newest first; ebooks sharing a timestamp keep the order in which they were found.
krsort($ebooksByTimestamp);

// Flatten the groups back into one list and keep only the newest entries.
// The leading empty array keeps array_merge() valid when no ebooks were found.
$sortedContentFiles = array_slice(array_merge([], ...array_values($ebooksByTimestamp)), 0, $rssLength);

// The base URL comes from the command line, so escape it once for use in the markup below.
$escapedWebUrl = htmlspecialchars($webUrl, ENT_QUOTES, 'UTF-8');

// XSL stylesheet mime type must be `text/xsl` otherwise Chrome doesn't read it
print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<?xml-stylesheet href=\"/rss/style\" type=\"text/xsl\"?>\n");
?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
	<title>Standard Ebooks - New Releases</title>
	<link><?= $escapedWebUrl ?></link>
	<description>A list of the <?= number_format($rssLength) ?> latest Standard Ebooks ebook releases, most-recently-released first.</description>
	<language>en-US</language>
	<copyright>https://creativecommons.org/publicdomain/zero/1.0/</copyright>
	<lastBuildDate><?= gmdate('D, d M Y H:i:s +0000') ?></lastBuildDate>
	<docs>http://blogs.law.harvard.edu/tech/rss</docs>
	<atom:link href="<?= $escapedWebUrl ?>/rss/new-releases" rel="self" type="application/rss+xml"/>
	<image>
		<url><?= $escapedWebUrl ?>/images/logo-rss.png</url>
		<title>Standard Ebooks - New Releases</title>
		<description>The Standard Ebooks logo</description>
		<link><?= $escapedWebUrl ?></link>
		<height>144</height>
		<width>144</width>
	</image>
	<? foreach($sortedContentFiles as $xml){
		// The identifier is stored as "url:https://standardebooks.org/..."; strip the scheme prefix and rebase it onto the requested web URL.
		$temp = $xml->xpath('/package/metadata/dc:identifier') ?: [];
		$url = preg_replace('/^url:/ius', '', (string)array_shift($temp));
		$url = preg_replace('/^https:\/\/standardebooks\.org/ius', $webUrl, $url);

		$temp = $xml->xpath('/package/metadata/dc:title') ?: [];
		$title = (string)array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:creator') ?: [];
		$title .= ', by ' . (string)array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:description') ?: [];
		$description = (string)array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
		$published = gmdate('D, d M Y H:i:s +0000', strtotime((string)array_shift($temp)));

		$seSubjects = $xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [];
	?><item>
		<title><?= htmlspecialchars($title, ENT_QUOTES, 'UTF-8') ?></title>
		<link><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></link>
		<description><?= htmlspecialchars($description, ENT_QUOTES, 'UTF-8') ?></description>
		<pubDate><?= $published ?></pubDate>
		<guid><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></guid>
		<? foreach($seSubjects as $seSubject){ ?>
		<category domain="standardebooks.org"><?= htmlspecialchars((string)$seSubject, ENT_QUOTES, 'UTF-8') ?></category>
		<? } ?>
	</item>
	<? } ?>
</channel>
</rss>
|
Loading…
Add table
Add a link
Reference in a new issue