web/scripts/generate-feeds
2024-12-13 11:46:36 -06:00

261 lines
12 KiB
PHP
Executable file

#!/usr/bin/php
<?
require_once('/standardebooks.org/web/lib/Core.php');
use Safe\DateTimeImmutable;
use function Safe\exec;
use function Safe\getopt;
use function Safe\mkdir;
use function Safe\preg_replace;
/**
 * Comparison callback for `usort()` that orders ebooks by their `EbookUpdated` value, most recent first.
 *
 * @return int Negative, zero, or positive, as produced by the spaceship operator.
 */
function SortByUpdatedDesc(Ebook $a, Ebook $b): int{
	$updatedB = $b->EbookUpdated;
	$updatedA = $a->EbookUpdated;

	// `$b` is on the left-hand side so that later values sort first (descending order).
	return $updatedB <=> $updatedA;
}
/**
 * Write a feed to disk and, if it was written, tag the resulting file with its label attributes.
 *
 * With `$force` the feed is always saved via `Save()`, after first stamping `$feed->Updated` with `$now` if one was passed. Without `$force` the decision is left to `SaveIfChanged()`, whose return value is used as the "was written" flag.
 *
 * When the feed was written and both `$label` and `$labelSort` are non-null, the `attr` command-line tool is invoked to store them on `$feed->Path` under the attribute names `se-label` and `se-label-sort`.
 */
function SaveFeed(Feed $feed, bool $force, ?string $label = null, ?string $labelSort = null, ?DateTimeImmutable $now = null): void{
	if($force){
		if($now !== null){
			$feed->Updated = $now;
		}

		$feed->Save();
		$wasWritten = true;
	}
	else{
		$wasWritten = $feed->SaveIfChanged();
	}

	// Nothing more to do unless the file was written and we have a complete label pair.
	if(!$wasWritten || $label === null || $labelSort === null){
		return;
	}

	$quotedPath = escapeshellarg($feed->Path);

	exec('attr -q -s se-label -V ' . escapeshellarg($label) . ' ' . $quotedPath);
	exec('attr -q -s se-label-sort -V ' . escapeshellarg($labelSort) . ' ' . $quotedPath);
}
/**
 * Generate the OPDS feeds for one category of ebooks (the cases of `Enums\FeedCollectionType`).
 *
 * Two kinds of feed are written:
 *
 * 1. A navigation feed at `$webRoot . $url . '/index.xml'` with one entry per element of `$collections`, sorted by `sortedname` using an `en_US` collator.
 *
 * 2. One acquisition feed per entry at `$webRoot . $url . '/' . $id . '.xml'`, containing `$ebooks[$id]` sorted with `SortByUpdatedDesc()`.
 *
 * If the collator can't be created, the function returns without writing anything.
 *
 * @param string $description A description template; each `%s` in it is replaced by the entry's name.
 * @param array<string, array<string, string>> $collections Each element is expected to have the keys `id`, `name`, and `sortedname`.
 * @param array<string, array<Ebook>> $ebooks Ebooks keyed by the same IDs found in `$collections`.
 */
function CreateOpdsCollectionFeed(Enums\FeedCollectionType $collectionType, string $url, string $description, array $collections, array $ebooks, string $webRoot, OpdsNavigationFeed $opdsRoot, bool $force): void{
	// Used for sorting letters with diacritics, like in author names.
	$collator = Collator::create('en_US');

	// Strip a trailing "s" from the enum value to get the singular form used in titles.
	$name = preg_replace('/s$/', '', $collectionType->value);

	if($collator === null){
		return;
	}

	usort($collections, static function($left, $right) use($collator){
		$order = $collator->compare($left['sortedname'], $right['sortedname']);

		// `Collator::compare()` returns `false` on error; treat that as "equal".
		if($order === false){
			return 0;
		}

		return $order;
	});

	// Create the collections navigation document.
	$navigationEntries = [];

	foreach($collections as $item){
		$entryDescription = str_replace('%s', $item['name'], $description);
		$entryUrl = $url . '/' . $item['id'];

		$navigationEntry = new OpdsNavigationEntry($item['name'], $entryDescription, $entryUrl, NOW, 'subsection', 'navigation');
		$navigationEntry->SortTitle = $item['sortedname'];

		$navigationEntries[] = $navigationEntry;
	}

	$browseText = 'Browse Standard Ebooks by ' . $name . '.';

	$collectionsFeed = new OpdsNavigationFeed('Standard Ebooks by ' . ucfirst($name), $browseText, $url, $webRoot . $url . '/index.xml', $navigationEntries, $opdsRoot);
	$collectionsFeed->Subtitle = $browseText;

	SaveFeed($collectionsFeed, $force, null, null, NOW);

	// Now generate each individual collection feed.
	foreach($navigationEntries as $navigationEntry){
		// The last path segment of the entry's `Id` is used as the key into `$ebooks`.
		$id = basename($navigationEntry->Id);
		$feedUrl = $url . '/' . $id;

		usort($ebooks[$id], 'SortByUpdatedDesc');

		$acquisitionFeed = new OpdsAcquisitionFeed($navigationEntry->Title . ' Ebooks', $navigationEntry->Description, $feedUrl, $webRoot . $feedUrl . '.xml', $ebooks[$id], $collectionsFeed);

		SaveFeed($acquisitionFeed, $force, $navigationEntry->Title, $navigationEntry->SortTitle, NOW);
	}
}
// Parse command-line options.
// `--webroot <path>` overrides the default `WEB_ROOT`; `--force` is passed on to `SaveFeed()`.
$longopts = ['webroot:', 'force'];
$options = getopt('', $longopts);
$webRoot = $options['webroot'] ?? WEB_ROOT;

// If the arg is present, `getopt()` sets it to `false`, so test for presence instead of truthiness.
$force = isset($options['force']);

// Accumulators that are filled while iterating over all ebooks.
$allEbooks = [];
$newestEbooks = [];
$subjects = [];
$ebooksBySubject = [];
$collections = [];
$ebooksByCollection = [];
$authors = [];
$ebooksByAuthor = [];

// How many ebooks the "newest" feeds contain.
$ebooksPerNewestEbooksFeed = 15;

// Output directories for the per-subject, per-collection, and per-author feeds of each feed type.
$dirs = [
	'/feeds/opds/subjects', '/feeds/rss/subjects', '/feeds/atom/subjects',
	'/feeds/opds/collections', '/feeds/rss/collections', '/feeds/atom/collections',
	'/feeds/opds/authors', '/feeds/rss/authors', '/feeds/atom/authors'
];

foreach($dirs as $dir){
	if(!is_dir($webRoot . $dir)){
		// Create missing parent directories too (for example `/feeds/opds`). Without the recursive flag, `mkdir()` throws when run against a web root that doesn't contain the `/feeds/<type>` directories yet.
		mkdir($webRoot . $dir, 0777, true);
	}
}
// Walk over every ebook once, bucketing it by subject, collection, and author so the per-category feeds can be built later.
foreach(Ebook::GetAll() as $ebook){
	// Placeholders are left out of every feed.
	if($ebook->IsPlaceholder()){
		continue;
	}

	$allEbooks[] = $ebook;

	foreach($ebook->Tags as $tag){
		$subjectKey = Formatter::MakeUrlSafe($tag->Name);
		$ebooksBySubject[$subjectKey][] = $ebook;
		$subjects[$subjectKey] = [
			'id' => $subjectKey,
			'name' => $tag->Name,
			'sortedname' => $tag->Name
		];
	}

	foreach($ebook->CollectionMemberships as $membership){
		$memberCollection = $membership->Collection;
		$collectionKey = Formatter::MakeUrlSafe($memberCollection->Name);
		$ebooksByCollection[$collectionKey][] = $ebook;
		$collections[$collectionKey] = [
			'id' => $collectionKey,
			'name' => $memberCollection->Name,
			'sortedname' => $memberCollection->GetSortedName()
		];
	}

	// The author key is the ebook's authors URL without its leading `/ebooks/`.
	$authorKey = preg_replace('|^/ebooks/|', '', $ebook->AuthorsUrl);
	$ebooksByAuthor[$authorKey][] = $ebook;
	$authors[$authorKey] = [
		'id' => $authorKey,
		'name' => strip_tags($ebook->AuthorsHtml),
		// Sort by the first author, falling back to their plain name if they have no sort name.
		'sortedname' => $ebook->Authors[0]->SortName ?? $ebook->Authors[0]->Name
	];
}

// Both lists hold the same ebooks in the same order at this point; they only differ in how they're sorted and trimmed below.
$newestEbooks = $allEbooks;

usort($allEbooks, 'SortByUpdatedDesc');

// Most recently created first, then keep only as many as the "newest" feeds should show.
usort($newestEbooks, static fn($a, $b) => $b->EbookCreated <=> $a->EbookCreated);
$newestEbooks = array_slice($newestEbooks, 0, $ebooksPerNewestEbooksFeed);
// Create OPDS feeds.

// These descriptions are used both by the root navigation entries and by the feeds those entries point to.
$opdsNewestDescription = 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.';
$opdsAllDescription = 'All Standard Ebooks, most-recently-updated first. This is a Complete Acquisition Feed as defined in OPDS 1.2 §2.5.';

// The entries of the root navigation feed, in the order they'll be listed.
$opdsRootEntries = [
	new OpdsNavigationEntry('Newest Standard Ebooks', $opdsNewestDescription, '/feeds/opds/new-releases', NOW, 'http://opds-spec.org/sort/new', 'acquisition'),
	new OpdsNavigationEntry('Standard Ebooks by Subject', 'Browse Standard Ebooks by subject.', '/feeds/opds/subjects', NOW, 'subsection', 'navigation'),
	new OpdsNavigationEntry('Standard Ebooks by Collection', 'Browse Standard Ebooks by collection.', '/feeds/opds/collections', NOW, 'subsection', 'navigation'),
	new OpdsNavigationEntry('Standard Ebooks by Author', 'Browse Standard Ebooks by author.', '/feeds/opds/authors', NOW, 'subsection', 'navigation'),
	new OpdsNavigationEntry('All Standard Ebooks', $opdsAllDescription, '/feeds/opds/all', NOW, 'http://opds-spec.org/crawlable', 'acquisition')
];

$opdsRoot = new OpdsNavigationFeed('Standard Ebooks', 'The Standard Ebooks catalog.', '/feeds/opds', $webRoot . '/feeds/opds/index.xml', $opdsRootEntries, null);
SaveFeed($opdsRoot, $force, null, null, NOW);

// Create the Subjects feeds.
CreateOpdsCollectionFeed(
	Enums\FeedCollectionType::Subjects,
	'/feeds/opds/subjects',
	'Standard Ebooks in the “%s” subject, most-recently-released first.',
	$subjects,
	$ebooksBySubject,
	$webRoot,
	$opdsRoot,
	$force
);

// Create the Collections feeds.
CreateOpdsCollectionFeed(
	Enums\FeedCollectionType::Collections,
	'/feeds/opds/collections',
	'Standard Ebooks in the “%s” collection, most-recently-released first.',
	$collections,
	$ebooksByCollection,
	$webRoot,
	$opdsRoot,
	$force
);

// Create the Author feeds.
CreateOpdsCollectionFeed(
	Enums\FeedCollectionType::Authors,
	'/feeds/opds/authors',
	'Standard Ebooks by %s, most-recently-released first.',
	$authors,
	$ebooksByAuthor,
	$webRoot,
	$opdsRoot,
	$force
);

// Create the All feed.
// NOTE(review): The trailing `true` presumably marks this as the complete/crawlable feed; confirm against the `OpdsAcquisitionFeed` constructor.
$allFeed = new OpdsAcquisitionFeed('All Standard Ebooks', $opdsAllDescription, '/feeds/opds/all', $webRoot . '/feeds/opds/all.xml', $allEbooks, $opdsRoot, true);
SaveFeed($allFeed, $force, null, null, NOW);

// Create the Newest feed.
$newestFeed = new OpdsAcquisitionFeed('Newest Standard Ebooks', $opdsNewestDescription, '/feeds/opds/new-releases', $webRoot . '/feeds/opds/new-releases.xml', $newestEbooks, $opdsRoot);
SaveFeed($newestFeed, $force, null, null, NOW);
// Create RSS/Atom feeds.
// The RSS and Atom variants of a feed share a title and subtitle. Only the Atom feeds are passed `NOW` when saved.
$allFeedTitle = 'Standard Ebooks - All Ebooks';
$allFeedSubtitle = 'All Standard Ebooks, most-recently-released first.';
$newestFeedTitle = 'Standard Ebooks - Newest Ebooks';
$newestFeedSubtitle = 'The ' . number_format($ebooksPerNewestEbooksFeed) . ' latest Standard Ebooks, most-recently-released first.';

// Create the RSS All feed.
$allRssFeed = new RssFeed($allFeedTitle, $allFeedSubtitle, '/feeds/rss/all', $webRoot . '/feeds/rss/all.xml', $allEbooks);
SaveFeed($allRssFeed, $force);

// Create the RSS Newest feed.
$newestRssFeed = new RssFeed($newestFeedTitle, $newestFeedSubtitle, '/feeds/rss/new-releases', $webRoot . '/feeds/rss/new-releases.xml', $newestEbooks);
SaveFeed($newestRssFeed, $force);

// Create the Atom All feed.
$allAtomFeed = new AtomFeed($allFeedTitle, $allFeedSubtitle, '/feeds/atom/all', $webRoot . '/feeds/atom/all.xml', $allEbooks);
SaveFeed($allAtomFeed, $force, null, null, NOW);

// Create the Atom Newest feed.
$newestAtomFeed = new AtomFeed($newestFeedTitle, $newestFeedSubtitle, '/feeds/atom/new-releases', $webRoot . '/feeds/atom/new-releases.xml', $newestEbooks);
SaveFeed($newestAtomFeed, $force, null, null, NOW);
// Generate an RSS and an Atom feed for each individual subject.
foreach($ebooksBySubject as $subject => $ebooks){
	usort($ebooks, 'SortByUpdatedDesc');

	$subjectName = $subjects[$subject]['name'];
	$subjectSortName = $subjects[$subject]['sortedname'];

	$title = 'Standard Ebooks - ' . $subjectName . ' Ebooks';
	$subtitle = 'Standard Ebooks in the “' . strtolower($subjectName) . '” subject, most-recently-released first.';

	// The URL-safe form of the subject key is used for both the feed URL and its filename.
	$subjectSlug = Formatter::MakeUrlSafe($subject);
	$rssUrl = '/feeds/rss/subjects/' . $subjectSlug;
	$atomUrl = '/feeds/atom/subjects/' . $subjectSlug;

	$subjectRssFeed = new RssFeed($title, $subtitle, $rssUrl, $webRoot . $rssUrl . '.xml', $ebooks);
	SaveFeed($subjectRssFeed, $force, $subjectName, $subjectSortName);

	$subjectAtomFeed = new AtomFeed($title, $subtitle, $atomUrl, $webRoot . $atomUrl . '.xml', $ebooks);
	SaveFeed($subjectAtomFeed, $force, $subjectName, $subjectSortName, NOW);
}
// Generate an RSS and an Atom feed for each individual collection.
foreach($ebooksByCollection as $collection => $ebooks){
	usort($ebooks, 'SortByUpdatedDesc');

	$collectionName = $collections[$collection]['name'];
	$collectionSortName = $collections[$collection]['sortedname'];

	// Drop a leading "The " (in any letter case), because the name is placed after the words "in the" below.
	$titleName = preg_replace('/^The /ius', '', $collectionName);

	$title = 'Standard Ebooks - Ebooks in the ' . $titleName . ' collection';
	$subtitle = 'Standard Ebooks in the ' . $titleName . ' collection, most-recently-released first.';

	// The URL-safe form of the collection key is used for both the feed URL and its filename.
	$collectionSlug = Formatter::MakeUrlSafe($collection);
	$rssUrl = '/feeds/rss/collections/' . $collectionSlug;
	$atomUrl = '/feeds/atom/collections/' . $collectionSlug;

	$collectionRssFeed = new RssFeed($title, $subtitle, $rssUrl, $webRoot . $rssUrl . '.xml', $ebooks);
	SaveFeed($collectionRssFeed, $force, $collectionName, $collectionSortName);

	$collectionAtomFeed = new AtomFeed($title, $subtitle, $atomUrl, $webRoot . $atomUrl . '.xml', $ebooks);
	SaveFeed($collectionAtomFeed, $force, $collectionName, $collectionSortName, NOW);
}
// Generate an RSS and an Atom feed for each individual author (or group of co-authors).
foreach($ebooksByAuthor as $authorKey => $ebooks){
	usort($ebooks, 'SortByUpdatedDesc');

	$authorId = $authors[$authorKey]['id'];
	$authorName = $authors[$authorKey]['name'];
	$authorSortName = $authors[$authorKey]['sortedname'];

	$title = 'Standard Ebooks - Ebooks by ' . $authorName;
	$subtitle = 'Standard Ebooks by ' . $authorName . ', most-recently-released first.';

	$rssUrl = '/feeds/rss/authors/' . $authorId;
	$atomUrl = '/feeds/atom/authors/' . $authorId;

	$authorRssFeed = new RssFeed($title, $subtitle, $rssUrl, $webRoot . $rssUrl . '.xml', $ebooks);
	SaveFeed($authorRssFeed, $force, $authorName, $authorSortName);

	$authorAtomFeed = new AtomFeed($title, $subtitle, $atomUrl, $webRoot . $atomUrl . '.xml', $ebooks);
	SaveFeed($authorAtomFeed, $force, $authorName, $authorSortName, NOW);
}
// Set ownership and permissions.
// We don't use PHP's built-in `chown()`/`chmod()` because `chmod()` can't accept strings.
// The `chmod +X` command, with a capital X, adds the execute bit only to directories (and to files that are already executable by someone).
// Only the web root is shell-quoted; the glob after it is deliberately left unquoted so that the shell expands it.
$quotedWebRoot = escapeshellarg($webRoot);

// Feed files live either one or two directory levels below `/feeds`.
foreach(['/feeds/*/*.xml', '/feeds/*/*/*.xml'] as $feedFileGlob){
	exec('sudo chown --preserve-root --recursive se:committers ' . $quotedWebRoot . $feedFileGlob);
}

exec('sudo chmod --preserve-root --recursive a+r,ug+w,a+X ' . $quotedWebRoot . '/feeds/*/');