mirror of
https://github.com/standardebooks/web.git
synced 2025-07-05 14:20:29 -04:00
249 lines
8.3 KiB
PHP
Executable file
249 lines
8.3 KiB
PHP
Executable file
#!/usr/bin/php
|
|
<?
|
|
require_once('/standardebooks.org/web/lib/Core.php');
|
|
|
|
use function Safe\copy;
|
|
use function Safe\exec;
|
|
use function Safe\file_get_contents;
|
|
use function Safe\filemtime;
|
|
use function Safe\getopt;
|
|
use function Safe\glob;
|
|
use function Safe\mkdir;
|
|
use function Safe\opendir;
|
|
use function Safe\preg_replace;
|
|
use function Safe\rmdir;
|
|
use function Safe\tempnam;
|
|
use function Safe\unlink;
|
|
|
|
// The bulk download groupings we build, and the ebook file types we build a zip of for each entry in a grouping.
$groups = ['collections', 'subjects', 'authors', 'months'];
$types = ['epub', 'epub-advanced', 'azw3', 'kepub', 'xhtml'];

// Every grouping starts out with an empty list of entries.
$ebooksByGroup = array_fill_keys($groups, []);

// `--webroot` overrides the default web root.
$longopts = ['webroot:'];
$options = getopt('', $longopts);

if(isset($options['webroot'])){
	$webRoot = $options['webroot'];
}
else{
	$webRoot = WEB_ROOT;
}
|
|
|
|
/**
 * Recursively deletes a directory and everything inside it.
 *
 * Symbolic links found inside the directory are removed without being followed, so whatever they point to is left untouched.
 *
 * @param string $src The path of the directory to delete.
 *
 * @see https://www.php.net/manual/en/function.rmdir.php#117354
 */
function rrmdir(string $src): void{
	$dir = opendir($src);

	try{
		while(false !== ($file = readdir($dir))){
			if($file === '.' || $file === '..'){
				continue;
			}

			$full = $src . '/' . $file;

			// `is_dir()` follows symlinks, so we also check `is_link()`; otherwise we would empty the link's target and then fail to `rmdir()` the link itself.
			if(is_dir($full) && !is_link($full)){
				rrmdir($full);
			}
			else{
				@unlink($full);
			}
		}
	}
	finally{
		// Release the directory handle even if a nested removal throws.
		closedir($dir);
	}

	rmdir($src);
}
|
|
|
|
/**
 * Builds a zip file at `$filePath` containing the file of the given type for each ebook that has one, then sets the `se-ebook-type` extended attribute on the zip.
 *
 * The archive is assembled in a temporary file and is only copied to `$filePath` if it could be opened and written successfully, so a failed build doesn't replace an existing zip with an empty file.
 *
 * @param string $filePath The destination path of the finished zip file.
 * @param array<Ebook> $ebooks The ebooks to include.
 * @param string $type One of `epub`, `epub-advanced`, `azw3`, `kepub`, or `xhtml`; any other value adds no files.
 * @param string $webRoot The directory that the ebooks' URLs are resolved against.
 */
function CreateZip(string $filePath, array $ebooks, string $type, string $webRoot): void{
	$tempFilename = tempnam(sys_get_temp_dir(), "se-ebooks");

	$zip = new ZipArchive();

	if($zip->open($tempFilename, ZipArchive::OVERWRITE) !== true){
		// There's nothing we can add files to, so clean up and stop instead of continuing with an unopened archive.
		print('Can\'t open file: ' . $tempFilename . "\n");
		@unlink($tempFilename);
		return;
	}

	foreach($ebooks as $ebook){
		if($type === 'epub' && $ebook->EpubUrl !== null){
			$ebookFilePath = $webRoot . '/' . $ebook->EpubUrl;
			$zip->addFile($ebookFilePath, basename($ebookFilePath));
		}
		elseif($type === 'azw3' && $ebook->Azw3Url !== null){
			// Each azw3 file goes in its own folder, named after the file, together with its cover thumbnail if it has one.
			$ebookFilePath = $webRoot . '/' . $ebook->Azw3Url;
			$folderName = basename($ebookFilePath, '.azw3');
			$zip->addFile($ebookFilePath, $folderName . '/' . basename($ebookFilePath));

			if($ebook->KindleCoverUrl !== null){
				$ebookThumbnailPath = $webRoot . '/' . $ebook->KindleCoverUrl;
				$zip->addFile($ebookThumbnailPath, $folderName . '/' . basename($ebookThumbnailPath));
			}
		}
		elseif($type === 'kepub' && $ebook->KepubUrl !== null){
			$ebookFilePath = $webRoot . '/' . $ebook->KepubUrl;
			$zip->addFile($ebookFilePath, basename($ebookFilePath));
		}
		elseif($type === 'epub-advanced' && $ebook->AdvancedEpubUrl !== null){
			$ebookFilePath = $webRoot . '/' . $ebook->AdvancedEpubUrl;
			$zip->addFile($ebookFilePath, basename($ebookFilePath));
		}
		elseif($type === 'xhtml' && $ebook->TextSinglePageUrl !== null){
			$ebookFilePath = $webRoot . '/' . $ebook->TextSinglePageUrl . '.xhtml';

			// Strip the navigation header that was added as part of the deploy process
			$xhtml = file_get_contents($ebookFilePath);
			$xhtml = preg_replace('|<body><header><nav>.+?</nav></header>|ius', '<body>', $xhtml);

			// Name the entry after the ebook by swapping `single-page` in the file name for the ebook's identifier.
			$zip->addFromString(str_replace('single-page', $ebook->UrlSafeIdentifier, basename($ebookFilePath)), $xhtml);
		}
	}

	if($zip->close() !== true){
		// The archive couldn't be written. Don't copy whatever is left in the temp file over the destination: its fresh modification time would also stop later runs from rebuilding it.
		print('Can\'t write file: ' . $tempFilename . "\n");

		if(file_exists($tempFilename)){
			@unlink($tempFilename);
		}

		return;
	}

	// We have to do a copy, then unlink because `rename()` can't rename across file systems.
	// If the bulk downloads are symlinked to a storage volume, then `rename()` won't work.
	copy($tempFilename, $filePath);
	@unlink($tempFilename);

	exec('attr -q -s se-ebook-type -V ' . escapeshellarg($type) . ' ' . escapeshellarg($filePath));
}
|
|
|
|
/**
 * Adds an ebook to the entry stored under `$key` in one bulk download group, creating the entry first if this is the first ebook seen for that key.
 *
 * Each entry is a `stdClass` with the properties `Label`, `LabelSort`, `UrlLabel`, `Updated` (the newest update timestamp among the entry's ebooks), and `Ebooks`.
 *
 * @param array<stdClass> $entries The group's entries, modified in place.
 * @param string $key The array key identifying the entry.
 * @param Ebook $ebook The ebook to add.
 * @param int $updatedTimestamp The ebook's last updated time, as a Unix timestamp.
 * @param callable $describe Returns `[Label, LabelSort, UrlLabel]` for a new entry; only called when the entry has to be created.
 */
function AddEbookToBulkDownloadGroup(array &$entries, string $key, Ebook $ebook, int $updatedTimestamp, callable $describe): void{
	if(!isset($entries[$key])){
		[$label, $labelSort, $urlLabel] = $describe();

		$obj = new stdClass();
		$obj->Label = $label;
		$obj->LabelSort = $labelSort;
		$obj->UrlLabel = $urlLabel;
		$obj->Updated = $updatedTimestamp;
		$obj->Ebooks = [$ebook];

		$entries[$key] = $obj;

		return;
	}

	$entries[$key]->Ebooks[] = $ebook;

	if($updatedTimestamp > $entries[$key]->Updated){
		$entries[$key]->Updated = $updatedTimestamp;
	}
}

// Iterate over all ebooks and arrange them by publication month, subject, collection, and author.
foreach(Ebook::GetAll() as $ebook){
	if($ebook->IsPlaceholder()){
		continue;
	}

	if($ebook->EbookCreated === null || $ebook->EbookUpdated === null){
		continue;
	}

	$timestamp = $ebook->EbookCreated->format('Y-m');
	$updatedTimestamp = $ebook->EbookUpdated->getTimestamp();

	// Add to the 'ebooks by month' list.
	AddEbookToBulkDownloadGroup($ebooksByGroup['months'], $timestamp, $ebook, $updatedTimestamp, function() use ($timestamp): array{
		return [$timestamp, $timestamp, Formatter::MakeUrlSafe($timestamp)];
	});

	// Add to the 'books by subject' list.
	foreach($ebook->Tags as $tag){
		AddEbookToBulkDownloadGroup($ebooksByGroup['subjects'], $tag->Name, $ebook, $updatedTimestamp, function() use ($tag): array{
			return [$tag->Name, $tag->Name, Formatter::MakeUrlSafe($tag->Name)];
		});
	}

	// Add to the 'books by collection' list.
	foreach($ebook->CollectionMemberships as $cm){
		$collection = $cm->Collection;

		AddEbookToBulkDownloadGroup($ebooksByGroup['collections'], $collection->Name, $ebook, $updatedTimestamp, function() use ($collection): array{
			return [$collection->Name, $collection->GetSortedName(), Formatter::MakeUrlSafe($collection->Name)];
		});
	}

	// Add to the 'books by author' list.
	// We have to index by `UrlName` for cases like `Samuel Butler` whose `UrlName` is `samuel-butler-1612-1680`.
	$authorsUrl = preg_replace('|^/ebooks/|', '', $ebook->AuthorsUrl);

	AddEbookToBulkDownloadGroup($ebooksByGroup['authors'], $authorsUrl, $ebook, $updatedTimestamp, function() use ($ebook, $authorsUrl): array{
		return [strip_tags($ebook->AuthorsHtml), $ebook->Authors[0]->SortName, $authorsUrl];
	});
}
|
|
|
|
// For each group, remove stale directories, then write each entry's metadata attributes and build any missing or outdated zip files.
foreach($groups as $group){
	// Index the URL labels we expect to find so that each directory check below is a single exact lookup.
	$expectedUrlLabels = [];
	foreach($ebooksByGroup[$group] as $collection){
		$expectedUrlLabels[$collection->UrlLabel] = true;
	}

	// First delete any orphan directories that we don't expect to be here, for example a collection that was later renamed.
	foreach(glob($webRoot . '/bulk-downloads/' . $group . '/*/') as $dir){
		if(!isset($expectedUrlLabels[basename($dir)])){
			print('Removing ' . $dir . "\n");
			rrmdir($dir);
		}
	}

	// Now create the zip files!
	foreach($ebooksByGroup[$group] as $collection){
		$parentDir = $webRoot . '/bulk-downloads/' . $group . '/' . $collection->UrlLabel;

		if(!is_dir($parentDir)){
			mkdir($parentDir, 0775, true);
		}

		// Store the entry's metadata as extended attributes on its directory.
		exec('attr -q -s se-ebook-count -V ' . escapeshellarg((string)count($collection->Ebooks)) . ' ' . escapeshellarg($parentDir));
		exec('attr -q -s se-label -V ' . escapeshellarg($collection->Label) . ' ' . escapeshellarg($parentDir));
		exec('attr -q -s se-label-sort -V ' . escapeshellarg($collection->LabelSort) . ' ' . escapeshellarg($parentDir));

		// We also need to save the URL label for author edge cases like `Samuel Butler` -> `samuel-butler-1612-1680` or `Karl Marx and Freidrich Engels` -> `karl-marx_friedrich-engels`.
		exec('attr -q -s se-url-label -V ' . escapeshellarg($collection->UrlLabel) . ' ' . escapeshellarg($parentDir));

		foreach($types as $type){
			$filePath = $parentDir . '/se-ebooks-' . $collection->UrlLabel . '-' . $type . '.zip';

			// If the file doesn't exist, or if the `content.opf` last updated time is newer than the file modification time.
			if(!file_exists($filePath) || filemtime($filePath) < $collection->Updated){
				print('Creating ' . $filePath . "\n");

				CreateZip($filePath, $collection->Ebooks, $type, $webRoot);
			}
		}
	}
}
|
|
|
|
// Fix up ownership and permissions on everything under the bulk downloads directories.
// PHP's built in `chown()`/`chmod()` aren't used because `chmod()` can't accept strings.
// Only the web root is shell-escaped; the trailing `/bulk-downloads/*/` is left unquoted so that the shell expands the glob.
$bulkDownloadDirs = escapeshellarg($webRoot) . '/bulk-downloads/*/';

exec('sudo chown --preserve-root --recursive se:committers ' . $bulkDownloadDirs);

// The `chmod +X` command, with a capital `X`, makes only matched directories executable.
exec('sudo chmod --preserve-root --recursive a+r,ug+w,a+X ' . $bulkDownloadDirs);
|