Add some server-side infrastructure scripts

This commit is contained in:
Alex Cabal 2019-05-21 16:38:34 -05:00
parent d7d2fdbe3c
commit d395ab9c7a
4 changed files with 374 additions and 0 deletions

166
scripts/deploy-ebook-to-www Executable file
View file

@ -0,0 +1,166 @@
#!/bin/bash
# Print the help text, reflowed by fmt, to standard output and terminate the
# script. The exit status is whatever fmt returned.
usage() {
	fmt <<'EOF'
DESCRIPTION
Deploy a Standard Ebook source repository to the web.
USAGE
deploy-ebook-to-www [-v,--verbose] DIRECTORY [DIRECTORY...]
DIRECTORY is a bare source repository.
EOF
	exit
}
# Report a fatal error and stop the script.
# $1 - message; written to stderr after a highlighted "Error:" label.
# Always exits with status 1.
die() {
	{
		printf "\033[0;7;31mError:\033[0m %s\n" "${1}"
	} >&2
	exit 1
}
# Make sure an external command is available before the script relies on it.
# $1 - command name to look up.
# $2 - optional hint (for example an install command) appended to the error.
# Hands the error message to die when the command cannot be found.
require() {
	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	# Expands to " <hint>" when a hint was given, and to nothing otherwise.
	suggestion="${2:+ $2}"
	die "$1 is not installed.${suggestion}"
}
# Show the help text when the only argument is a help flag.
if [ $# -eq 1 ]; then
	if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
		usage
	fi
fi

# End boilerplate

# Check for dependencies
require "convert" "Try: apt-get install imagemagick"
require "rsvg-convert" "Try: apt-get install librsvg2-bin"

verbose="false"

if [ $# -eq 0 ]; then
	usage
fi

# The verbosity flag may appear anywhere among the arguments.
for var in "$@"; do
	if [ "${var}" = "-v" ] || [ "${var}" = "--verbose" ]; then
		verbose="true"
		break
	fi
done

# Every iteration below changes into a repository. Remember where we started so
# that relative DIRECTORY arguments are resolved against the original working
# directory instead of against the previously processed repository.
startDir=$(pwd)

for dir in "$@"; do
	# Skip empty arguments and the verbosity flags handled above.
	if [ "${dir}" = "" ] || [ "${dir}" = "-v" ] || [ "${dir}" = "--verbose" ]; then
		continue
	fi

	cd "${startDir}" || die "Couldn't change back to ${startDir}"

	repoDir=$(realpath "${dir%/}")
	baseName=$(basename "${repoDir}")

	if [ "${baseName}" = ".git" ]; then
		continue
	fi

	if [ ! -d "${repoDir}" ]; then
		die "Invalid repo root: ${repoDir}"
	fi

	# Stop here on failure: every `git show` below reads from the current directory.
	cd "${repoDir}" || die "Couldn't enter ${repoDir}"

	if [ "${verbose}" = "true" ]; then
		printf "Entering %s\n" "${repoDir}"
	fi

	# Read the metadata file once; the draft check and the identifier both come from it.
	contentOpf=$(git show HEAD:src/epub/content.opf) || die "Couldn't read src/epub/content.opf in ${repoDir}"

	# Unreleased ebooks carry a placeholder publication date.
	if grep --quiet --extended-regexp "<dc:date>1900-01-01T00:00:00Z</dc:date>" <<< "${contentOpf}"; then
		printf "Looks like a draft ebook, skipping\n"
		continue
	fi

	# The part of the ebook's identifier URL that follows /ebooks/.
	identifier=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" <<< "${contentOpf}" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s/^url:https:\/\/standardebooks.org\/ebooks\/?//")

	if [ "${identifier}" = "" ]; then
		die "Empty webdir!"
	fi

	webDir="/standardebooks.org/www/ebooks/${identifier}"

	# Same identifier with slashes replaced by underscores, used as a flat file name prefix.
	urlSafeIdentifier="${identifier//\//_}"

	# An empty workDir would turn the later `mv "${workDir}"/*` into `mv /*`, so
	# refuse to continue if either temporary directory can't be created.
	workDir=$(mktemp -d) || die "Couldn't create a temporary work directory"
	if ! imgWorkDir=$(mktemp -d); then
		rm --preserve-root --recursive --force "${workDir}"
		die "Couldn't create a temporary image directory"
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Generating cover image for web ... "
	fi

	# Build the hero image for individual ebook pages
	git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
	cp "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg"

	# Resize to a width of 1078 (2156 for @2x), then crop a 1078x359 (2156x718) strip starting at the coords 0,539 (0,1078)
	convert -resize "1078" -crop "1078x359+0+539" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg"
	convert -resize "2156" -crop "2156x718+0+1078" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg"

	# Build the cover image thumbnail
	# The full-size cover extracted above is still in place; the SVG is pointed at it.
	git show HEAD:images/cover.svg > "${imgWorkDir}/${urlSafeIdentifier}.svg"
	sed -i "s/cover\.jpg/${urlSafeIdentifier}\.jpg/g" "${imgWorkDir}/${urlSafeIdentifier}.svg"
	cp "${imgWorkDir}/${urlSafeIdentifier}.svg" "${imgWorkDir}/${urlSafeIdentifier}@2x.svg"

	# Due to a bug in `convert` we have to use rsvg-convert to convert SVG to PNG, then work on the PNG with `convert`.
	rsvg-convert --keep-aspect-ratio --format png --output "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
	rsvg-convert --keep-aspect-ratio --format png --output "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}@2x.svg"

	# Resize and compress the image (formula from Google Page Speed Insights)
	convert -resize "196" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.jpg"
	convert -resize "392" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg"

	mv "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg" "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg"
	mv "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg"

	sudo chgrp --preserve-root --recursive se "${imgWorkDir}/${urlSafeIdentifier}"*
	sudo chmod --preserve-root --recursive g+w "${imgWorkDir}/${urlSafeIdentifier}"*

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
		printf "Building ebook ... "
	fi

	git clone --quiet "${repoDir}" "${workDir}"

	mkdir "${workDir}/dist"

	# Build the ebook
	if ! se build --output-dir="${workDir}/dist" --check --kindle --kobo --covers "${workDir}"; then
		# Remove both temporary directories so a failed build leaves nothing behind.
		rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
		die "Error building ebook, stopping deployment."
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi

	# Delete the contents of the old webdir
	rm --preserve-root --recursive --force "${webDir}"

	# Re-create the webdir
	mkdir -p "${webDir}"

	# Move contents of the work dir over
	mv "${workDir}"/* "${webDir}/"

	# Move the cover images over
	mv "${imgWorkDir}/${urlSafeIdentifier}"*.jpg "/standardebooks.org/www/images/covers/"

	# Delete the now-empty work dir (empty except for .git)
	rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"

	sudo chgrp --preserve-root --recursive se "${webDir}"
	sudo chmod --preserve-root --recursive g+ws "${webDir}"

	if [ "${verbose}" = "true" ]; then
		printf "Flushing PHP-FPM opcache and apcu cache ... "
	fi

	/standardebooks.org/scripts/reset-php-fpm-opcache standardebooks.org

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi
done

# Build the OPDS catalog
if [ "${verbose}" = "true" ]; then
	printf "Rebuilding OPDS catalog ... "
fi

# Generate the feed, then canonicalize it, put the XML declaration back in front and re-indent it with tabs.
bash -c "php /standardebooks.org/scripts/generate-opds.php > /standardebooks.org/www/opds/all.xml; export XMLLINT_INDENT=\$(printf \"\\t\") && xmllint --c14n /standardebooks.org/www/opds/all.xml | (printf \"%s\\n\" \"<?xml version=\\\"1.0\\\" encoding=\\\"utf-8\\\"?>\" && cat) | xmllint --output /standardebooks.org/www/opds/all.xml --format -"

if [ "${verbose}" = "true" ]; then
	printf "Done.\n"
fi

105
scripts/generate-opds.php Executable file
View file

@ -0,0 +1,105 @@
<?
// Builds an OPDS (Atom) acquisition feed covering every ebook found under the web root
// and prints it to standard output.

// Find every ebook's metadata file; sorting keeps the feed order stable between runs.
$contentFiles = explode("\n", trim(shell_exec('find /standardebooks.org/www/ebooks/ -name "content.opf" | sort') ?? ''));

// Escapes a value for use as XML text or attribute content. Values read through SimpleXML
// come back with their entities decoded, so printing them as-is yields malformed XML
// whenever they contain an ampersand or an angle bracket.
$escape = function($value): string{
	return htmlspecialchars((string)$value, ENT_QUOTES, 'UTF-8');
};

print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/">
	<id>https://standardebooks.org/opds/all</id>
	<link href="https://standardebooks.org/opds/all" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
	<link href="https://standardebooks.org/opds/" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
	<title>All Standard Ebooks</title>
	<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
	<icon>https://standardebooks.org/images/logo.png</icon>
	<updated><?= gmdate('Y-m-d\TH:i:s\Z') ?></updated>
	<author>
		<name>Standard Ebooks</name>
		<uri>https://standardebooks.org</uri>
	</author>
	<? foreach($contentFiles as $path){
		if($path == '')
			continue;

		// Renaming the default namespace declaration lets the XPath queries below address
		// the package elements without a prefix.
		$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents("$path") ?: ''));
		$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

		// The identifier looks like "url:https://standardebooks.org/ebooks/..."; strip the
		// scheme marker for the entry ID and the host for site-relative links.
		$temp = $xml->xpath('/package/metadata/dc:identifier') ?: [];
		$url = preg_replace('/^url:/ius', '', (string)array_shift($temp)) ?? '';
		$relativeUrl = preg_replace('/^https:\/\/standardebooks.org/ius', '', $url) ?? '';

		$temp = $xml->xpath('/package/metadata/dc:title') ?: [];
		$title = array_shift($temp);

		// NOTE(review): printed verbatim below; presumably this holds markup that is meant
		// to pass through unescaped. Confirm before changing how it is emitted.
		$temp = $xml->xpath('/package/metadata/meta[@property="se:long-description"]') ?: [];
		$longDescription = array_shift($temp);

		$authors = $xml->xpath('/package/metadata/dc:creator') ?: [];

		$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
		$published = array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:language') ?: [];
		$language = array_shift($temp);

		$temp = $xml->xpath('/package/metadata/meta[@property="dcterms:modified"]') ?: [];
		$modified = array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:description') ?: [];
		$description = array_shift($temp);

		$subjects = $xml->xpath('/package/metadata/dc:subject') ?: [];

		$sources = $xml->xpath('/package/metadata/dc:source') ?: [];

		// Directory of the deployed ebook, i.e. the metadata path without /src/epub/content.opf.
		$filesystemPath = preg_replace('/\/src\/epub\/content.opf$/ius', '', $path) ?? '';

		// First matching file name in dist/, extension included: the links below append to
		// or rewrite the ".epub" suffix. glob() returns false on error, hence the fallback.
		$temp = glob($filesystemPath . '/dist/*.epub') ?: [];
		$epubFilename = basename((string)array_shift($temp));

		$temp = glob($filesystemPath . '/dist/*.azw3') ?: [];
		$kindleFilename = basename((string)array_shift($temp));
	?>
	<entry>
		<id><?= $escape($url) ?></id>
		<title><?= $escape($title) ?></title>
		<? foreach($authors as $author){
			// Extra details about an author live in meta elements that refine the creator's id.
			$id = '';
			if($author->attributes() !== null){
				$id = $author->attributes()->id;
			}

			$temp = $xml->xpath('/package/metadata/meta[@property="se:url.encyclopedia.wikipedia"][@refines="#' . $id . '"]') ?: [];
			$wikiUrl = array_shift($temp);

			$temp = $xml->xpath('/package/metadata/meta[@property="se:name.person.full-name"][@refines="#' . $id . '"]') ?: [];
			$fullName = array_shift($temp);

			$temp = $xml->xpath('/package/metadata/meta[@property="se:url.authority.nacoaf"][@refines="#' . $id . '"]') ?: [];
			$nacoafLink = array_shift($temp);
		?>
		<author>
			<name><?= $escape($author) ?></name>
			<? if($wikiUrl !== null){ ?><uri><?= $escape($wikiUrl) ?></uri><? } ?>
			<? if($fullName !== null){ ?><schema:alternateName><?= $escape($fullName) ?></schema:alternateName><? } ?>
			<? if($nacoafLink !== null){ ?><schema:sameAs><?= $escape($nacoafLink) ?></schema:sameAs><? } ?>
		</author>
		<? } ?>
		<published><?= $escape($published) ?></published>
		<updated><?= $escape($modified) ?></updated>
		<dc:language><?= $escape($language) ?></dc:language>
		<dc:publisher>Standard Ebooks</dc:publisher>
		<? foreach($sources as $source){ ?>
		<dc:source><?= $escape($source) ?></dc:source>
		<? } ?>
		<rights>Public domain in the United States; original content released to the public domain via the Creative Commons CC0 1.0 Universal Public Domain Dedication</rights>
		<summary type="text"><?= $escape($description) ?></summary>
		<content type="text/html"><?= $longDescription ?></content>
		<? foreach($subjects as $subject){ ?>
		<category scheme="http://purl.org/dc/terms/LCSH" term="<?= $escape($subject) ?>"/>
		<? } ?>
		<link href="<?= $escape($relativeUrl) ?>/dist/cover.jpg" rel="http://opds-spec.org/image" type="image/jpeg"/>
		<link href="<?= $escape($relativeUrl) ?>/dist/cover-thumbnail.jpg" rel="http://opds-spec.org/image/thumbnail" type="image/jpeg"/>
		<link href="<?= $escape($relativeUrl) ?>/src/epub/images/cover.svg" rel="http://opds-spec.org/image" type="image/svg+xml"/>
		<link href="<?= $escape($relativeUrl) ?>/dist/<?= $escape($epubFilename) ?>" rel="http://opds-spec.org/acquisition/open-access" type="application/epub+zip"/>
		<link href="<?= $escape($relativeUrl) ?>/dist/<?= $escape($epubFilename) ?>3" rel="http://opds-spec.org/acquisition/open-access" type="application/epub+zip"/>
		<link href="<?= $escape($relativeUrl) ?>/dist/<?= $escape(preg_replace('/\.epub$/ius', '.kepub.epub', $epubFilename)) ?>" rel="http://opds-spec.org/acquisition/open-access" type="application/kepub+zip"/>
		<link href="<?= $escape($relativeUrl) ?>/dist/<?= $escape($kindleFilename) ?>" rel="http://opds-spec.org/acquisition/open-access" type="application/x-mobipocket-ebook"/>
	</entry>
	<? } ?>
</feed>

73
scripts/generate-rss.php Normal file
View file

@ -0,0 +1,73 @@
<?
// Builds an RSS 2.0 feed of the most recently released ebooks found under the web root
// and prints it to standard output.

// Number of ebooks to include in the feed.
$rssLength = 30;

$contentFiles = explode("\n", trim(shell_exec('find /standardebooks.org/www/ebooks/ -name "content.opf" | sort') ?? ''));

// Escapes a value for use as XML text content. Values read through SimpleXML come back
// with their entities decoded, so printing them as-is yields malformed XML whenever they
// contain an ampersand or an angle bracket.
$escape = function($value): string{
	return htmlspecialchars((string)$value, ENT_QUOTES, 'UTF-8');
};

// Collect every ebook with its publication time. A plain list is used rather than an
// array keyed by timestamp, so ebooks sharing a publication time don't overwrite each other.
$ebooks = [];

foreach($contentFiles as $path){
	if($path == '')
		continue;

	// Renaming the default namespace declaration lets the XPath queries address the
	// package elements without a prefix.
	$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents("$path") ?: ''));
	$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

	$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
	// An unparseable or missing date sorts as the Unix epoch.
	$publishedTimestamp = strtotime((string)array_shift($temp)) ?: 0;

	$ebooks[] = ['timestamp' => $publishedTimestamp, 'path' => $path, 'xml' => $xml];
}

// Newest first; ties are broken by path so the order is the same on every run.
usort($ebooks, function($a, $b){
	if($a['timestamp'] == $b['timestamp']){
		return strcmp($a['path'], $b['path']);
	}

	return $b['timestamp'] <=> $a['timestamp'];
});

$sortedContentFiles = array_column(array_slice($ebooks, 0, $rssLength), 'xml');

print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
	<title>Standard Ebooks - New Releases</title>
	<link>https://standardebooks.org</link>
	<description>A list of the <?= number_format($rssLength) ?> latest Standard Ebooks ebook releases, most-recently-released first.</description>
	<language>en-US</language>
	<copyright>https://creativecommons.org/publicdomain/zero/1.0/</copyright>
	<lastBuildDate><?= gmdate('D, d M Y H:i:s +0000') ?></lastBuildDate>
	<docs>http://blogs.law.harvard.edu/tech/rss</docs>
	<atom:link href="https://standardebooks.org/rss/new-releases" rel="self" type="application/rss+xml" />
	<image>
		<url>https://standardebooks.org/images/logo-rss.png</url>
		<title>Standard Ebooks - New Releases</title>
		<description>The Standard Ebooks logo</description>
		<link>https://standardebooks.org</link>
		<height>144</height>
		<width>144</width>
	</image>
	<? foreach($sortedContentFiles as $xml){
		// The identifier looks like "url:https://..."; the item link is the part after "url:".
		$temp = $xml->xpath('/package/metadata/dc:identifier') ?: [];
		$url = preg_replace('/^url:/ius', '', (string)array_shift($temp)) ?? '';

		$temp = $xml->xpath('/package/metadata/dc:title') ?: [];
		$title = (string)array_shift($temp);

		// Only the first creator is named in the item title.
		$temp = $xml->xpath('/package/metadata/dc:creator') ?: [];
		$title .= ', by ' . (string)array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:description') ?: [];
		$description = (string)array_shift($temp);

		$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
		$published = gmdate('D, d M Y H:i:s +0000', strtotime((string)array_shift($temp)) ?: 0);

		$seSubjects = $xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [];
	?><item>
		<title><?= $escape($title) ?></title>
		<link><?= $escape($url) ?></link>
		<description><?= $escape($description) ?></description>
		<pubDate><?= $published ?></pubDate>
		<guid><?= $escape($url) ?></guid>
		<? foreach($seSubjects as $seSubject){ ?>
		<category domain="standardebooks.org"><?= $escape($seSubject) ?></category>
		<? } ?>
	</item>
	<? } ?>
</channel>
</rss>

30
scripts/reset-php-fpm-opcache Executable file
View file

@ -0,0 +1,30 @@
#!/bin/bash
# Print the help text, reflowed by fmt, to standard output and terminate the
# script with status 1.
usage() {
	fmt <<'EOF'
DESCRIPTION
Flush the PHP-FPM opcache and the APCu user cache without reloading or resetting the PHP-FPM service.
USAGE
reset-php-fpm-opcache POOL_NAME
EOF
	exit 1
}
# Exactly one argument, the pool name, is accepted. A help flag or any other
# argument count shows the usage text, which exits.
if [ $# -ne 1 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
	usage
fi

# The reset script has to live at this fixed path: the sudoers rule below names it explicitly.
resetScript="/tmp/php-fpm-opcache-reset.php"

# If this script is run by a user without sudo powers, they can be given for this command by creating a file in sudoers.d with:
# MY_USERNAME ALL=(www-data) NOPASSWD: /usr/bin/env SCRIPT_FILENAME=/tmp/php-fpm-opcache-reset.php REQUEST_METHOD=GET cgi-fcgi -bind -connect *

# Write a throwaway PHP script that clears the opcache, and the APCu cache when that extension is loaded.
printf '%s\n' '<?php opcache_reset(); if(function_exists("apcu_clear_cache")){ apcu_clear_cache(); } ?>' > "${resetScript}"

# Send it as a FastCGI request to the pool's socket so it runs inside the PHP-FPM pool itself; all output is discarded.
sudo -u www-data env SCRIPT_FILENAME="${resetScript}" REQUEST_METHOD=GET cgi-fcgi -bind -connect "/run/php/$1.sock" &> /dev/null

rm "${resetScript}"