Add some server-side infrastructure scripts

2025-07-05 22:30:30 -04:00 · 2019-05-21 16:38:34 -05:00 · 2019-05-21 16:38:34 -05:00 · d395ab9c7a
commit d395ab9c7a
parent d7d2fdbe3c
4 changed files with 374 additions and 0 deletions
--- a/scripts/deploy-ebook-to-www
+++ b/scripts/deploy-ebook-to-www
@ -0,0 +1,166 @@
 #!/bin/bash
 # Print the help text, reflowed by fmt, and terminate the script.
 # The bare `exit` carries the status of the preceding fmt call.
 # Invoked for a lone -h/--help argument and when no arguments are given.
 usage(){
 	fmt <<EOF
 DESCRIPTION
 	Deploy a Standard Ebook source repository to the web.
 USAGE
 	deploy-ebook-to-www [-v,--verbose] DIRECTORY [DIRECTORY...]
 		DIRECTORY is a bare source repository.
 EOF
 	exit
 }
 # Report a fatal error and stop the script.
 # Arguments: $1 - message to show after the highlighted "Error:" tag
 # Outputs:   the tagged message on stderr
 # Exits:     always, with status 1
 die(){
 	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
 	exit 1
 }
 # Abort via die unless a command is available on this system.
 # Arguments: $1 - command name to look up with `command -v`
 #            $2 - optional installation hint appended to the error message
 # Returns:   0 when the command exists; otherwise hands over to die
 require(){
 	# Kept local so the hint text does not leak into the caller's globals
 	local suggestion=""
 	if command -v "$1" > /dev/null 2>&1; then
 		return 0
 	fi
 	if [ -n "${2:-}" ]; then
 		suggestion=" $2"
 	fi
 	die "$1 is not installed.${suggestion}"
 }
 # A lone help flag prints the usage text
 if [ $# -eq 1 ]; then
 	case "$1" in
 		--help|-h) usage ;;
 	esac
 fi
 # End boilerplate
 # Check for dependencies
 require "convert" "Try: apt-get install imagemagick"
 require "rsvg-convert" "Try: apt-get install librsvg2-bin"
 verbose="false"
 # With no arguments at all there is nothing to deploy, so show the usage text
 if [ $# -eq 0 ]; then
 	usage
 fi
 # A -v/--verbose argument in any position switches on progress output
 for var in "$@"; do
 	case "${var}" in
 		-v|--verbose)
 			verbose="true"
 			break
 			;;
 	esac
 done
 # Deploy every repository named on the command line.
 # For each one: skip drafts, read the target web path from content.opf,
 # render the web cover images, build the ebook from a fresh clone,
 # replace the old web directory with the build, and flush the PHP caches.
 for dir in "$@"
 do
 	# Empty arguments and the verbose flags are not repositories
 	if [ "${dir}" = "" ] || [ "${dir}" = "-v" ] || [ "${dir}" = "--verbose" ]; then
 		continue
 	fi
 	repoDir=$(realpath "${dir%/}")
 	baseName=$(basename "${repoDir}")
 	if [ "${baseName}" = ".git" ]; then
 		continue
 	fi
 	if [ ! -d "${repoDir}" ]; then
 		die "Invalid repo root: ${repoDir}"
 	fi
 	# `return` is not valid at script top level and would let the script carry on
 	# in the wrong directory, so a failed cd is treated as fatal instead.
 	cd "${repoDir}" || die "Couldn't enter repo root: ${repoDir}"
 	if [ "${verbose}" = "true" ]; then
 		printf "Entering %s\n" "${repoDir}"
 	fi
 	# Drafts are recognised by their placeholder publication date
 	if git show HEAD:src/epub/content.opf | grep --quiet --extended-regexp "<dc:date>1900-01-01T00:00:00Z</dc:date>"; then
 		printf "Looks like a draft ebook, skipping\n"
 		continue
 	fi
 	# The part of the identifier URL after /ebooks/ becomes the path below the web root
 	webDir=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s/^url:https:\/\/standardebooks.org\/ebooks\/?//")
 	if [ "${webDir}" = "" ]; then
 		die "Empty webdir!"
 	fi
 	# An empty work dir path would turn the later `mv "${workDir}"/*` into `mv /*`,
 	# so stop immediately if a temporary directory can't be created.
 	workDir=$(mktemp -d) || die "Couldn't create a temporary work directory."
 	imgWorkDir=$(mktemp -d) || { rm --preserve-root --recursive --force "${workDir}"; die "Couldn't create a temporary image directory."; }
 	webDir="/standardebooks.org/www/ebooks/${webDir}"
 	if [ "${verbose}" = "true" ]; then
 		printf "Generating cover image for web ... "
 	fi
 	# Same identifier as above, with slashes turned into underscores so it can be used in file names
 	urlSafeIdentifier=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s|url:https://standardebooks.org/ebooks/||g" | sed --regexp-extended "s|/|_|g")
 	# Build the hero image for individual ebook pages
 	git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
 	cp "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg"
 	# Resize to the target width, then crop a band out of the result:
 	# 1x is 1078 wide and 359 high starting at the coords 0,539;
 	# 2x is 2156 wide and 718 high starting at the coords 0,1078
 	convert -resize "1078" -crop "1078x359+0+539" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg"
 	convert -resize "2156" -crop "2156x718+0+1078" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg"
 	# Build the cover image thumbnail
 	git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
 	git show HEAD:images/cover.svg > "${imgWorkDir}/${urlSafeIdentifier}.svg"
 	# Point the SVG at the renamed JPG sitting next to it in the image work dir
 	sed -i "s/cover\.jpg/${urlSafeIdentifier}\.jpg/g" "${imgWorkDir}/${urlSafeIdentifier}.svg"
 	cp "${imgWorkDir}/${urlSafeIdentifier}.svg" "${imgWorkDir}/${urlSafeIdentifier}@2x.svg"
 	# Due to a bug in `convert` we have to use rsvg-convert to convert SVG to PNG, then work on the PNG with `convert`.
 	rsvg-convert --keep-aspect-ratio --format png --output "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
 	rsvg-convert --keep-aspect-ratio --format png --output "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}@2x.svg"
 	# Resize and compress the image (formula from Google Page Speed Insights)
 	convert -resize "196" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.jpg"
 	convert -resize "392" -sampling-factor 4:2:0 -strip -quality 75 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg"
 	mv "${imgWorkDir}/${urlSafeIdentifier}@2x.jpg" "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg"
 	mv "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg"
 	sudo chgrp --preserve-root --recursive se "${imgWorkDir}/${urlSafeIdentifier}"*
 	sudo chmod --preserve-root --recursive g+w "${imgWorkDir}/${urlSafeIdentifier}"*
 	if [ "${verbose}" = "true" ]; then
 		printf "Done.\n"
 		printf "Building ebook ... "
 	fi
 	git clone --quiet "${repoDir}" "${workDir}"
 	mkdir "${workDir}/dist"
 	# Build the ebook
 	if ! se build --output-dir="${workDir}/dist" --check --kindle --kobo --covers "${workDir}"; then
 		# Remove both temporary directories so a failed build leaves nothing behind
 		rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
 		die "Error building ebook, stopping deployment."
 	fi
 	if [ "${verbose}" = "true" ]; then
 		printf "Done.\n"
 	fi
 	# Delete the contents of the old webdir
 	rm --preserve-root --recursive --force "${webDir}"
 	# Re-create the webdir
 	mkdir -p "${webDir}"
 	# Move contents of the work dir over
 	mv "${workDir}"/* "${webDir}/"
 	# Move the cover images over
 	mv "${imgWorkDir}/${urlSafeIdentifier}"*.jpg "/standardebooks.org/www/images/covers/"
 	# Delete the now-empty work dir (empty except for .git)
 	rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
 	sudo chgrp --preserve-root --recursive se "${webDir}"
 	sudo chmod --preserve-root --recursive g+ws "${webDir}"
 	if [ "${verbose}" = "true" ]; then
 		printf "Flushing PHP-FPM opcache and apcu cache ... "
 	fi
 	/standardebooks.org/scripts/reset-php-fpm-opcache standardebooks.org
 	if [ "${verbose}" = "true" ]; then
 		printf "Done.\n"
 	fi
 done
 # Build the OPDS catalog
 if [ "${verbose}" = "true" ]; then
 	printf "Rebuilding OPDS catalog ... "
 fi
 # Regenerate all.xml with generate-opds.php, then rewrite it in place:
 # canonicalize it with `xmllint --c14n`, prepend an XML declaration, and
 # re-indent it with `xmllint --format`, with XMLLINT_INDENT set to a tab.
 # NOTE(review): the pipeline reads and writes the same all.xml; presumably
 # safe because the final xmllint only writes once its input is consumed — confirm.
 bash -c "php /standardebooks.org/scripts/generate-opds.php > /standardebooks.org/www/opds/all.xml; export XMLLINT_INDENT=\$(printf \"\\t\") && xmllint --c14n /standardebooks.org/www/opds/all.xml | (printf \"%s\\n\" \"<?xml version=\\\"1.0\\\" encoding=\\\"utf-8\\\"?>\" && cat) | xmllint --output /standardebooks.org/www/opds/all.xml --format -"
 if [ "${verbose}" = "true" ]; then
 	printf "Done.\n"
 fi
--- a/scripts/generate-opds.php
+++ b/scripts/generate-opds.php
@ -0,0 +1,105 @@
 <?
 // Locate every deployed content.opf; `find | sort` prints one path per line.
 $findOutput = shell_exec('find /standardebooks.org/www/ebooks/ -name "content.opf" | sort') ?? '';
 $contentFiles = explode("\n", trim($findOutput));
 // The XML declaration is emitted from inside PHP code rather than as template text.
 echo "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
 ?>
 <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:schema="http://schema.org/">
 	<id>https://standardebooks.org/opds/all</id>
 	<link href="https://standardebooks.org/opds/all" rel="self" type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
 	<link href="https://standardebooks.org/opds/" rel="start" type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
 	<title>All Standard Ebooks</title>
 	<subtitle>Free and liberated ebooks, carefully produced for the true book lover.</subtitle>
 	<icon>https://standardebooks.org/images/logo.png</icon>
 	<updated><?= gmdate('Y-m-d\TH:i:s\Z') ?></updated>
 	<author>
 		<name>Standard Ebooks</name>
 		<uri>https://standardebooks.org</uri>
 	</author>
 	<? foreach($contentFiles as $path){
 	// Emit one Atom entry per content.opf listed in $contentFiles.
 	if($path == '')
 		continue;
 	// 'xmlns=' is rewritten to 'ns=' so the document has no default namespace
 	// and the XPath queries below can address elements without a prefix.
 	$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents("$path") ?: ''));
 	$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
 	// For single-valued fields only the first XPath match is used.
 	$temp = $xml->xpath('/package/metadata/dc:identifier') ?: [];
 	$url = preg_replace('/^url:/ius', '', (string)array_shift($temp)) ?? '';
 	$relativeUrl = preg_replace('/^https:\/\/standardebooks.org/ius', '', $url) ?? '';
 	$temp = $xml->xpath('/package/metadata/dc:title') ?: [];
 	$title = array_shift($temp);
 	$temp = $xml->xpath('/package/metadata/meta[@property="se:long-description"]') ?: [];
 	$longDescription = array_shift($temp);
 	$authors = $xml->xpath('/package/metadata/dc:creator') ?: [];
 	$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
 	$published = array_shift($temp);
 	$temp = $xml->xpath('/package/metadata/dc:language') ?: [];
 	$language = array_shift($temp);
 	$temp = $xml->xpath('/package/metadata/meta[@property="dcterms:modified"]') ?: [];
 	$modified = array_shift($temp);
 	$temp = $xml->xpath('/package/metadata/dc:description') ?: [];
 	$description = array_shift($temp);
 	$subjects = $xml->xpath('/package/metadata/dc:subject') ?: [];
 	$sources = $xml->xpath('/package/metadata/dc:source') ?: [];
 	// The ebook's root directory on disk is the content.opf path minus /src/epub/content.opf.
 	$filesystemPath = preg_replace('/\/src\/epub\/content.opf$/ius', '', $path) ?? '';
 	// glob() can return false, so fall back to an empty list before shifting.
 	$temp = glob($filesystemPath . '/dist/*.epub') ?: [];
 	// NOTE(review): the inner pattern matches a literal "|.epub", so the ".epub" extension is kept;
 	// the acquisition links below depend on that (they append "3" and swap the ".epub" suffix).
 	$epubFilename = preg_replace('/(\|\.epub)/ius', '', preg_replace('/.+\//ius', '', array_shift($temp) ?? '') ?? '') ?? '';
 	$temp = glob($filesystemPath . '/dist/*.azw3') ?: [];
 	$kindleFilename = preg_replace('/.+\//ius', '', array_shift($temp) ?? '') ?? '';
 	// SimpleXML hands back entity-decoded text, so metadata written into element
 	// content below is escaped again to keep the feed well-formed.
 	?>
 	<entry>
 		<id><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></id>
 		<title><?= htmlspecialchars((string)$title, ENT_QUOTES, 'UTF-8') ?></title>
 		<? foreach($authors as $author){
 			// Per-author details are stored in meta elements that refine the creator's id.
 			$id = '';
 			if($author->attributes() !== null){
 				$id = $author->attributes()->id;
 			}
 			$temp = $xml->xpath('/package/metadata/meta[@property="se:url.encyclopedia.wikipedia"][@refines="#' . $id . '"]') ?: [];
 			$wikiUrl = array_shift($temp);
 			$temp = $xml->xpath('/package/metadata/meta[@property="se:name.person.full-name"][@refines="#' . $id . '"]') ?: [];
 			$fullName = array_shift($temp);
 			$temp = $xml->xpath('/package/metadata/meta[@property="se:url.authority.nacoaf"][@refines="#' . $id . '"]') ?: [];
 			$nacoafLink = array_shift($temp);
 		?>
 		<author>
 			<name><?= htmlspecialchars((string)$author, ENT_QUOTES, 'UTF-8') ?></name>
 			<? if($wikiUrl !== null){ ?><uri><?= htmlspecialchars((string)$wikiUrl, ENT_QUOTES, 'UTF-8') ?></uri><? } ?>
 			<? if($fullName !== null){ ?><schema:alternateName><?= htmlspecialchars((string)$fullName, ENT_QUOTES, 'UTF-8') ?></schema:alternateName><? } ?>
 			<? if($nacoafLink !== null){ ?><schema:sameAs><?= htmlspecialchars((string)$nacoafLink, ENT_QUOTES, 'UTF-8') ?></schema:sameAs><? } ?>
 		</author>
 		<? } ?>
 		<published><?= $published ?></published>
 		<updated><?= $modified ?></updated>
 		<dc:language><?= $language ?></dc:language>
 		<dc:publisher>Standard Ebooks</dc:publisher>
 		<? foreach($sources as $source){ ?>
 		<dc:source><?= htmlspecialchars((string)$source, ENT_QUOTES, 'UTF-8') ?></dc:source>
 		<? } ?>
 		<rights>Public domain in the United States; original content released to the public domain via the Creative Commons CC0 1.0 Universal Public Domain Dedication</rights>
 		<summary type="text"><?= htmlspecialchars((string)$description, ENT_QUOTES, 'UTF-8') ?></summary>
 		<? /* NOTE(review): the long description is written out unescaped, presumably so its markup lands in the feed as child elements - confirm. */ ?>
 		<content type="text/html"><?= $longDescription ?></content>
 		<? foreach($subjects as $subject){ ?>
 		<category scheme="http://purl.org/dc/terms/LCSH" term="<?= htmlspecialchars((string)$subject, ENT_QUOTES, 'UTF-8') ?>"/>
 		<? } ?>
 		<link href="<?= $relativeUrl ?>/dist/cover.jpg" rel="http://opds-spec.org/image" type="image/jpeg"/>
 		<link href="<?= $relativeUrl ?>/dist/cover-thumbnail.jpg" rel="http://opds-spec.org/image/thumbnail" type="image/jpeg"/>
 		<link href="<?= $relativeUrl ?>/src/epub/images/cover.svg" rel="http://opds-spec.org/image" type="image/svg+xml"/>
 		<link href="<?= $relativeUrl ?>/dist/<?= $epubFilename ?>" rel="http://opds-spec.org/acquisition/open-access" type="application/epub+zip"/>
 		<link href="<?= $relativeUrl ?>/dist/<?= $epubFilename ?>3" rel="http://opds-spec.org/acquisition/open-access" type="application/epub+zip"/>
 		<link href="<?= $relativeUrl ?>/dist/<?= preg_replace('/\.epub$/ius', '.kepub.epub', $epubFilename) ?>" rel="http://opds-spec.org/acquisition/open-access" type="application/kepub+zip"/>
 		<link href="<?= $relativeUrl ?>/dist/<?= $kindleFilename ?>" rel="http://opds-spec.org/acquisition/open-access" type="application/x-mobipocket-ebook"/>
 	</entry>
 	<? } ?>
 </feed>
--- a/scripts/generate-rss.php
+++ b/scripts/generate-rss.php
@ -0,0 +1,73 @@
 <?
 // Number of most recent releases to include in the feed.
 $rssLength = 30;
 // Locate every deployed content.opf; `find | sort` prints one path per line.
 $contentFiles = explode("\n", trim(shell_exec('find /standardebooks.org/www/ebooks/ -name "content.opf" | sort') ?? ''));
 // Gather (timestamp, path, xml) records in a plain list. Keying an array by
 // timestamp would make ebooks released at the same instant overwrite each other.
 $datedContentFiles = array();
 foreach($contentFiles as $path){
 	if($path == '')
 		continue;
 	// 'xmlns=' is rewritten to 'ns=' so the document has no default namespace
 	// and the XPath queries can address elements without a prefix.
 	$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents("$path") ?: ''));
 	$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
 	$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
 	// A missing or unparseable date sorts as timestamp 0, i.e. oldest.
 	$publishedTimestamp = strtotime((string)array_shift($temp)) ?: 0;
 	$datedContentFiles[] = array('timestamp' => $publishedTimestamp, 'path' => $path, 'xml' => $xml);
 }
 // Newest first; equal timestamps are ordered by path so the result is deterministic.
 usort($datedContentFiles, function($a, $b){
 	return ($b['timestamp'] <=> $a['timestamp']) ?: strcmp($a['path'], $b['path']);
 });
 // The template iterates $sortedContentFiles as a list of SimpleXMLElement objects.
 $sortedContentFiles = array();
 foreach(array_slice($datedContentFiles, 0, $rssLength) as $entry){
 	$sortedContentFiles[] = $entry['xml'];
 }
 print("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
 ?>
 <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
 	<channel>
 		<title>Standard Ebooks - New Releases</title>
 		<link>https://standardebooks.org</link>
 		<description>A list of the <?= number_format($rssLength) ?> latest Standard Ebooks ebook releases, most-recently-released first.</description>
 		<language>en-US</language>
 		<copyright>https://creativecommons.org/publicdomain/zero/1.0/</copyright>
 		<lastBuildDate><?= gmdate('D, d M Y H:i:s +0000') ?></lastBuildDate>
 		<docs>http://blogs.law.harvard.edu/tech/rss</docs>
 		<atom:link href="https://standardebooks.org/rss/new-releases" rel="self" type="application/rss+xml" />
 		<image>
 			<url>https://standardebooks.org/images/logo-rss.png</url>
 			<title>Standard Ebooks - New Releases</title>
 			<description>The Standard Ebooks logo</description>
 			<link>https://standardebooks.org</link>
 			<height>144</height>
 			<width>144</width>
 		</image>
 		<? foreach($sortedContentFiles as $xml){
 			// Emit one RSS item per ebook; for each field only the first XPath match is used.
 			// Values are cast to string explicitly: `??` binds more loosely than a cast or `.`,
 			// so a trailing `?? ''` after them would never take effect.
 			$temp = $xml->xpath('/package/metadata/dc:identifier') ?: [];
 			$url = preg_replace('/^url:/ius', '', (string)array_shift($temp)) ?? '';
 			$temp = $xml->xpath('/package/metadata/dc:title') ?: [];
 			$title = (string)array_shift($temp);
 			$temp = $xml->xpath('/package/metadata/dc:creator') ?: [];
 			$title .= ', by ' . (string)array_shift($temp);
 			$temp = $xml->xpath('/package/metadata/dc:description') ?: [];
 			$description = (string)array_shift($temp);
 			$temp = $xml->xpath('/package/metadata/dc:date') ?: [];
 			// An unparseable date falls back to timestamp 0.
 			$published = gmdate('D, d M Y H:i:s +0000', strtotime((string)array_shift($temp)) ?: 0);
 			$seSubjects = $xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [];
 			// SimpleXML hands back entity-decoded text, so metadata is escaped again on output.
 		?><item>
 			<title><?= htmlspecialchars($title, ENT_QUOTES, 'UTF-8') ?></title>
 			<link><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></link>
 			<description><?= htmlspecialchars($description, ENT_QUOTES, 'UTF-8') ?></description>
 			<pubDate><?= $published ?></pubDate>
 			<guid><?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?></guid>
 			<? foreach($seSubjects as $seSubject){ ?>
 			<category domain="standardebooks.org"><?= htmlspecialchars((string)$seSubject, ENT_QUOTES, 'UTF-8') ?></category>
 			<? } ?>
 		</item>
 		<? } ?>
 	</channel>
 </rss>
--- a/scripts/reset-php-fpm-opcache
+++ b/scripts/reset-php-fpm-opcache
@ -0,0 +1,30 @@
 #!/bin/bash
 # Print the help text, reflowed by fmt, and terminate the script.
 # Always exits with status 1, including when help was requested with -h/--help.
 usage(){
 	# echo -n with no operands prints nothing
 	echo -n
 	fmt <<EOF
 DESCRIPTION
 	Flush the PHP-FPM opcache and the APCu user cache without reloading or resetting the PHP-FPM service.
 USAGE
 	reset-php-fpm-opcache POOL_NAME
 EOF
 	exit 1
 }
 # Exactly one argument, the pool name, is expected; a help flag or any other
 # argument count prints the usage text instead.
 if [ $# -ne 1 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
 	usage
 fi
 # If this script is run by a user without sudo powers, they can be given for this command by creating a file in sudoers.d with:
 # MY_USERNAME ALL=(www-data) NOPASSWD: /usr/bin/env SCRIPT_FILENAME=/tmp/php-fpm-opcache-reset.php REQUEST_METHOD=GET cgi-fcgi -bind -connect *
 # The path below is the one named in that sudoers rule.
 resetScript="/tmp/php-fpm-opcache-reset.php"
 # Write a throwaway PHP script that clears the opcache and, when APCu is present, the APCu cache
 printf '%s\n' '<?php opcache_reset(); if(function_exists("apcu_clear_cache")){ apcu_clear_cache(); } ?>' > "${resetScript}"
 # Request that script as www-data over the socket /run/php/POOL_NAME.sock, discarding all output
 sudo -u www-data env SCRIPT_FILENAME=/tmp/php-fpm-opcache-reset.php REQUEST_METHOD=GET cgi-fcgi -bind -connect "/run/php/$1.sock" > /dev/null 2>&1
 rm "${resetScript}"