web/scripts/deploy-ebook-to-www

485 lines
18 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Abort as soon as a command fails outside of a condition. `pipefail` is not enabled, so for a pipeline only the exit status of its last command counts.
set -e
# Print the help text, reflowed by `fmt`, to stdout and then exit the script.
# Takes no arguments. The bare `exit` reuses the status of `fmt`, so the script ends with 0 when the text was printed.
usage(){
	fmt <<EOF
DESCRIPTION
Deploy a Standard Ebook source repository to the web.
USAGE
deploy-ebook-to-www [-v,--verbose] [-g,--group GROUP] [--webroot WEBROOT] [--weburl WEBURL] [--no-images] [--no-build] [--no-epubcheck] [--no-recompose] [--no-feeds] [--no-bulk-downloads] [-l,--last-push-hash HASH] DIRECTORY [DIRECTORY...]
DIRECTORY is a bare source repository.
GROUP is a groupname. Defaults to "se".
WEBROOT is the path to your webroot. Defaults to "/standardebooks.org/web/www".
WEBURL is the URL the website is served on. Defaults to "https://standardebooks.org".
The deploy process does five things:
1. Build distributable ebook files using 'se build'.
2. Build single-page and toc-based web-readable versions using 'se recompose-epub'.
3. Generate various images and thumbnails for web use.
4. Generate RSS/Atom/OPDS feeds, but only if no other tasks are queued behind this one.
5. Generate bulk download files, but only if no other tasks are queued behind this one.
With --no-images, do not create cover thumbnails or hero images for the web.
With --no-build, do not run 'se build' or 'se recompose' to create distributable ebook files or web-readable versions.
With --no-epubcheck, run 'se build' instead of 'se build --check'.
With --no-recompose, do not run 'se recompose-epub' to generate single-page output.
With --last-push-hash, check the repo head against HASH to see if the cover image or ebook source changed, which will determine if cover thumbnails get re-generated or the ebook gets re-built.
With --no-feeds, don't generate RSS/Atom/OPDS feeds.
With --no-bulk-downloads, don't generate bulk download files.
EOF
	exit
}
# Report a fatal error and stop the script.
# Arguments: $1 - message, printed to stderr after a red, reverse-video "Error:" label.
# Exits with status 1; never returns to the caller.
die(){
	local message="${1}"

	printf "\033[0;7;31mError:\033[0m %s\n" "${message}" >&2
	exit 1
}
# Make sure an external command is available, otherwise stop via `die`.
# Arguments: $1 - command name looked up with `command -v`.
#            $2 - optional hint appended to the error message (e.g. how to install the command).
# Returns:   0 when the command exists.
require(){
	# Keep the hint local so that it doesn't linger as a global variable after the call.
	local suggestion=""

	if command -v "${1}" > /dev/null 2>&1; then
		return 0
	fi

	if [ -n "${2}" ]; then
		suggestion=" ${2}"
	fi

	die "${1} is not installed.${suggestion}"
}
# End boilerplate
# Defaults for everything the command line can override.

# Reporting, ownership and locations.
verbose='false'
group='se'
webRoot='/standardebooks.org/web/www'
webUrl='https://standardebooks.org'

# Hash of the previous push; empty means none was supplied.
lastPushHash=''

# Deployment steps; each one is on unless its --no-* option is given.
images='true'
build='true'
epubcheck='true'
recompose='true'
feeds='true'
bulkDownloads='true'
# Without any arguments there is nothing to deploy: show the help text (`usage` exits).
if [ $# -eq 0 ]; then
	usage
fi

# Consume the leading options. Parsing stops at the first argument that is not an option; whatever is left in "$@" afterwards is the list of DIRECTORY arguments.
while [ $# -gt 0 ]; do
	case "$1" in
		-h|--help)
			usage ;;
		-v|--verbose)
			verbose="true"
			shift 1
			;;
		-g|--group)
			[ -n "$2" ] || die "Group can't be empty."
			group="$2"
			shift 2
			;;
		--webroot)
			[ -n "$2" ] || die "Web root can't be empty."
			webRoot="$2"
			shift 2
			;;
		--weburl)
			[ -n "$2" ] || die "Web URL can't be empty."
			webUrl="$2"
			shift 2
			;;
		-l|--last-push-hash)
			[ -n "$2" ] || die "Last commit hash can't be empty."
			lastPushHash="$2"
			shift 2
			;;
		--no-images)
			images="false"
			shift 1
			;;
		--no-epubcheck)
			epubcheck="false"
			shift 1
			;;
		--no-recompose)
			recompose="false"
			shift 1
			;;
		--no-build)
			build="false"
			shift 1
			;;
		--no-feeds)
			feeds="false"
			shift 1
			;;
		--no-bulk-downloads)
			bulkDownloads="false"
			shift 1
			;;
		-*)
			# Fail with a clear message instead of treating a mistyped option as a DIRECTORY.
			die "Unknown option: ${1}" ;;
		*) break ;;
	esac
done
# The group must exist: deployed files are handed over to it with `chgrp`.
if ! [ "$(getent group "${group}")" ]; then
	die "Group ${group} does not exist. Either use --group or create the group."
fi

if ! [ -d "${webRoot}" ]; then
	die "${webRoot} does not exist or is not a directory."
fi

# Check for dependencies.
require "convert" "Try: apt-get install imagemagick"
require "git" "Try: apt-get install git"
require "php" "Try: apt-get install php-cli"
require "rsvg-convert" "Try: apt-get install librsvg2-bin"
require "se" "Read: https://standardebooks.org/tools"
# `cavif` is compiled via Cargo, see <https://github.com/kornelski/cavif-rs>.

# Helper scripts are looked up next to this script, with symlinks resolved.
scriptsDir="$(dirname "$(readlink -f "$0")")"

# NOTE(review): nothing in this script calls `reset-php-fpm-opcache`; confirm whether this check is still needed.
if ! [ -x "${scriptsDir}"/reset-php-fpm-opcache ]; then
	die "\"${scriptsDir}/reset-php-fpm-opcache\" is not an executable file."
fi

if ! [ -f "${scriptsDir}"/generate-feeds ]; then
	die "\"${scriptsDir}/generate-feeds\" is not a file or could not be found."
fi

if ! [ -f "${scriptsDir}"/generate-bulk-downloads ]; then
	die "\"${scriptsDir}/generate-bulk-downloads\" is not a file or could not be found."
fi

# The virtual environment is activated later, before the chapter navigation footer is injected.
if ! [ -f "${scriptsDir}/../.venv/bin/activate" ]; then
	die "\"${scriptsDir}/../.venv/bin/activate\" is not a file or could not be found. Make sure you have installed the \`standardebooks\` package into a Python virtual environment."
fi

# Cover thumbnails and hero images of all ebooks share this directory.
mkdir -p "${webRoot}"/images/covers/
# Succeed if a deployed ebook directory already holds at least one built epub.
# Arguments: $1 - path of the deployed ebook directory (it doesn't have to exist).
# Returns:   0 if `$1/downloads/` contains a regular `*.epub` file, 1 otherwise.
hasBuiltEpub(){
	local epubFile

	for epubFile in "${1}"/downloads/*.epub; do
		# With default shell options an unmatched glob stays a literal pattern, which fails the `-f` test.
		if [ -f "${epubFile}" ]; then
			return 0
		fi
	done

	return 1
}

# Escape a string so that it is taken literally in the replacement part of a `sed "s|…|…|"` command.
# Arguments: $1 - the text to escape.
# Outputs:   the text with `&`, `|` and `\` prefixed by a backslash.
escapeSedReplacement(){
	printf '%s' "${1}" | sed 's/[&|\\]/\\&/g'
}

# Deploy each DIRECTORY left over after option parsing.
for dir in "$@"
do
	if [ "${dir}" = "" ]; then
		continue
	fi

	repoDir=$(realpath "${dir%/}")
	baseName=$(basename "${repoDir}")

	if [ "${baseName}" = ".git" ]; then
		continue
	fi

	if [ ! -d "${repoDir}" ]; then
		die "Invalid repo root: ${repoDir}"
	fi

	if ! pushd "${repoDir}" > /dev/null; then
		echo "Couldn't cd into ${repoDir}. Skipping."
		continue
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Entering %s\n" "${repoDir}"
	fi

	if git show HEAD:src/epub/content.opf | grep --quiet --extended-regexp "<dc:date>1900-01-01T00:00:00Z</dc:date>"; then
		printf "Looks like a draft ebook, skipping\n"
		# Leave the repository again, otherwise later relative DIRECTORY arguments would be resolved from inside this one.
		popd > /dev/null || die "Couldn't pop directory."
		continue
	fi

	# The ebook identifier with `/` replaced by `_`; used as the file name prefix of the cover images.
	urlSafeIdentifier=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s|url:https://standardebooks.org/ebooks/||g" | sed --regexp-extended "s|/|_|g")

	# The ebook identifier as a path below `${webRoot}/ebooks/`. This has to be known before the --last-push-hash check below, which looks inside it.
	webDir=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s/^url:https:\/\/standardebooks.org\/ebooks\/?//")

	if [ "${webDir}" = "" ]; then
		die "Empty webdir!"
	fi

	webDir="${webRoot}/ebooks/${webDir}"

	# Decide per repository what has to be done, so that the outcome for one ebook doesn't carry over to the next DIRECTORY.
	deployImages="${images}"
	deployBuild="${build}"

	if [ "${lastPushHash}" != "" ]; then
		# We were passed the hash of the last push before this one.
		diff=$(git diff "${lastPushHash}" HEAD)

		if [ "${deployImages}" = "true" ]; then
			# Always build images if they don't exist, or if they've changed.
			if [[ -f "${webRoot}/images/covers/${urlSafeIdentifier}-cover.jpg" ]] && ! [[ "${diff}" =~ diff\ --git\ a/images/cover.jpg ]] && ! [[ "${diff}" =~ diff\ --git\ a/images/cover.svg ]]; then
				deployImages="false"
			fi
		fi

		if [ "${deployBuild}" = "true" ]; then
			# Check to see if the actual ebook changed, to decide if we want to build.
			# Always build if it doesn't exist.
			if hasBuiltEpub "${webDir}" && ! [[ "${diff}" =~ diff\ --git\ a/src/ ]]; then
				deployBuild="false"
			fi
		fi
	fi

	workDir=$(mktemp -d)
	imgWorkDir=$(mktemp -d)

	if [ "${deployImages}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Generating cover image for web ... "
		fi

		git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
		git show HEAD:images/cover.svg > "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# We have to `cd` into the work dir, otherwise `convert` won't pick up the relative path of the jpg background in `cover.svg`.
		pushd "${imgWorkDir}" > /dev/null || exit

		# Build the hero image for individual ebook pages.
		# Resize to 1318 wide and crop to 1318x439 starting at the coords 0,659; the @2x version is resized to 2636 wide and cropped to 2636x860 starting at 0,1318.
		convert -resize "1318" -crop "1318x439+0+659" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg"
		convert -resize "2636" -crop "2636x860+0+1318" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.avif"

		# Build the cover image thumbnail.
		# We use JPG instead of SVG for several reasons:
		# 1. A JPG is roughly 1/2 the file size of the same SVG, because the SVG must contain the JPG in base64 encoding.
		# 2. The "scale up" effect on mouse hover is blurry when used on SVGs.

		# Point the SVG at the renamed copy of the cover JPG in the work dir.
		sed -i "s/cover\.jpg/${urlSafeIdentifier}\.jpg/g" "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# Resize and compress the cover image (formula from Google Page Speed Insights).
		# We can't use `convert` directly to do svg -> jpg, as sometimes it fails to load the linked `cover.jpg` within `cover.svg`.
		# So, we use `rsvg-convert` to write a png, then `convert` to convert and compress to jpg.
		rsvg-convert --width 242 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
		rsvg-convert --width 484 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"

		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg"
		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.avif"

		sudo chgrp --preserve-root --recursive "${group}" "${imgWorkDir}/${urlSafeIdentifier}"*
		sudo chmod --preserve-root --recursive g+w "${imgWorkDir}/${urlSafeIdentifier}"*

		# Remove unused images so we can copy the rest over with a glob.
		rm "${imgWorkDir}/${urlSafeIdentifier}".{png,jpg,svg}

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		popd > /dev/null || die "Couldn't pop directory."
	fi

	if [ "${deployBuild}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Building ebook ... "
		fi

		git clone --quiet "${repoDir}" "${workDir}"

		mkdir -p "${workDir}/downloads"

		# Build the ebook; `--check` is passed unless --no-epubcheck was given.
		buildArgs=(--output-dir="${workDir}"/downloads/)
		if [ "${epubcheck}" = "true" ]; then
			buildArgs+=(--check)
		fi
		buildArgs+=(--kindle --kobo)

		if ! se build "${buildArgs[@]}" "${workDir}"; then
			# Don't leave either temporary directory behind when giving up.
			rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
			die "Error building ebook, stopping deployment."
		fi

		# Build distributable covers.
		convert -resize "1400" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover.jpg"
		convert -resize "350" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover-thumbnail.jpg"

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		# Get the book URL.
		bookUrl=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">.+?</dc:identifier>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/.*?url:https:\/\/standardebooks.org(.*?)<.*/\1/g")

		# Get the last commit date so that we can update the modified timestamp in deployed `content.opf`. `generate-feeds` uses this timestamp in its output.
		modifiedDate=$(TZ=UTC git log --date=iso-strict-local -1 --pretty=tformat:"%cd" --abbrev-commit | sed "s/+00:00/Z/")
		sed --in-place --regexp-extended "s/<meta property=\"dcterms:modified\">.+?<\/meta>/<meta property=\"dcterms:modified\">${modifiedDate}<\/meta>/" "${workDir}/src/epub/content.opf"

		if [ "${recompose}" = "true" ]; then
			if [ "${verbose}" = "true" ]; then
				printf "Recomposing ebook ... "
			fi

			# Recompose the epub into a single file, but put it outside of the epub source for now so we don't stomp on it with the following sections.
			# We do this first because the tweaks below shouldn't apply to the single-page file.
			se recompose-epub --xhtml --output "${workDir}"/single-page.xhtml --extra-css-file="${webRoot}/css/web.css" "${workDir}"

			# Wrap book contents in a `<main>` element.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|" "${workDir}"/single-page.xhtml

			# Add a navbar with a link back to the homepage.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li></ul></nav></header>|" "${workDir}"/single-page.xhtml

			# Adjust sponsored links in the colophon.
			sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/single-page.xhtml

			# Add a canonical `<meta>` element.
			sed --in-place "s|</title>|</title>\n\t\t<link rel=\"canonical\" href=\"https://standardebooks.org${bookUrl}/text/single-page\" />|" "${workDir}"/single-page.xhtml

			if [ "${verbose}" = "true" ]; then
				printf "Done.\n"
			fi
		fi

		# Make some compatibility adjustments for the individual XHTML files.
		workTitle=$(grep --only-matching --extended-regexp "<dc:title id=\"title\">(.+?)</dc:title>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/<[^>]+?>//g")

		# The title goes into a sed replacement below, where an unescaped `&` (e.g. from `&amp;`) would expand to the matched text.
		sedSafeWorkTitle=$(escapeSedReplacement "${workTitle}")

		# Read NUL-delimited paths so that file names are never word-split.
		while IFS= read -r -d '' filename; do
			# Add our web stylesheet to XHTML files.
			sed --in-place --regexp-extended 's|</title>|</title>\n\t\t<link href="/css/web.css" media="screen" rel="stylesheet" type="text/css"/>|' "${filename}"

			# Add `@lang` attributes.
			sed --in-place --regexp-extended 's/xml:lang="([^"]+?)"/xml:lang="\1" lang="\1"/g' "${filename}"

			# Add the work title to `<title>` elements in the source text.
			sed --in-place --regexp-extended "s|<title>|<title>${sedSafeWorkTitle} - |g" "${filename}"

			# Wrap book contents in a `<main>` element.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|" "${filename}"

			# Add the header nav to each page.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><a href=\"/honeypot\" hidden=\"hidden\">Following this link will ban your IP for 24 hours</a><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li><li><a href=\"${bookUrl}/text\">Table of contents</a></li></ul></nav></header>|" "${filename}"

			# Add a canonical `<meta>` element.
			fileUrl=$(echo "${filename}" | sed --regexp-extended "s|^.+/src/epub(/text/.+\.xhtml$)|https://standardebooks.org${bookUrl}\1|")

			# Special case for `toc.xhtml`.
			if [[ "${fileUrl}" =~ /toc\.xhtml$ ]]; then
				fileUrl="https://standardebooks.org${bookUrl}/text"
			fi

			sed --in-place "s|</title>|</title>\n\t\t<link rel=\"canonical\" href=\"${fileUrl}\" />|" "${filename}"

			# Remove instances of the `.xhtml` filename extension in the source text.
			sed --in-place 's/\.xhtml//g' "${filename}"
		done < <(find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0)

		# Remove `-epub-*` CSS properties from CSS files as they're invalid in a web context.
		sed --in-place --regexp-extended "s|\s*\-epub\-[^;]+?;||g" "${workDir}"/src/epub/css/*.css

		# Add a chapter navigation footer to each page.
		source "${scriptsDir}/../.venv/bin/activate"
		"${scriptsDir}"/inject-chapter-navigation-footer "${workDir}" "${bookUrl}"
		deactivate

		# Adjust sponsored links in the colophon.
		sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/src/epub/text/colophon.xhtml

		# Done adding compatibility!

		if [ "${recompose}" = "true" ]; then
			# Move the single-page file back into the `/src/epub/text/` folder.
			mv "${workDir}"/single-page.xhtml "${workDir}"/src/epub/text/single-page.xhtml
		fi

		# Add viewport meta elements to all output.
		find "${workDir}" \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place "s|</title>|</title>\n\t\t<meta content=\"width=device-width, initial-scale=1\" name=\"viewport\"/>|"

		# Delete the contents of the old webdir.
		rm --preserve-root --recursive --force "${webDir}"

		# Re-create the webdir.
		mkdir -p "${webDir}"

		# Move contents of the work dir over.
		mv "${workDir}"/downloads "${webDir}/"
		rm "${workDir}/src/epub/onix.xml"
		mv "${workDir}"/src/epub/* "${webDir}/"
	fi

	if [ "${deployImages}" = "true" ]; then
		# Move the cover images over.
		mv "${imgWorkDir}/${urlSafeIdentifier}"*.{jpg,avif} "${webRoot}/images/covers/"
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Updating/inserting ebook in database ... "
	fi

	"${scriptsDir}"/update-ebook-database --ebook-www-filesystem-path "${webDir}"

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi

	# Delete the now-empty work dir (empty except for `.git`).
	rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webDir}"
	sudo chmod --preserve-root --recursive g+ws "${webDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webRoot}/images/covers/"
	sudo chmod --preserve-root --recursive g+ws "${webRoot}/images/covers/"

	popd > /dev/null || die "Couldn't pop directory."
done
# Find out whether more jobs are waiting in the `tsp` build queue. If `tsp` isn't installed, nothing counts as queued.
queuedTasks="false"

# `command -v` is the same lookup that `require` uses, and is a shell builtin, unlike `which`.
if command -v tsp > /dev/null 2>&1; then
	# A line of `tsp` output that starts with a job number followed by "queued" is a job that hasn't started yet.
	if tsp | grep --quiet --extended-regexp "^[0-9]+\s+queued"; then
		queuedTasks="true"
	fi
fi
# Regenerate the feeds unless --no-feeds was given. If other tasks are queued behind this one, the work is skipped here.
if [ "${feeds}" = "true" ] && [ "${queuedTasks}" = "false" ]; then
	if [ "${verbose}" = "true" ]; then
		printf "Building feeds ... "
	fi

	"${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}"

	"${scriptsDir}"/rebuild-cache feeds

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi
elif [ "${feeds}" = "true" ] && [ "${verbose}" = "true" ]; then
	printf "Tasks queued after this one, not building feeds.\n"
fi
# Regenerate the bulk download files unless --no-bulk-downloads was given. If other tasks are queued behind this one, the work is skipped here.
if [ "${bulkDownloads}" = "true" ] && [ "${queuedTasks}" = "false" ]; then
	if [ "${verbose}" = "true" ]; then
		printf "Building bulk downloads ... "
	fi

	# The generator's output is captured and only shown in verbose mode.
	output=$("${scriptsDir}/generate-bulk-downloads" --webroot "${webRoot}")

	if [ "${verbose}" = "true" ]; then
		echo "${output}"
	fi

	"${scriptsDir}"/rebuild-cache bulk-downloads

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi
elif [ "${bulkDownloads}" = "true" ] && [ "${verbose}" = "true" ]; then
	printf "Tasks queued after this one, not building bulk downloads.\n"
fi

# Closing message for verbose runs.
if [ "${verbose}" = "true" ]; then
	printf "Finished processing ebook.\n"
fi