web/scripts/deploy-ebook-to-www
2024-01-15 12:59:53 -06:00

479 lines
18 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Abort as soon as a command fails. Only -e is enabled (no pipefail), so a pipeline's exit status is that of its last command.
set -e
# Print the command-line help, reflowed by fmt, then exit with fmt's status.
# fmt joins consecutive lines that share the same indentation, so blank lines and
# indentation are what keep the individual help entries apart in the output.
usage(){
	fmt <<EOF
DESCRIPTION
	Deploy a Standard Ebook source repository to the web.

USAGE
	deploy-ebook-to-www [-v,--verbose] [-g,--group GROUP] [--webroot WEBROOT] [--weburl WEBURL] [--no-images] [--no-build] [--no-epubcheck] [--no-recompose] [--no-feeds] [--no-bulk-downloads] [-l,--last-push-hash HASH] DIRECTORY [DIRECTORY...]

		DIRECTORY is a bare source repository.

		GROUP is a groupname. Defaults to "se".

		WEBROOT is the path to your webroot. Defaults to "/standardebooks.org/web/www".

		WEBURL is the URL the website is served on. Defaults to "https://standardebooks.org".

	The deploy process does five things:

	1. Build distributable ebook files using 'se build'.

	2. Build single-page and toc-based web-readable versions using 'se recompose-epub'.

	3. Generate various images and thumbnails for web use.

	4. Generate RSS/Atom/OPDS feeds, but only if no other tasks are queued behind this one.

	5. Generate bulk download files, but only if no other tasks are queued behind this one.

	With --no-images, do not create cover thumbnails or hero images for the web.

	With --no-build, do not run 'se build' or 'se recompose' to create distributable ebook files or web-readable versions.

	With --no-epubcheck, run 'se build' instead of 'se build --check'.

	With --no-recompose, do not run 'se recompose-epub' to generate single-page output.

	With --last-push-hash, check the repo head against HASH to see if the cover image or ebook source changed, which will determine if cover thumbnails get re-generated or the ebook gets re-built.

	With --no-feeds, don't generate RSS/Atom/OPDS feeds.

	With --no-bulk-downloads, don't generate bulk download files.
EOF
	exit
}
# Report a fatal error on stderr and terminate the script with status 1.
# $1 - the message to show after the highlighted "Error:" label.
die(){
	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
	exit 1
}
# Make sure a command is available, otherwise abort through die.
# $1 - name of the command to look up.
# $2 - optional hint appended to the error message (for example an install command).
require(){
	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	# ${2:+ $2} expands to a space plus the hint, or to nothing when no hint was given.
	die "$1 is not installed.${2:+ $2}"
}
# End boilerplate
# Option defaults; the option parsing that follows overrides them.

# Reporting, ownership and target locations
verbose='false'
group='se'
webRoot='/standardebooks.org/web/www'
webUrl='https://standardebooks.org'
lastPushHash=''

# Deployment steps; each one is on unless its --no-* switch turns it off
images='true'
build='true'
epubcheck='true'
recompose='true'
feeds='true'
bulkDownloads='true'

# No command-line switch sets this one
coverArtDatabase='false'

# Without any arguments there is nothing to deploy, so show the help text instead
[[ $# -gt 0 ]] || usage
# Consume leading options. Parsing stops at the first argument that is not an option;
# everything left in "$@" afterwards is treated as a DIRECTORY.
while [ $# -gt 0 ]; do
	case "$1" in
		-h|--help)
			usage ;;
		-v|--verbose)
			verbose="true"
			shift 1
			;;
		-g|--group)
			# An absent value shows up as an empty $2 and is rejected the same way
			[ -n "$2" ] || die "Group can't be empty."
			group="$2"
			shift 2
			;;
		--webroot)
			[ -n "$2" ] || die "Web root can't be empty."
			webRoot="$2"
			shift 2
			;;
		--weburl)
			[ -n "$2" ] || die "Web URL can't be empty."
			webUrl="$2"
			shift 2
			;;
		-l|--last-push-hash)
			[ -n "$2" ] || die "Last commit hash can't be empty."
			lastPushHash="$2"
			shift 2
			;;
		--no-images)
			images="false"
			shift 1
			;;
		--no-epubcheck)
			epubcheck="false"
			shift 1
			;;
		--no-recompose)
			recompose="false"
			shift 1
			;;
		--no-build)
			build="false"
			shift 1
			;;
		--no-feeds)
			feeds="false"
			shift 1
			;;
		--no-bulk-downloads)
			bulkDownloads="false"
			shift 1
			;;
		-?*)
			# A mistyped switch would otherwise be handed on as a DIRECTORY and fail later with an unrelated error
			die "Unknown option: $1"
			;;
		*) break ;;
	esac
done
# Validate the target group and web root, then check the required tools and helper scripts before any repository is touched.
if ! [ "$(getent group "${group}")" ]; then
	die "Group ${group} does not exist. Either use --group or create the group."
fi

if ! [ -d "${webRoot}" ]; then
	die "${webRoot} does not exist or is not a directory."
fi

# Check for dependencies
require "convert" "Try: apt-get install imagemagick"
require "git" "Try: apt-get install git"
require "php" "Try: apt-get install php-cli"
require "rsvg-convert" "Try: apt-get install librsvg2-bin"
require "se" "Read: https://standardebooks.org/tools"

# cavif is compiled via Cargo: https://github.com/kornelski/cavif-rs

# Helper scripts are looked up next to this script's real location (symlinks resolved)
scriptsDir="$(dirname "$(readlink -f "$0")")"

if ! [ -x "${scriptsDir}/reset-php-fpm-opcache" ]; then
	die "\"${scriptsDir}/reset-php-fpm-opcache\" is not an executable file."
fi

if ! [ -f "${scriptsDir}/generate-feeds" ]; then
	die "\"${scriptsDir}/generate-feeds\" is not a file or could not be found."
fi

if ! [ -f "${scriptsDir}/generate-bulk-downloads" ]; then
	die "\"${scriptsDir}/generate-bulk-downloads\" is not a file or could not be found."
fi

# Cover images for all ebooks are collected in this one directory
mkdir -p "${webRoot}"/images/covers/
# Read a content.opf on stdin and print the ebook's path below /ebooks/ (e.g. "author/title"),
# taken from the <dc:identifier id="uid"> element. Prints nothing when no such element is found.
ebookPathFromOpf(){
	grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+</dc:identifier>" \
		| sed --regexp-extended "s/<[^>]+?>//g" \
		| sed --regexp-extended "s/^url:https:\/\/standardebooks.org\/ebooks\/?//"
}

# Succeed if the deployed ebook directory $1 already holds at least one regular *.epub file in downloads/.
hasDeployedEpub(){
	local candidate

	for candidate in "$1"/downloads/*.epub; do
		# An unmatched glob stays a literal pattern, which fails the -f test
		if [ -f "${candidate}" ]; then
			return 0
		fi
	done

	return 1
}

# Print $1 escaped for use as the replacement text of a sed "s|...|...|" command:
# backslash, "&" (which sed expands to the matched text) and the "|" delimiter get a leading backslash.
escapeSedReplacement(){
	printf '%s' "$1" | sed 's/[\\&|]/\\&/g'
}

# Remove whatever temporary work directories are currently in use.
# Installed as an EXIT trap so they are also cleaned up when the script aborts part-way through an ebook.
workDir=""
imgWorkDir=""
removeWorkDirs(){
	if [ -n "${workDir}" ]; then
		rm --preserve-root --recursive --force "${workDir}"
	fi

	if [ -n "${imgWorkDir}" ]; then
		rm --preserve-root --recursive --force "${imgWorkDir}"
	fi
}
trap removeWorkDirs EXIT

# Deploy each repository given on the command line
for dir in "$@"
do
	if [ "${dir}" = "" ]; then
		continue
	fi

	repoDir=$(realpath "${dir%/}")
	baseName=$(basename "${repoDir}")

	if [ "${baseName}" = ".git" ]; then
		continue
	fi

	if [ ! -d "${repoDir}" ]; then
		die "Invalid repo root: ${repoDir}"
	fi

	if ! pushd "${repoDir}" > /dev/null; then
		echo "Couldn't cd into ${repoDir}. Skipping."
		continue
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Entering %s\n" "${repoDir}"
	fi

	# A publication date of 1900-01-01 marks the ebook as a draft
	if git show HEAD:src/epub/content.opf | grep --quiet --extended-regexp "<dc:date>1900-01-01T00:00:00Z</dc:date>"; then
		printf "Looks like a draft ebook, skipping\n"
		# Undo the pushd above so skipped repositories don't pile up on the directory stack
		popd > /dev/null || die "Couldn't pop directory."
		continue
	fi

	# Derive the deploy locations up front: the rebuild checks below need to look at the files already deployed
	ebookPath=$(git show HEAD:src/epub/content.opf | ebookPathFromOpf)

	if [ "${ebookPath}" = "" ]; then
		die "Empty webdir!"
	fi

	# Same identifier with slashes turned into underscores, used as the file name prefix for cover images
	urlSafeIdentifier="${ebookPath//\//_}"
	webDir="${webRoot}/ebooks/${ebookPath}"

	# Decide per ebook what to regenerate, starting from the command-line settings,
	# so that a decision made for one repository doesn't carry over to the next one
	buildImages="${images}"
	buildEbook="${build}"

	if [ "${lastPushHash}" != "" ]; then
		# We were passed the hash of the last push before this one.
		# Check to see if the cover image changed, to decide if we want to rebuild the cover image thumbnail/hero
		diff=$(git diff "${lastPushHash}" HEAD)

		if [ "${buildImages}" = "true" ]; then
			# Always build images if they don't exist, or if they've changed
			if [[ ! -f "${webRoot}/images/covers/${urlSafeIdentifier}-cover.jpg" ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.jpg ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.svg ]]; then
				buildImages="true"
			else
				buildImages="false"
			fi
		fi

		if [ "${buildEbook}" = "true" ]; then
			# Check to see if the actual ebook changed, to decide if we want to build
			# Always build if it doesn't exist
			if ! hasDeployedEpub "${webDir}" || [[ "${diff}" =~ diff\ --git\ a/src/ ]]; then
				buildEbook="true"
			else
				buildEbook="false"
			fi
		fi
	fi

	workDir=$(mktemp -d)
	imgWorkDir=$(mktemp -d)

	if [ "${buildImages}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Generating cover image for web ... "
		fi

		git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
		git show HEAD:images/cover.svg > "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# We have to cd into the work dir, otherwise convert won't pick up the relative path of the jpg background in cover.svg
		pushd "${imgWorkDir}" > /dev/null || exit

		# Build the hero image for individual ebook pages
		# Resize to 1318 wide and crop a 1318x439 region starting at the coords 0,659;
		# the @2x variant resizes to 2636 wide and crops 2636x860 starting at 0,1318
		convert -resize "1318" -crop "1318x439+0+659" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg"
		convert -resize "2636" -crop "2636x860+0+1318" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.avif"

		# Build the cover image thumbnail
		# We use JPG instead of SVG for several reasons:
		# 1. A JPG is roughly 1/2 the file size of the same SVG, because the SVG must contain the JPG in base64 encoding
		# 2. The "scale up" effect on mouse hover is blurry when used on SVGs.

		# Point the SVG at the renamed copy of the cover JPG
		sed -i "s/cover\.jpg/${urlSafeIdentifier}\.jpg/g" "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# Resize and compress the cover image (formula from Google Page Speed Insights)
		# We can't use `convert` directly to do svg -> jpg, as sometimes it fails to load the linked cover.jpg within cover.svg.
		# So, we use `rsvg-convert` to write a png, then `convert` to convert and compress to jpg.
		rsvg-convert --width 242 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
		rsvg-convert --width 484 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"

		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg"
		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.avif"

		sudo chgrp --preserve-root --recursive "${group}" "${imgWorkDir}/${urlSafeIdentifier}"*
		sudo chmod --preserve-root --recursive g+w "${imgWorkDir}/${urlSafeIdentifier}"*

		# Remove unused images so we can copy the rest over with a glob
		rm "${imgWorkDir}/${urlSafeIdentifier}".{png,jpg,svg}

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		popd > /dev/null || die "Couldn't pop directory."
	fi

	if [ "${buildEbook}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Building ebook ... "
		fi

		git clone --quiet "${repoDir}" "${workDir}"

		mkdir -p "${workDir}/downloads"

		# Build the ebook
		if [ "${epubcheck}" = "true" ]; then
			if ! se build --output-dir="${workDir}"/downloads/ --check --kindle --kobo "${workDir}"; then
				rm --preserve-root --recursive --force "${workDir}"
				die "Error building ebook, stopping deployment."
			fi
		else
			if ! se build --output-dir="${workDir}"/downloads/ --kindle --kobo "${workDir}"; then
				rm --preserve-root --recursive --force "${workDir}"
				die "Error building ebook, stopping deployment."
			fi
		fi

		# Build distributable covers
		convert -resize "1400" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover.jpg"
		convert -resize "350" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover-thumbnail.jpg"

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		# Get the book URL
		bookUrl=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">.+?</dc:identifier>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/.*?url:https:\/\/standardebooks.org(.*?)<.*/\1/g")

		# Get the last commit date so that we can update the modified timestamp in
		# deployed content.opf. generate-opds uses this timestamp in its output.
		modifiedDate=$(TZ=UTC git log --date=iso-strict-local -1 --pretty=tformat:"%cd" --abbrev-commit | sed "s/+00:00/Z/")
		sed --in-place --regexp-extended "s/<meta property=\"dcterms:modified\">.+?<\/meta>/<meta property=\"dcterms:modified\">${modifiedDate}<\/meta>/" "${workDir}/src/epub/content.opf"

		if [ "${recompose}" = "true" ]; then
			if [ "${verbose}" = "true" ]; then
				printf "Recomposing ebook ... "
			fi

			# Recompose the epub into a single file, but put it outside of the epub src for now so we don't stomp on it with the following sections.
			# We do this first because the tweaks below shouldn't apply to the single-page file
			se recompose-epub --xhtml --output "${workDir}"/single-page.xhtml --extra-css-file="${webRoot}/css/web.css" "${workDir}"

			# Wrap book contents in a <main> tag
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|" "${workDir}"/single-page.xhtml

			# Add a navbar with a link back to the homepage
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li></ul></nav></header>|" "${workDir}"/single-page.xhtml

			# Adjust sponsored links in the colophon
			sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/single-page.xhtml

			if [ "${verbose}" = "true" ]; then
				printf "Done.\n"
			fi
		fi

		# Make some compatibility adjustments for the individual XHTML files

		# Remove instances of the .xhtml filename extension in the source text
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place 's/\.xhtml//g'

		# Add our web stylesheet to XHTML files
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended 's|</title>|</title>\n\t\t<link href="/css/web.css" media="screen" rel="stylesheet" type="text/css"/>|'

		# Remove -epub-* CSS properties from CSS files as they're invalid in a web context
		sed --in-place --regexp-extended "s|\s*\-epub\-[^;]+?;||g" "${workDir}"/src/epub/css/*.css

		# Add lang attributes
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended 's/xml:lang="([^"]+?)"/xml:lang="\1" lang="\1"/g'

		# Add the work title to <title> tags in the source text
		workTitle=$(grep --only-matching --extended-regexp "<dc:title id=\"title\">(.+?)</dc:title>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/<[^>]+?>//g")
		# The title may contain "&" (as in "&amp;"), which sed would otherwise expand to the matched text
		workTitleForSed=$(escapeSedReplacement "${workTitle}")
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<title>|<title>${workTitleForSed} - |g"

		# Wrap book contents in a <main> tag
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|"

		# Add the header nav to each page
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li><li><a href=\"${bookUrl}/text\">Table of contents</a></li></ul></nav></header>|"

		# Add a chapter navigation footer to each page
		"${scriptsDir}"/inject-chapter-navigation-footer "${workDir}" "${bookUrl}"

		# Adjust sponsored links in the colophon
		sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/src/epub/text/colophon.xhtml

		# Done adding compatibility!

		if [ "${recompose}" = "true" ]; then
			# Move the single-page file back into the /src/epub/text/ folder
			mv "${workDir}"/single-page.xhtml "${workDir}"/src/epub/text/single-page.xhtml
		fi

		# Add viewport meta elements to all output
		find "${workDir}" \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place "s|</title>|</title>\n\t\t<meta content=\"width=device-width, initial-scale=1\" name=\"viewport\"/>|"

		# Delete the contents of the old webdir
		rm --preserve-root --recursive --force "${webDir}"

		# Re-create the webdir
		mkdir -p "${webDir}"

		# Move contents of the work dir over
		mv "${workDir}"/downloads "${webDir}/"
		rm "${workDir}/src/epub/onix.xml"
		mv "${workDir}"/src/epub/* "${webDir}/"
	fi

	if [ "${buildImages}" = "true" ]; then
		# Move the cover images over
		mv "${imgWorkDir}/${urlSafeIdentifier}"*.{jpg,avif} "${webRoot}/images/covers/"
	fi

	if [ "${coverArtDatabase}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Updating/inserting cover in the cover art database ... "
		fi

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi
	fi

	# Delete the now-empty work dir (empty except for .git)
	rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
	workDir=""
	imgWorkDir=""

	sudo chgrp --preserve-root --recursive "${group}" "${webDir}"
	sudo chmod --preserve-root --recursive g+ws "${webDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webRoot}/images/covers/"
	sudo chmod --preserve-root --recursive g+ws "${webRoot}/images/covers/"

	popd > /dev/null || die "Couldn't pop directory."
done
# Site-wide regeneration is skipped while further tasks are waiting in the tsp queue;
# the last queued task then does this work once for all of them.
queuedTasks="true"
tsp | grep --quiet --extended-regexp "^[0-9]+\s+queued" || queuedTasks="false"

# Web library cache
if [[ "${queuedTasks}" == "true" ]]; then
	[[ "${verbose}" != "true" ]] || printf "Tasks queued after this one, not rebuilding web library cache.\n"
else
	[[ "${verbose}" != "true" ]] || printf "Rebuilding web library cache ... "

	"${scriptsDir}"/rebuild-cache library

	[[ "${verbose}" != "true" ]] || printf "Done.\n"
fi

# Feeds, unless disabled with --no-feeds
if [[ "${feeds}" == "true" && "${queuedTasks}" == "true" ]]; then
	[[ "${verbose}" != "true" ]] || printf "Tasks queued after this one, not building feeds.\n"
elif [[ "${feeds}" == "true" ]]; then
	[[ "${verbose}" != "true" ]] || printf "Building feeds ... "

	"${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}"

	"${scriptsDir}"/rebuild-cache feeds

	[[ "${verbose}" != "true" ]] || printf "Done.\n"
fi

# Bulk download files, unless disabled with --no-bulk-downloads
if [[ "${bulkDownloads}" == "true" && "${queuedTasks}" == "true" ]]; then
	[[ "${verbose}" != "true" ]] || printf "Tasks queued after this one, not building bulk downloads.\n"
elif [[ "${bulkDownloads}" == "true" ]]; then
	[[ "${verbose}" != "true" ]] || printf "Building bulk downloads ... "

	# The generator's output is captured and only shown in verbose mode
	output=$("${scriptsDir}/generate-bulk-downloads" --webroot "${webRoot}")

	[[ "${verbose}" != "true" ]] || echo "${output}"

	"${scriptsDir}"/rebuild-cache bulk-downloads

	[[ "${verbose}" != "true" ]] || printf "Done.\n"
fi

[[ "${verbose}" != "true" ]] || printf "Finished processing ebook.\n"