web/scripts/deploy-ebook-to-www

#!/bin/bash
set -e

# Print the help text to stdout, re-wrapped by `fmt`, then exit the script.
# `exit` is given no argument, so the exit status is that of `fmt`.
usage(){
	fmt <<EOF
DESCRIPTION
	Deploy a Standard Ebook source repository to the web.

USAGE
	deploy-ebook-to-www [-v,--verbose] [-g,--group GROUP] [--webroot WEBROOT] [--weburl WEBURL] [--no-images] [--no-build] [--no-epubcheck] [--no-recompose] [--no-feeds] [--no-bulk-downloads] [-l,--last-push-hash HASH] DIRECTORY [DIRECTORY...]
		DIRECTORY is a bare source repository.
		GROUP is a groupname. Defaults to "se".
		WEBROOT is the path to your webroot. Defaults to "/standardebooks.org/web/www".
		WEBURL is the URL the website is served on. Defaults to "https://standardebooks.org".

		The deploy process does five things:

		1. Build distributable ebook files using 'se build'.

		2. Build single-page and toc-based web-readable versions using 'se recompose-epub'.

		3. Generate various images and thumbnails for web use.

		4. Generate RSS/Atom/OPDS feeds, but only if no other tasks are queued behind this one.

		5. Generate bulk download files, but only if no other tasks are queued behind this one.

		With --no-images, do not create cover thumbnails or hero images for the web.

		With --no-build, do not run 'se build' or 'se recompose-epub' to create distributable ebook files or web-readable versions.

		With --no-epubcheck, run 'se build' instead of 'se build --check'.

		With --no-recompose, do not run 'se recompose-epub' to generate single-page output.

		With --last-push-hash, check the repo head against HASH to see if the cover image or ebook source changed, which will determine if cover thumbnails get re-generated or the ebook gets re-built.

		With --no-feeds, don't generate RSS/Atom/OPDS feeds.

		With --no-bulk-downloads, don't generate bulk download files.

EOF
	exit
}
# Write an error message to stderr, prefixed with a highlighted "Error:" label, and abort the script with status 1.
# $1: the message to print.
die(){
	local message="${1}"

	printf "\033[0;7;31mError:\033[0m %s\n" "${message}" >&2
	exit 1
}
# Make sure a command is available; otherwise abort through `die`.
# $1: name of the command to look up.
# $2: optional hint, appended to the error message after a space.
require(){
	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	# Expands to " $2" when a hint was given, and to nothing otherwise.
	suggestion="${2:+ $2}"
	die "$1 is not installed.${suggestion}"
}
# End boilerplate

# Defaults for every setting; the option parser below overrides them.
verbose="false"
group="se"
webRoot="/standardebooks.org/web/www"
webUrl="https://standardebooks.org"
lastPushHash=""

# Stages of the deploy; each one is switched off by its matching `--no-*` flag.
build="true"
epubcheck="true"
recompose="true"
images="true"
feeds="true"
bulkDownloads="true"

# Called without any arguments: show the help text.
[[ $# -gt 0 ]] || usage

# Consume options from the front of the argument list. The first argument that isn't a recognized option ends the loop; it and everything after it stay in "$@".
while [[ $# -gt 0 ]]; do
	case "$1" in
		-h|--help)
			usage
			;;

		# Flags that take no value.
		-v|--verbose) verbose="true"; shift ;;
		--no-build) build="false"; shift ;;
		--no-epubcheck) epubcheck="false"; shift ;;
		--no-recompose) recompose="false"; shift ;;
		--no-images) images="false"; shift ;;
		--no-feeds) feeds="false"; shift ;;
		--no-bulk-downloads) bulkDownloads="false"; shift ;;

		# Options whose value is the next argument; an empty or missing value is an error.
		-g|--group)
			if [[ -z "$2" ]]; then
				die "Group can’t be empty."
			fi
			group="$2"
			shift 2
			;;
		--webroot)
			if [[ -z "$2" ]]; then
				die "Web root can’t be empty."
			fi
			webRoot="$2"
			shift 2
			;;
		--weburl)
			if [[ -z "$2" ]]; then
				die "Web URL can’t be empty."
			fi
			webUrl="$2"
			shift 2
			;;
		-l|--last-push-hash)
			if [[ -z "$2" ]]; then
				die "Last commit hash can’t be empty."
			fi
			lastPushHash="$2"
			shift 2
			;;

		*)
			break
			;;
	esac
done

# Pre-flight checks: abort before any work is done if the environment is incomplete.

# The deployed files are later `chgrp`ed to this group, so it has to exist.
if ! [ "$(getent group "${group}")" ]; then
	die "Group ${group} does not exist. Either use --group or create the group."
fi

if ! [ -d "${webRoot}" ]; then
	die "${webRoot} does not exist or is not a directory."
fi

# Check for dependencies.
require "convert" "Try: apt-get install imagemagick"
require "git" "Try: apt-get install git"
require "php" "Try: apt-get install php-cli"
require "rsvg-convert" "Try: apt-get install librsvg2-bin"
require "se" "Read: https://standardebooks.org/tools"

# `cavif` is compiled via Cargo, see <https://github.com/kornelski/cavif-rs>.

# Helper scripts are looked up in the directory holding this script, after resolving symlinks.
scriptsDir="$(dirname "$(readlink -f "$0")")"

if ! [ -x "${scriptsDir}/reset-php-fpm-opcache" ]; then
	die "\"${scriptsDir}/reset-php-fpm-opcache\" is not an executable file."
fi

if ! [ -f "${scriptsDir}/generate-feeds" ]; then
	die "\"${scriptsDir}/generate-feeds\" is not a file or could not be found."
fi

if ! [ -f "${scriptsDir}/generate-bulk-downloads" ]; then
	die "\"${scriptsDir}/generate-bulk-downloads\" is not a file or could not be found."
fi

if ! [ -f "${scriptsDir}/../.venv/bin/activate" ]; then
	die "\"${scriptsDir}/../.venv/bin/activate\" is not a file or could not be found. Make sure you have installed the \`standardebooks\` package into a Python virtual environment."
fi

# Cover thumbnails and hero images are written here.
mkdir -p "${webRoot}/images/covers/"

# Return success if at least one built `.epub` file exists in "$1/downloads".
# $1: the ebook's directory inside the webroot.
hasDeployedEpub(){
	local epub

	# A glob without matches is left as the literal pattern, which then fails the `-f` test.
	for epub in "${1}"/downloads/*.epub; do
		if [ -f "${epub}" ]; then
			return 0
		fi
	done

	return 1
}

# Escape the characters that are special in the replacement half of a `sed` `s|...|...|` command (`&`, `|`, and backslash), so that the text is inserted literally.
# $1: the text to escape. The escaped text is written to stdout.
escapeSedReplacement(){
	printf "%s" "${1}" | sed 's/[&|\\]/\\&/g'
}

# Deploy each repository passed on the command line.
for dir in "$@"
do
	if [ "${dir}" = "" ]; then
		continue
	fi

	repoDir=$(realpath "${dir%/}")
	baseName=$(basename "${repoDir}")

	if [ "${baseName}" = ".git" ]; then
		continue
	fi

	if [ ! -d "${repoDir}" ]; then
		die "Invalid repo root: ${repoDir}"
	fi

	if ! pushd "${repoDir}" > /dev/null; then
		echo "Couldn't cd into ${repoDir}. Skipping."
		continue
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Entering %s\n" "${repoDir}"
	fi

	if git show HEAD:src/epub/content.opf | grep --quiet --extended-regexp "<dc:date>1900-01-01T00:00:00Z</dc:date>"; then
		printf "Looks like a draft ebook, skipping\n"
		# Undo the `pushd` above before moving on to the next repository.
		popd > /dev/null || die "Couldn't pop directory."
		continue
	fi

	# The text of the `uid` identifier element in the committed `content.opf`, with the surrounding tags stripped.
	uidIdentifier=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g")

	# The identifier path with `/` replaced by `_`; used as the file name prefix for cover images.
	urlSafeIdentifier=$(printf "%s\n" "${uidIdentifier}" | sed --regexp-extended "s|url:https://standardebooks.org/ebooks/||g" | sed --regexp-extended "s|/|_|g")

	# The identifier path itself; this is the ebook's directory below `${webRoot}/ebooks/`.
	webDir=$(printf "%s\n" "${uidIdentifier}" | sed --regexp-extended "s/^url:https:\/\/standardebooks.org\/ebooks\/?//")

	if [ "${webDir}" = "" ]; then
		die "Empty webdir!"
	fi

	webDir="${webRoot}/ebooks/${webDir}"

	# Decide per repository what to do, so that a decision made for one repository doesn't carry over to the next one.
	buildImages="${images}"
	buildEbook="${build}"

	if [ "${lastPushHash}" != "" ]; then
		# We were passed the hash of the last push before this one.
		# Check to see if the cover image changed, to decide if we want to rebuild the cover image thumbnail/hero.
		diff=$(git diff "${lastPushHash}" HEAD)

		if [ "${buildImages}" = "true" ]; then
			# Always build images if they don't exist, or if they've changed
			if [[ ! -f "${webRoot}/images/covers/${urlSafeIdentifier}-cover.jpg" ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.jpg ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.svg ]]; then
				buildImages="true"
			else
				buildImages="false"
			fi
		fi

		if [ "${buildEbook}" = "true" ]; then
			# Check to see if the actual ebook changed, to decide if we want to build.
			# Always build if it doesn't exist.
			if ! hasDeployedEpub "${webDir}" || [[ "${diff}" =~ diff\ --git\ a/src/ ]]; then
				buildEbook="true"
			else
				buildEbook="false"
			fi
		fi
	fi

	workDir=$(mktemp -d)
	imgWorkDir=$(mktemp -d)

	if [ "${buildImages}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Generating cover image for web ... "
		fi

		git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
		git show HEAD:images/cover.svg > "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# We have to `cd` into the work dir, otherwise `convert` won't pick up the relative path of the jpg background in `cover.svg`.
		pushd "${imgWorkDir}" > /dev/null || die "Couldn't cd into ${imgWorkDir}."

		# Build the hero image for individual ebook pages.
		# Resize the image to 1318 width, then crop a 1318x439 region starting at the coords 0,659. The `@2x` variant uses 2636 width, and a 2636x860 region starting at 0,1318.
		convert -resize "1318" -crop "1318x439+0+659" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg"
		convert -resize "2636" -crop "2636x860+0+1318" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.avif"

		# Build the cover image thumbnail.
		# We use JPG instead of SVG for several reasons:
		# 1. A JPG is roughly 1/2 the file size of the same SVG, because the SVG must contain the JPG in base64 encoding.
		# 2. The "scale up" effect on mouse hover is blurry when used on SVGs.

		# Point the SVG at the renamed copy of the cover JPG in the work dir.
		sed -i "s/cover\.jpg/${urlSafeIdentifier}\.jpg/g" "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# Resize and compress the cover image (formula from Google Page Speed Insights).
		# We can't use `convert` directly to do svg -> jpg, as sometimes it fails to load the linked `cover.jpg` within `cover.svg`.
		# So, we use `rsvg-convert` to write a png, then `convert` to convert and compress to jpg.
		rsvg-convert --width 242 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
		rsvg-convert --width 484 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg"
		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.avif"

		sudo chgrp --preserve-root --recursive "${group}" "${imgWorkDir}/${urlSafeIdentifier}"*
		sudo chmod --preserve-root --recursive g+w "${imgWorkDir}/${urlSafeIdentifier}"*

		# Remove unused images so we can copy the rest over with a glob.
		rm "${imgWorkDir}/${urlSafeIdentifier}".{png,jpg,svg}

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		popd > /dev/null || die "Couldn't pop directory."
	fi

	if [ "${buildEbook}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Building ebook ... "
		fi

		git clone --quiet "${repoDir}" "${workDir}"

		mkdir -p "${workDir}/downloads"

		# Build the ebook, running epubcheck unless `--no-epubcheck` was passed.
		seCheckFlags=()
		if [ "${epubcheck}" = "true" ]; then
			seCheckFlags+=(--check)
		fi

		if ! se build --output-dir="${workDir}"/downloads/ "${seCheckFlags[@]}" --kindle --kobo "${workDir}"; then
			# Don't leave either temp dir behind when aborting.
			rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
			die "Error building ebook, stopping deployment."
		fi

		# Build distributable covers.
		convert -resize "1400" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover.jpg"
		convert -resize "350" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover-thumbnail.jpg"

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		# Get the book URL.
		bookUrl=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">.+?</dc:identifier>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/.*?url:https:\/\/standardebooks.org(.*?)<.*/\1/g")

		# Get the last commit date so that we can update the modified timestamp in deployed `content.opf`. `generate-feeds` uses this timestamp in its output.
		modifiedDate=$(TZ=UTC git log --date=iso-strict-local -1 --pretty=tformat:"%cd" --abbrev-commit | sed "s/+00:00/Z/")
		sed --in-place --regexp-extended "s/<meta property=\"dcterms:modified\">.+?<\/meta>/<meta property=\"dcterms:modified\">${modifiedDate}<\/meta>/" "${workDir}/src/epub/content.opf"

		if [ "${recompose}" = "true" ]; then
			if [ "${verbose}" = "true" ]; then
				printf "Recomposing ebook ... "
			fi

			# Recompose the epub into a single file, but put it outside of the epub source for now so we don't stomp on it with the following sections.
			# We do this first because the tweaks below shouldn't apply to the single-page file.
			se recompose-epub --xhtml --output "${workDir}"/single-page.xhtml --extra-css-file="${webRoot}/css/web.css" "${workDir}"

			# Wrap book contents in a `<main>` element.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|" "${workDir}"/single-page.xhtml

			# Add a navbar with a link back to the homepage.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li></ul></nav></header>|" "${workDir}"/single-page.xhtml

			# Adjust sponsored links in the colophon.
			sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/single-page.xhtml

			# Add a canonical `<meta>` element.
			sed --in-place "s|</title>|</title>\n\t\t<link rel=\"canonical\" href=\"https://standardebooks.org${bookUrl}/text/single-page\" />|" "${workDir}"/single-page.xhtml

			if [ "${verbose}" = "true" ]; then
				printf "Done.\n"
			fi
		fi

		# Make some compatibility adjustments for the individual XHTML files.
		workTitle=$(grep --only-matching --extended-regexp "<dc:title id=\"title\">(.+?)</dc:title>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/<[^>]+?>//g")

		# The title is spliced into a `sed` replacement below; without escaping, an `&` (for example in `&amp;`) would expand to the matched text.
		workTitleForSed=$(escapeSedReplacement "${workTitle}")

		# Read NUL-delimited paths so that file names are never word-split.
		while IFS= read -r -d '' filename; do

			# Add our web stylesheet to XHTML files.
			sed --in-place --regexp-extended 's|</title>|</title>\n\t\t<link href="/css/web.css" media="screen" rel="stylesheet" type="text/css"/>|' "${filename}"

			# Add `@lang` attributes.
			sed --in-place --regexp-extended 's/xml:lang="([^"]+?)"/xml:lang="\1" lang="\1"/g' "${filename}"

			# Add the work title to `<title>` elements in the source text.
			sed --in-place --regexp-extended "s|<title>|<title>${workTitleForSed} - |g" "${filename}"

			# Wrap book contents in a `<main>` element.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|" "${filename}"

			# Add the header nav to each page.
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><a href=\"/honeypot\" hidden=\"hidden\">Following this link will ban your IP for 24 hours</a><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li><li><a href=\"${bookUrl}/text\">Table of contents</a></li></ul></nav></header>|" "${filename}"

			# Add a canonical `<meta>` element.
			fileUrl=$(echo "${filename}" | sed --regexp-extended "s|^.+/src/epub(/text/.+\.xhtml$)|https://standardebooks.org${bookUrl}\1|")

			# Special case for `toc.xhtml`.
			if [[ "${fileUrl}" =~ /toc\.xhtml$ ]]; then
				fileUrl="https://standardebooks.org${bookUrl}/text"
			fi

			sed --in-place "s|</title>|</title>\n\t\t<link rel=\"canonical\" href=\"${fileUrl}\" />|" "${filename}"

			# Remove instances of the `.xhtml` filename extension in the source text.
			sed --in-place 's/\.xhtml//g' "${filename}"
		done < <(find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0)

		# Remove `-epub-*` CSS properties from CSS files as they're invalid in a web context.
		sed --in-place --regexp-extended "s|\s*\-epub\-[^;]+?;||g" "${workDir}"/src/epub/css/*.css

		# Add a chapter navigation footer to each page.
		source "${scriptsDir}/../.venv/bin/activate"
		"${scriptsDir}"/inject-chapter-navigation-footer "${workDir}" "${bookUrl}"
		deactivate

		# Adjust sponsored links in the colophon.
		sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/src/epub/text/colophon.xhtml

		# Done adding compatibility!

		if [ "${recompose}" = "true" ]; then
			# Move the single-page file back into the `/src/epub/text/` folder.
			mv "${workDir}"/single-page.xhtml "${workDir}"/src/epub/text/single-page.xhtml
		fi

		# Add viewport meta elements to all output.
		find "${workDir}" \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place "s|</title>|</title>\n\t\t<meta content=\"width=device-width, initial-scale=1\" name=\"viewport\"/>|"

		# Delete the contents of the old webdir.
		rm --preserve-root --recursive --force "${webDir}"

		# Re-create the webdir.
		mkdir -p "${webDir}"

		# Move contents of the work dir over.
		mv "${workDir}"/downloads "${webDir}/"
		rm "${workDir}/src/epub/onix.xml"
		mv "${workDir}"/src/epub/* "${webDir}/"
	fi

	if [ "${buildImages}" = "true" ]; then
		# Move the cover images over.
		mv "${imgWorkDir}/${urlSafeIdentifier}"*.{jpg,avif} "${webRoot}/images/covers/"
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Updating/inserting ebook in database ... "
	fi

	"${scriptsDir}"/update-ebook-database --ebook-www-filesystem-path "${webDir}"

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi

	# Delete the now-empty work dir (empty except for `.git`).
	rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webDir}"
	sudo chmod --preserve-root --recursive g+ws "${webDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webRoot}/images/covers/"
	sudo chmod --preserve-root --recursive g+ws "${webRoot}/images/covers/"

	popd > /dev/null || die "Couldn't pop directory."
done

# Return success if `tsp` is available and its listing contains at least one job in the `queued` state.
hasQueuedTasks(){
	if ! command -v tsp > /dev/null 2>&1; then
		return 1
	fi

	tsp | grep --quiet --extended-regexp "^[0-9]+\s+queued"
}

# Feeds and bulk downloads are only rebuilt by the last job in the `tsp` build queue.
queuedTasks="false"
if hasQueuedTasks; then
	queuedTasks="true"
fi

if [ "${feeds}" = "true" ]; then
	# Build the various feeds catalog, but only if we don't have more items in the `tsp` build queue.
	if [ "${queuedTasks}" = "false" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Building feeds ... "
		fi

		"${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}"
		"${scriptsDir}"/rebuild-cache feeds

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi
	else
		if [ "${verbose}" = "true" ]; then
			printf "Tasks queued after this one, not building feeds.\n"
		fi
	fi
fi

if [ "${bulkDownloads}" = "true" ]; then
	# Build the bulk download files, but only if we don't have more items in the `tsp` build queue.
	if [ "${queuedTasks}" = "false" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Building bulk downloads ... "
		fi

		# The generator's output is captured, and only shown in verbose mode.
		output=$("${scriptsDir}/generate-bulk-downloads" --webroot "${webRoot}")
		if [ "${verbose}" = "true" ]; then
			echo "${output}"
		fi
		"${scriptsDir}"/rebuild-cache bulk-downloads

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi
	else
		if [ "${verbose}" = "true" ]; then
			printf "Tasks queued after this one, not building bulk downloads.\n"
		fi
	fi
fi

if [ "${verbose}" = "true" ]; then
	printf "Finished processing ebook.\n"
fi