web/scripts/deploy-ebook-to-www

#!/bin/bash
set -e

# Print the help text (re-flowed through fmt) to stdout and exit the script.
# Takes no arguments. Never returns to the caller: the script exits with fmt's status.
usage(){
	fmt <<EOF
DESCRIPTION
	Deploy a Standard Ebook source repository to the web.

USAGE
	deploy-ebook-to-www [-v,--verbose] [-g,--group GROUP] [--webroot WEBROOT] [--weburl WEBURL] [--no-images] [--no-build] [--no-epubcheck] [--no-recompose] [--no-feeds] [--no-bulk-downloads] [-l,--last-push-hash HASH] DIRECTORY [DIRECTORY...]
		DIRECTORY is a bare source repository.
		GROUP is a groupname. Defaults to "se".
		WEBROOT is the path to your webroot. Defaults to "/standardebooks.org/web/www".
		WEBURL is the URL the website is served on. Defaults to "https://standardebooks.org".

		The deploy process does five things:

		1. Build distributable ebook files using 'se build'.

		2. Build single-page and toc-based web-readable versions using 'se recompose-epub'.

		3. Generate various images and thumbnails for web use.

		4. Generate RSS/Atom/OPDS feeds, but only if no other tasks are queued behind this one.

		5. Generate bulk download files, but only if no other tasks are queued behind this one.

		With --no-images, do not create cover thumbnails or hero images for the web.

		With --no-build, do not run 'se build' or 'se recompose-epub' to create distributable ebook files or web-readable versions.

		With --no-epubcheck, run 'se build' instead of 'se build --check'.

		With --no-recompose, do not run 'se recompose-epub' to generate single-page output.

		With --last-push-hash, check the repo head against HASH to see if the cover image or ebook source changed, which will determine if cover thumbnails get re-generated or the ebook gets re-built.

		With --no-feeds, don't generate RSS/Atom/OPDS feeds.

		With --no-bulk-downloads, don't generate bulk download files.

EOF
	exit
}
# Print "Error: <message>" (with the label highlighted via ANSI escapes) to stderr, then abort the script with exit status 1.
# $1 - the message to show.
die(){
	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
	exit 1
}
# Make sure a command is available on the PATH; abort via die() if it is not.
# $1 - the command name to look up.
# $2 - optional hint (e.g. an install command) appended to the error message.
require(){
	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	suggestion=""
	if [ -n "$2" ]; then
		suggestion=" $2"
	fi

	die "$1 is not installed.${suggestion}"
}
# End boilerplate

# Default settings. The on/off switches hold the literal strings "true"/"false".

# Switches
verbose='false'
images='true'
build='true'
epubcheck='true'
recompose='true'
feeds='true'
bulkDownloads='true'
coverArtDatabase='false'

# Values
group='se'
webRoot='/standardebooks.org/web/www'
webUrl='https://standardebooks.org'
lastPushHash=''

# With no arguments at all there is nothing to deploy, so show the help text.
if (( $# == 0 )); then
	usage
fi

# Consume leading options. The first argument that is not a recognised option ends parsing, leaving the remaining arguments (the DIRECTORY list) in "$@".
while (( $# > 0 )); do
	case "$1" in
		-h|--help)
			usage
			;;
		-v|--verbose)
			verbose="true"
			shift
			;;
		-g|--group)
			if [ -z "$2" ]; then
				die "Group can’t be empty."
			fi
			group="$2"
			shift 2
			;;
		--webroot)
			if [ -z "$2" ]; then
				die "Web root can’t be empty."
			fi
			webRoot="$2"
			shift 2
			;;
		--weburl)
			if [ -z "$2" ]; then
				die "Web URL can’t be empty."
			fi
			webUrl="$2"
			shift 2
			;;
		-l|--last-push-hash)
			if [ -z "$2" ]; then
				die "Last commit hash can’t be empty."
			fi
			lastPushHash="$2"
			shift 2
			;;
		--no-images)
			images="false"
			shift
			;;
		--no-epubcheck)
			epubcheck="false"
			shift
			;;
		--no-recompose)
			recompose="false"
			shift
			;;
		--no-build)
			build="false"
			shift
			;;
		--no-feeds)
			feeds="false"
			shift
			;;
		--no-bulk-downloads)
			bulkDownloads="false"
			shift
			;;
		*)
			break
			;;
	esac
done

# The group is treated as missing when getent produces no output for it.
if [ -z "$(getent group "${group}")" ]; then
	die "Group ${group} does not exist. Either use --group or create the group."
fi

# The webroot has to be an existing directory.
if [ ! -d "${webRoot}" ]; then
	die "${webRoot} does not exist or is not a directory."
fi

# Check for dependencies
# Each entry is "command|hint": the command that must be on the PATH, and the suggestion passed to require for the case where it is missing.
for dependency in \
	"convert|Try: apt-get install imagemagick" \
	"git|Try: apt-get install git" \
	"php|Try: apt-get install php-cli" \
	"rsvg-convert|Try: apt-get install librsvg2-bin" \
	"se|Read: https://standardebooks.org/tools"
do
	require "${dependency%%|*}" "${dependency#*|}"
done

# cavif is compiled via Cargo: https://github.com/kornelski/cavif-rs

# Directory containing this script (symlinks resolved); the helper scripts are looked up next to it.
scriptsDir="$(dirname "$(readlink -f "$0")")"

# Verify the sibling helper scripts up front so a missing one is reported before any work starts.
if ! [ -x "${scriptsDir}/reset-php-fpm-opcache" ]; then
	die "\"${scriptsDir}/reset-php-fpm-opcache\" is not an executable file."
fi

if ! [ -f "${scriptsDir}/generate-feeds" ]; then
	die "\"${scriptsDir}/generate-feeds\" is not a file or could not be found."
fi

if ! [ -f "${scriptsDir}/generate-bulk-downloads" ]; then
	die "\"${scriptsDir}/generate-bulk-downloads\" is not a file or could not be found."
fi

# Make sure the destination for cover images exists.
mkdir -p "${webRoot}"/images/covers/

# Deploy each source repository passed on the command line.
#
# For every DIRECTORY this: skips ebooks that look like drafts, works out the ebook's identifier and web directory, decides (optionally using --last-push-hash) whether the images and/or the ebook need rebuilding, generates the web cover/hero images, builds the ebook and its web-readable XHTML, then moves everything into the webroot and fixes group ownership/permissions.
for dir in "$@"
do
	if [ "${dir}" = "" ]; then
		continue
	fi

	repoDir=$(realpath "${dir%/}")
	baseName=$(basename "${repoDir}")

	if [ "${baseName}" = ".git" ]; then
		continue
	fi

	if [ ! -d "${repoDir}" ]; then
		die "Invalid repo root: ${repoDir}"
	fi

	if ! pushd "${repoDir}" > /dev/null; then
		echo "Couldn't cd into ${repoDir}. Skipping."
		continue
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Entering %s\n" "${repoDir}"
	fi

	if git show HEAD:src/epub/content.opf | grep --quiet --extended-regexp "<dc:date>1900-01-01T00:00:00Z</dc:date>"; then
		printf "Looks like a draft ebook, skipping\n"
		# Undo the pushd above so skipped drafts don't pile up on the directory stack.
		popd > /dev/null || die "Couldn't pop directory."
		continue
	fi

	# Per-repository copies of the command-line switches. They are narrowed below when --last-push-hash is in use; the global switches are left untouched so a decision made for one repository can't leak into the next one.
	buildImages="${images}"
	buildEbook="${build}"

	# The ebook identifier with slashes turned into underscores; used as the file name prefix for cover images.
	urlSafeIdentifier=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s|url:https://standardebooks.org/ebooks/||g" | sed --regexp-extended "s|/|_|g")

	# The ebook's path below the webroot's ebooks/ directory. This has to be known before the --last-push-hash check, which looks for an existing build inside it.
	webDir=$(git show HEAD:src/epub/content.opf | grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | sed --regexp-extended "s/<[^>]+?>//g" | sed --regexp-extended "s/^url:https:\/\/standardebooks.org\/ebooks\/?//")

	if [ "${webDir}" = "" ]; then
		die "Empty webdir!"
	fi

	webDir="${webRoot}/ebooks/${webDir}"

	if [ "${lastPushHash}" != "" ]; then
		# We were passed the hash of the last push before this one.
		# Check to see if the cover image changed, to decide if we want to rebuild the cover image thumbnail/hero
		diff=$(git diff "${lastPushHash}" HEAD)

		if [ "${buildImages}" = "true" ]; then
			# Always build images if they don't exist, or if they've changed
			if [[ ! -f "${webRoot}/images/covers/${urlSafeIdentifier}-cover.jpg" ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.jpg ]] || [[ "${diff}" =~ diff\ --git\ a/images/cover.svg ]]; then
				buildImages="true"
			else
				buildImages="false"
			fi
		fi

		if [ "${buildEbook}" = "true" ]; then
			# Check to see if the actual ebook changed, to decide if we want to build
			# Always build if it doesn't exist
			# A glob is not expanded inside [[ -f "..." ]], so look for an existing epub with a loop instead. If nothing matches, the pattern stays literal and the -f test fails.
			epubExists="false"
			for epub in "${webDir}"/downloads/*.epub; do
				if [ -f "${epub}" ]; then
					epubExists="true"
					break
				fi
			done

			if [ "${epubExists}" = "false" ] || [[ "${diff}" =~ diff\ --git\ a/src/ ]]; then
				buildEbook="true"
			else
				buildEbook="false"
			fi
		fi
	fi

	workDir=$(mktemp -d)
	imgWorkDir=$(mktemp -d)

	if [ "${buildImages}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Generating cover image for web ... "
		fi

		git show HEAD:images/cover.jpg > "${imgWorkDir}/${urlSafeIdentifier}.jpg"
		git show HEAD:images/cover.svg > "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# We have to cd into the work dir, otherwise convert won't pick up the relative path of the jpg background in cover.svg
		pushd "${imgWorkDir}" > /dev/null || exit

		# Build the hero image for individual ebook pages
		# Resize to 1318 wide and crop a 1318x439 strip starting at the coords 0,659; the @2x variant is resized to 2636 wide and cropped to 2636x860 starting at 0,1318.
		# NOTE(review): the @2x crop height (860) is not exactly double the 1x height (439) - confirm this is intended.
		convert -resize "1318" -crop "1318x439+0+659" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg"
		convert -resize "2636" -crop "2636x860+0+1318" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.jpg" "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-hero@2x.avif"

		# Build the cover image thumbnail
		# We use JPG instead of SVG for several reasons:
		# 1. A JPG is roughly 1/2 the file size of the same SVG, because the SVG must contain the JPG in base64 encoding
		# 2. The "scale up" effect on mouse hover is blurry when used on SVGs.

		# Point the SVG at the renamed copy of the cover JPG in the work dir.
		sed -i "s/cover\.jpg/${urlSafeIdentifier}\.jpg/g" "${imgWorkDir}/${urlSafeIdentifier}.svg"

		# Resize and compress the cover image (formula from Google Page Speed Insights)
		# We can't use `convert` directly to do svg -> jpg, as sometimes it fails to load the linked cover.jpg within cover.svg.
		# So, we use `rsvg-convert` to write a png, then `convert` to convert and compress to jpg.
		rsvg-convert --width 242 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
		rsvg-convert --width 484 --keep-aspect-ratio --output "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}.svg"
		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}.png" "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg"
		convert -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${imgWorkDir}/${urlSafeIdentifier}@2x.png" "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg"

		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover.avif"
		"${scriptsDir}"/cavif --quiet --quality 50 "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.jpg" -o "${imgWorkDir}/${urlSafeIdentifier}-cover@2x.avif"

		sudo chgrp --preserve-root --recursive "${group}" "${imgWorkDir}/${urlSafeIdentifier}"*
		sudo chmod --preserve-root --recursive g+w "${imgWorkDir}/${urlSafeIdentifier}"*

		# Remove unused images so we can copy the rest over with a glob
		rm "${imgWorkDir}/${urlSafeIdentifier}".{png,jpg,svg}

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		popd > /dev/null || die "Couldn't pop directory."
	fi

	if [ "${buildEbook}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Building ebook ... "
		fi

		git clone --quiet "${repoDir}" "${workDir}"

		mkdir -p "${workDir}/downloads"

		# Build the ebook; --check is only passed when epubcheck is enabled
		buildArgs=(--output-dir="${workDir}"/downloads/)
		if [ "${epubcheck}" = "true" ]; then
			buildArgs+=(--check)
		fi
		buildArgs+=(--kindle --kobo)

		if ! se build "${buildArgs[@]}" "${workDir}"; then
			# Clean up both temporary directories before aborting.
			rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"
			die "Error building ebook, stopping deployment."
		fi

		# Build distributable covers
		convert -resize "1400" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover.jpg"
		convert -resize "350" -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG "${workDir}/src/epub/images/cover.svg" "${workDir}/downloads/cover-thumbnail.jpg"

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi

		# Get the book URL
		bookUrl=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">.+?</dc:identifier>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/.*?url:https:\/\/standardebooks.org(.*?)<.*/\1/g")

		# Get the last commit date so that we can update the modified timestamp in
		# deployed content.opf. generate-opds uses this timestamp in its output.
		modifiedDate=$(TZ=UTC git log --date=iso-strict-local -1 --pretty=tformat:"%cd" --abbrev-commit | sed "s/+00:00/Z/")
		sed --in-place --regexp-extended "s/<meta property=\"dcterms:modified\">.+?<\/meta>/<meta property=\"dcterms:modified\">${modifiedDate}<\/meta>/" "${workDir}/src/epub/content.opf"

		if [ "${recompose}" = "true" ]; then
			if [ "${verbose}" = "true" ]; then
				printf "Recomposing ebook ... "
			fi

			# Recompose the epub into a single file, but put it outside of the epub src for now so we don't stomp on it with the following sections.
			# We do this first because the tweaks below shouldn't apply to the single-page file
			se recompose-epub --xhtml --output "${workDir}"/single-page.xhtml --extra-css-file="${webRoot}/css/web.css" "${workDir}"

			# Wrap book contents in a <main> tag
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|" "${workDir}"/single-page.xhtml

			# Add a navbar with a link back to the homepage
			sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li></ul></nav></header>|" "${workDir}"/single-page.xhtml

			# Adjust sponsored links in the colophon
			sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/single-page.xhtml

			if [ "${verbose}" = "true" ]; then
				printf "Done.\n"
			fi
		fi

		# Make some compatibility adjustments for the individual XHTML files

		# Remove instances of the .xhtml filename extension in the source text
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place 's/\.xhtml//g'

		# Add our web stylesheet to XHTML files
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended 's|</title>|</title>\n\t\t<link href="/css/web.css" media="screen" rel="stylesheet" type="text/css"/>|'

		# Remove -epub-* CSS properties from CSS files as they're invalid in a web context
		sed --in-place --regexp-extended "s|\s*\-epub\-[^;]+?;||g" "${workDir}"/src/epub/css/*.css

		# Add lang attributes
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended 's/xml:lang="([^"]+?)"/xml:lang="\1" lang="\1"/g'

		# Add the work title to <title> tags in the source text
		workTitle=$(grep --only-matching --extended-regexp "<dc:title id=\"title\">(.+?)</dc:title>" "${workDir}"/src/epub/content.opf | sed --regexp-extended "s/<[^>]+?>//g")
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<title>|<title>${workTitle} - |g"

		# Wrap book contents in a <main> tag
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<body([^>]*)>|<body><main\1>|; s|</body>|</main></body>|"

		# Add the header nav to each page
		find "${workDir}"/src/epub \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s|<body([^>]*)>|<body\1><header><nav><ul><li><a href=\"/\">Standard Ebooks</a></li><li><a href=\"${bookUrl}\">Back to ebook</a></li><li><a href=\"${bookUrl}/text\">Table of contents</a></li></ul></nav></header>|"

		# Add a chapter navigation footer to each page
		"${scriptsDir}"/inject-chapter-navigation-footer "${workDir}" "${bookUrl}"

		# Adjust sponsored links in the colophon
		sed --in-place 's|<p><a href="http|<p><a rel="nofollow" href="http|g' "${workDir}"/src/epub/text/colophon.xhtml

		# Done adding compatibility!

		if [ "${recompose}" = "true" ]; then
			# Move the single-page file back into the /src/epub/text/ folder
			mv "${workDir}"/single-page.xhtml "${workDir}"/src/epub/text/single-page.xhtml
		fi

		# Add viewport meta elements to all output
		find "${workDir}" \( -type d -name .git -prune \) -o -type f -name "*.xhtml" -print0 | xargs -0 sed --in-place "s|</title>|</title>\n\t\t<meta content=\"width=device-width, initial-scale=1\" name=\"viewport\"/>|"

		# Delete the contents of the old webdir
		rm --preserve-root --recursive --force "${webDir}"

		# Re-create the webdir
		mkdir -p "${webDir}"

		# Move contents of the work dir over
		mv "${workDir}"/downloads "${webDir}/"
		rm "${workDir}/src/epub/onix.xml"
		mv "${workDir}"/src/epub/* "${webDir}/"
	fi

	if [ "${buildImages}" = "true" ]; then
		# Move the cover images over
		mv "${imgWorkDir}/${urlSafeIdentifier}"*.{jpg,avif} "${webRoot}/images/covers/"
	fi

	# Placeholder: apart from the verbose messages this branch does nothing yet.
	if [ "${coverArtDatabase}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Updating/inserting cover in the cover art database ... "
		fi

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi
	fi

	# Delete the now-empty work dir (empty except for .git)
	rm --preserve-root --recursive --force "${workDir}" "${imgWorkDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webDir}"
	sudo chmod --preserve-root --recursive g+ws "${webDir}"

	sudo chgrp --preserve-root --recursive "${group}" "${webRoot}/images/covers/"
	sudo chmod --preserve-root --recursive g+ws "${webRoot}/images/covers/"

	popd > /dev/null || die "Couldn't pop directory."
done

# Record whether the tsp listing shows any job in the "queued" state.
if tsp | grep --quiet --extended-regexp "^[0-9]+\s+queued"; then
	queuedTasks="true"
else
	queuedTasks="false"
fi

# Only rebuild the web library cache when nothing is queued behind this run.
if [ "${queuedTasks}" != "false" ]; then
	if [ "${verbose}" = "true" ]; then
		printf "Tasks queued after this one, not rebuilding web library cache.\n"
	fi
else
	if [ "${verbose}" = "true" ]; then
		printf "Rebuilding web library cache ... "
	fi

	"${scriptsDir}"/rebuild-cache library

	if [ "${verbose}" = "true" ]; then
		printf "Done.\n"
	fi
fi

# Unless feeds are disabled, regenerate them and refresh the feeds cache.
if [ "${feeds}" = "true" ]; then
	# Skip the work while more items are waiting in the tsp build queue.
	if [ "${queuedTasks}" != "false" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Tasks queued after this one, not building feeds.\n"
		fi
	else
		if [ "${verbose}" = "true" ]; then
			printf "Building feeds ... "
		fi

		"${scriptsDir}/generate-feeds" --webroot "${webRoot}" --weburl "${webUrl}"
		"${scriptsDir}"/rebuild-cache feeds

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi
	fi
fi

# Unless bulk downloads are disabled, regenerate them and refresh the bulk-downloads cache.
if [ "${bulkDownloads}" = "true" ]; then
	# Skip the work while more items are waiting in the tsp build queue.
	if [ "${queuedTasks}" != "false" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "Tasks queued after this one, not building bulk downloads.\n"
		fi
	else
		if [ "${verbose}" = "true" ]; then
			printf "Building bulk downloads ... "
		fi

		# The generator's stdout is captured and only shown in verbose mode.
		output=$("${scriptsDir}/generate-bulk-downloads" --webroot "${webRoot}")
		if [ "${verbose}" = "true" ]; then
			echo "${output}"
		fi
		"${scriptsDir}"/rebuild-cache bulk-downloads

		if [ "${verbose}" = "true" ]; then
			printf "Done.\n"
		fi
	fi
fi

# In verbose mode, report that the run is complete.
case "${verbose}" in
	true)
		printf "Finished processing ebook.\n"
		;;
esac