Add check to prevent script dying if repository does not have metadata file, improve documentation

2025-07-16 11:26:37 -04:00 · 2020-07-31 23:57:55 -05:00 · 2020-07-31 23:57:55 -05:00 · 8243756f77
commit 8243756f77
parent b605568a85
1 changed files with 45 additions and 8 deletions
--- a/scripts/sync-ebooks
+++ b/scripts/sync-ebooks
@ -1,4 +1,4 @@
-#!/bin/bash
+#! /usr/bin/env bash
 set -e
 set -o pipefail
@ -13,27 +13,43 @@ USAGE
 	With -v or --verbosity 1, display general progress updates.
 	With -vv or --verbosity 2, display general progress updates and verbose git output.
 	With --update-only, only sync existing repositories, do not download new repositories.
-	With -b or --bare, clone a bare repository (for a server) instead of a working directory
+	With -b or --bare, clone bare repositories (for a server) instead of working directories.
 	With --token TOKEN, specify a GitHub access token to use for request. Useful for when you hit the rate limit.
 	DIRECTORY should be where the repositories should go.
 	NOTE: This script requires GNU versions of grep and sed. If you are on a Mac, you will need to
 	install GNU versions (via Homebrew, MacPorts, etc.) and make sure they are first in your path,
 	or modify the script to use the GNU versions if they're named differently.
 EXAMPLE
 	${0##*/} /standardebooks.org/ebooks
 EOF
 	exit
 }
-die(){ printf "\033[0;7;31mError:\033[0m %s\n" "${1}" 1>&2; exit 1; }
+
-require(){ command -v "$1" > /dev/null 2>&1 || { suggestion=""; if [ -n "$2" ]; then suggestion=" $2"; fi; die "$1 is not installed.${suggestion}"; } }
+# functions used by the script
 # Print an error message to stderr and abort the script.
 # Arguments: $1 - message text, printed after an ANSI-highlighted "Error:" label
 # Exits:     always terminates the shell with status 1; never returns to the caller
 die() {
 	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
 	exit 1
 }
 # Abort the script unless a command is available on the PATH.
 # Arguments: $1 - name of the command to look up with `command -v`
 #            $2 - optional hint (e.g. an install command) appended to the error message
 # Returns:   0 when the command is found; otherwise calls die with an explanatory message
 require() {
 	if ! command -v "$1" > /dev/null 2>&1; then
 		# ${2:+ $2} expands to " <hint>" only when a non-empty hint was given, so no
 		# scratch variable is needed and nothing leaks into the global scope.
 		die "$1 is required but not installed.${2:+ $2}"
 	fi
 }
 # Validate an option's argument, aborting when it is missing or looks wrong.
 # Arguments: $1 - shell pattern describing a rejected value
 #            $2 - the value to validate
 #            $3 - message passed to die when the value is empty or matches $1
 # Returns:   0 when the value is accepted; otherwise calls die
 check_arg() {
 	local rejected_pattern="$1" value="$2" message="$3"
 	case "${value}" in
 		'')
 			die "${message}"
 			;;
 		# deliberately unquoted so that it is matched as a pattern, not a literal string
 		${rejected_pattern})
 			die "${message}"
 			;;
 	esac
 }
-# End boilerplate
+# end functions
 require "git" "Try: apt-get install git"
 # Terminate on CTRL-C
 trap ctrl_c INT
@ -41,6 +57,8 @@ ctrl_c() {
 	exit
 }
 require "git"
 if [[ $# -eq 0 ]]; then
 	usage
 fi
@ -51,6 +69,7 @@ githubToken=""
 target=""
 bare=""
 # process each of the parameters one at a time, shifting each time to get the next one
 while [ $# -gt 0 ]; do
 	case "$1" in
 		-h|--help)
@ -100,6 +119,7 @@ if ! cd "${target}"; then
 	die "Couldn’t cd into ${target}"
 fi
 # update any existing repositories
 if [ "${verbosity}" -gt 0 ]; then
 	printf "Updating local repositories ... \n"
 fi
@ -111,6 +131,7 @@ for item in ./*; do
 		printf "Updating %s ... " "${item}"
 	fi
 	# this works whether the repository is bare or a working directory
 	if [ "${verbosity}" -lt 2 ]; then
 		git -C "${item}" fetch -q
 	else
@ -126,15 +147,18 @@ if [ "${updateOnly}" = "true" ]; then
 	exit
 fi
 # clone the remaining repositories
 if [ "${verbosity}" -gt 0 ]; then
 	printf "Cloning remote repositories ... \n"
 	printf "Fetching repository urls ..."
 fi
 # get all of the repository names from the GitHub API, one "page" at a time
 url="https://api.github.com/orgs/standardebooks/repos?per_page=100"
 repoUrls=""
 while true; do
 	# get a "page" worth of repository URLs
 	if [ -n "${githubToken}" ]; then
 		response=$(curl -H "Authorization: token ${githubToken}" -si "${url}") ||
 			die "Curl request failed."
@ -153,9 +177,11 @@ while true; do
 		exit
 	fi
-
+	# parse the response to get the current page's URLs
 	currentRepoUrls=$(printf "%s" "${response}" | awk 'BEGIN { FS="\""; RS="," }; { if ($2 == "clone_url") {print $4} }')
 	# add them to the full list in repoUrls
 	repoUrls=$(printf "%s\n%s" "${repoUrls}" "${currentRepoUrls}")
 	# set the variable to get the next "page"
 	url=$(printf "%s" "${response}" | grep -oP "<\Khttps://api.github.com/[^>]*(?=>; rel=\"next\",)") || break
 	if [ "${verbosity}" -gt 0 ]; then
@ -167,8 +193,10 @@ if [ "${verbosity}" -gt 0 ]; then
 	printf " Done.\n"
 fi
 # skip the non-ebook repositories by removing their names from the list
 repoUrls=$(printf "%s" "${repoUrls}" | grep -v -e "/tools.git\$" -e "/web.git\$" -e "/manual.git\$" -e "/sublime-text-se-plugin.git\$" | awk 'NF')
 # process the list, reading one repository at a time
 printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
 	# make sure it's not an empty string
 	[ -n "${repoUrl}" ] || continue
@ -182,6 +210,7 @@ printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
 	# if the repo already exists, skip it (handled in the update above)
 	[ -d "${repoName}" ] && continue
 	# it's not clear what this is doing, or more specifically why it's doing it
 	repoNameLength=$(printf "%s" "${repoName}" | wc -m)
 	if [ "${repoNameLength}" -ge 100 ]; then
 		if dirs=( "${repoName}"*/ ) && [[ -d ${dirs[0]} ]]; then
@ -193,18 +222,24 @@ printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
 		printf "Cloning %s ... \n" "${repoUrl}"
 	fi
 	# clone the repository, creating either a bare or working directory based on the option
 	if [ "${verbosity}" -lt 2 ]; then
 		git clone -q ${bare} "${repoUrl}"
 	else
 		git clone -v ${bare} "${repoUrl}"
 	fi
 	# if a directory with the repository name doesn't exist, the clone did not complete
 	# successfully; report that on stderr, otherwise confirm completion when verbose
 	if ! [ -d "${repoName}" ]; then
 		# the format string already ends the sentence, so the argument must not add a period
 		printf "Failed to clone %s.\n" "${repoName}" 1>&2
 	elif [ "${verbosity}" -gt 0 ]; then
 		printf "Done.\n"
 	fi
 	# if the repository doesn't have a metadata file, skip to the next repository
 	metadata=$(git -C "${repoName}" show HEAD:src/epub/content.opf > /dev/null 2>&1) || continue
 	# get the last segment of the dc:identifier from the metadata
 	properName="$(git -C "${repoName}" show HEAD:src/epub/content.opf |
 		grep -oE "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" |
 		sed -E "s/<[^>]+?>//g" |
@ -214,6 +249,8 @@ printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
 		properName="${properName%.git}"
 	fi
 	# if for some reason the repository name isn't the same as the identifier (they are identical
 	# 99% of the time), rename the directory to be the identifier name; not sure why this is done, either
 	if [ "${repoName}" != "${properName}" ]; then
 		if [ "${verbosity}" -gt 0 ]; then
 			printf "Moving %s to %s\n" "${repoName}" "${properName}"