Add check to prevent script dying if repository does not have metadata file, improve documentation

This commit is contained in:
vr8ce 2020-07-31 23:57:55 -05:00 committed by Alex Cabal
parent b605568a85
commit 8243756f77

View file

@ -1,4 +1,4 @@
#!/bin/bash #! /usr/bin/env bash
set -e set -e
set -o pipefail set -o pipefail
@ -13,27 +13,43 @@ USAGE
With -v or --verbosity 1, display general progress updates. With -v or --verbosity 1, display general progress updates.
With -vv or --verbosity 2, display general progress updates and verbose git output. With -vv or --verbosity 2, display general progress updates and verbose git output.
With --update-only, only sync existing repositories, do not download new repositories. With --update-only, only sync existing repositories, do not download new repositories.
With -b or --bare, clone a bare repository (for a server) instead of a working directory With -b or --bare, clone bare repositories (for a server) instead of working directories.
With --token TOKEN, specify a GitHub access token to use for request. Useful for when you hit the rate limit. With --token TOKEN, specify a GitHub access token to use for request. Useful for when you hit the rate limit.
DIRECTORY should be where the repositories should go. DIRECTORY should be where the repositories should go.
NOTE: This script requires GNU versions of grep and sed. If you are on a Mac, you will need to
install GNU versions (via Homebrew, MacPorts, etc.) and make sure they are first in your path,
or modify the script to use the GNU versions if they're named differently.
EXAMPLE EXAMPLE
${0##*/} /standardebooks.org/ebooks ${0##*/} /standardebooks.org/ebooks
EOF EOF
exit exit
} }
die(){ printf "\033[0;7;31mError:\033[0m %s\n" "${1}" 1>&2; exit 1; }
require(){ command -v "$1" > /dev/null 2>&1 || { suggestion=""; if [ -n "$2" ]; then suggestion=" $2"; fi; die "$1 is not installed.${suggestion}"; } } # functions used by the script
# Print an error message to stderr and terminate the script.
#   $1 - message text, shown after a red, reverse-video "Error:" label
# Always exits with status 1; it never returns to the caller.
die() {
	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
	exit 1
}
# Ensure an external command is available, aborting through die() if it is not.
#   $1 - name of the command to look up with `command -v`
#   $2 - optional hint (e.g. an install command) appended to the error message
# Returns 0 when the command is found; otherwise die() ends the script.
require() {
	if ! command -v "$1" > /dev/null 2>&1; then
		# expands to " $2" when a hint was supplied, and to nothing otherwise
		suggestion="${2:+ $2}"
		die "$1 is required but not installed.${suggestion}"
	fi
}
check_arg() { check_arg() {
case "$2" in case "$2" in
''|$1) die "$3" ;; ''|$1) die "$3" ;;
esac esac
} }
# End boilerplate # end functions
require "git" "Try: apt-get install git"
# Terminate on CTRL-C # Terminate on CTRL-C
trap ctrl_c INT trap ctrl_c INT
@ -41,6 +57,8 @@ ctrl_c() {
exit exit
} }
require "git"
if [[ $# -eq 0 ]]; then if [[ $# -eq 0 ]]; then
usage usage
fi fi
@ -51,6 +69,7 @@ githubToken=""
target="" target=""
bare="" bare=""
# process each of the parameters one at a time, shifting each time to get the next one
while [ $# -gt 0 ]; do while [ $# -gt 0 ]; do
case "$1" in case "$1" in
-h|--help) -h|--help)
@ -100,6 +119,7 @@ if ! cd "${target}"; then
die "Couldnt cd into ${target}" die "Couldnt cd into ${target}"
fi fi
# update any existing repositories
if [ "${verbosity}" -gt 0 ]; then if [ "${verbosity}" -gt 0 ]; then
printf "Updating local repositories ... \n" printf "Updating local repositories ... \n"
fi fi
@ -111,6 +131,7 @@ for item in ./*; do
printf "Updating %s ... " "${item}" printf "Updating %s ... " "${item}"
fi fi
# this works whether the repository is bare or a working directory
if [ "${verbosity}" -lt 2 ]; then if [ "${verbosity}" -lt 2 ]; then
git -C "${item}" fetch -q git -C "${item}" fetch -q
else else
@ -126,15 +147,18 @@ if [ "${updateOnly}" = "true" ]; then
exit exit
fi fi
# clone the remaining repositories
if [ "${verbosity}" -gt 0 ]; then if [ "${verbosity}" -gt 0 ]; then
printf "Cloning remote repositories ... \n" printf "Cloning remote repositories ... \n"
printf "Fetching repository urls ..." printf "Fetching repository urls ..."
fi fi
# get all of the repository names from the GitHub API, one "page" at a time
url="https://api.github.com/orgs/standardebooks/repos?per_page=100" url="https://api.github.com/orgs/standardebooks/repos?per_page=100"
repoUrls="" repoUrls=""
while true; do while true; do
# get a "page" worth of repository URLs
if [ -n "${githubToken}" ]; then if [ -n "${githubToken}" ]; then
response=$(curl -H "Authorization: token ${githubToken}" -si "${url}") || response=$(curl -H "Authorization: token ${githubToken}" -si "${url}") ||
die "Curl request failed." die "Curl request failed."
@ -153,9 +177,11 @@ while true; do
exit exit
fi fi
# parse the response to get the current page's URLs
currentRepoUrls=$(printf "%s" "${response}" | awk 'BEGIN { FS="\""; RS="," }; { if ($2 == "clone_url") {print $4} }') currentRepoUrls=$(printf "%s" "${response}" | awk 'BEGIN { FS="\""; RS="," }; { if ($2 == "clone_url") {print $4} }')
# add them to the full list in repoUrls
repoUrls=$(printf "%s\n%s" "${repoUrls}" "${currentRepoUrls}") repoUrls=$(printf "%s\n%s" "${repoUrls}" "${currentRepoUrls}")
# set the variable to get the next "page"
url=$(printf "%s" "${response}" | grep -oP "<\Khttps://api.github.com/[^>]*(?=>; rel=\"next\",)") || break url=$(printf "%s" "${response}" | grep -oP "<\Khttps://api.github.com/[^>]*(?=>; rel=\"next\",)") || break
if [ "${verbosity}" -gt 0 ]; then if [ "${verbosity}" -gt 0 ]; then
@ -167,8 +193,10 @@ if [ "${verbosity}" -gt 0 ]; then
printf " Done.\n" printf " Done.\n"
fi fi
# skip the non-ebook repositories by removing their names from the list
repoUrls=$(printf "%s" "${repoUrls}" | grep -v -e "/tools.git\$" -e "/web.git\$" -e "/manual.git\$" -e "/sublime-text-se-plugin.git\$" | awk 'NF') repoUrls=$(printf "%s" "${repoUrls}" | grep -v -e "/tools.git\$" -e "/web.git\$" -e "/manual.git\$" -e "/sublime-text-se-plugin.git\$" | awk 'NF')
# process the list, reading one repository at a time
printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
# make sure it's not an empty string # make sure it's not an empty string
[ -n "${repoUrl}" ] || continue [ -n "${repoUrl}" ] || continue
@ -182,6 +210,7 @@ printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
# if the repo already exists, skip it (handled in the update above) # if the repo already exists, skip it (handled in the update above)
[ -d "${repoName}" ] && continue [ -d "${repoName}" ] && continue
# it's not clear what this is doing, or more specifically why it's doing it
repoNameLength=$(printf "%s" "${repoName}" | wc -m) repoNameLength=$(printf "%s" "${repoName}" | wc -m)
if [ "${repoNameLength}" -ge 100 ]; then if [ "${repoNameLength}" -ge 100 ]; then
if dirs=( "${repoName}"*/ ) && [[ -d ${dirs[0]} ]]; then if dirs=( "${repoName}"*/ ) && [[ -d ${dirs[0]} ]]; then
@ -193,18 +222,24 @@ printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
printf "Cloning %s ... \n" "${repoUrl}" printf "Cloning %s ... \n" "${repoUrl}"
fi fi
# clone the repository, creating either a bare or working directory based on the option
if [ "${verbosity}" -lt 2 ]; then if [ "${verbosity}" -lt 2 ]; then
git clone -q ${bare} "${repoUrl}" git clone -q ${bare} "${repoUrl}"
else else
git clone -v ${bare} "${repoUrl}" git clone -v ${bare} "${repoUrl}"
fi fi
# if a directory with the repository name doesn't exist, the clone did not complete successfully
if ! [ -d "${repoName}" ]; then if ! [ -d "${repoName}" ]; then
printf "Failed to clone %s.\n" "${repoName}." 1>&2 printf "Failed to clone %s.\n" "${repoName}." 1>&2
elif [ "${verbosity}" -gt 0 ]; then elif [ "${verbosity}" -gt 0 ]; then
printf "Done.\n" printf "Done.\n"
fi fi
# if the repository doesn't have a metadata file, skip to the next repository
metadata=$(git -C "${repoName}" show HEAD:src/epub/content.opf > /dev/null 2>&1) || continue
# get the last segment of the dc:identifier from the metadata
properName="$(git -C "${repoName}" show HEAD:src/epub/content.opf | properName="$(git -C "${repoName}" show HEAD:src/epub/content.opf |
grep -oE "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" | grep -oE "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+<\/dc:identifier>" |
sed -E "s/<[^>]+?>//g" | sed -E "s/<[^>]+?>//g" |
@ -214,6 +249,8 @@ printf "%s\n" "${repoUrls}" | while IFS= read -r repoUrl; do
properName="${properName%.git}" properName="${properName%.git}"
fi fi
# if for some reason the repository name isn't the same as the identifier (they are identical
# 99% of the time), rename the directory to be the identifier name; not sure why this is done, either
if [ "${repoName}" != "${properName}" ]; then if [ "${repoName}" != "${properName}" ]; then
if [ "${verbosity}" -gt 0 ]; then if [ "${verbosity}" -gt 0 ]; then
printf "Moving %s to %s\n" "${repoName}" "${properName}" printf "Moving %s to %s\n" "${repoName}" "${properName}"