web/scripts/sync-ebooks
2019-06-21 17:38:52 -05:00

206 lines
4.8 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Strict mode: stop on the first unhandled failing command, and treat a
# pipeline as failed when any of its stages fails.
set -eo pipefail
# Print the help text, reflowed by fmt, to stdout and terminate the script.
# The exit status is that of the last command run (fmt).
usage(){
	# Name the script was invoked as, without any leading directories.
	local scriptName="${0##*/}"

	fmt <<EOF
DESCRIPTION
Syncs books from standardebooks.org GitHub org to specified folder.
USAGE
${scriptName} [-v,-vv,--verbosity INTEGER] [-u,--update-only] [--token TOKEN] DIRECTORY
With -v or --verbosity 1, display general progress updates.
With -vv or --verbosity 2, display general progress updates and verbose git output.
With --update-only, only sync existing repositories, do not download new repositories.
With --token TOKEN, specify a GitHub access token to use for request. Useful for when you hit the rate limit.
DIRECTORY should be where the repositories should go.
EXAMPLE
${scriptName} /standardebooks.org/ebooks
EOF

	exit
}
# Report a fatal error and stop the script.
# $1 - message printed after a highlighted "Error:" prefix
# The message goes to stderr; the exit status is always 1.
die(){
	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
	exit 1
}
# Abort through die unless the given command can be found.
# $1 - name of the command to look up with `command -v`
# $2 - optional hint appended to the error message (for example an install command)
# Returns 0 without printing anything when the command is available.
require(){
	if ! command -v "$1" > /dev/null 2>&1; then
		# ${2:+ $2} yields " <hint>" only when a non-empty hint was given, so no
		# helper variable is needed and nothing leaks into the global scope.
		die "$1 is not installed.${2:+ $2}"
	fi
}
# Validate an option value and abort through die when it is unacceptable.
# $1 - glob pattern describing a REJECTED value (e.g. '*[!0-9]*' = contains a non-digit)
# $2 - the value to check; an empty value is always rejected
# $3 - error message handed to die
check_arg() {
	local rejectPattern="$1"
	local value="$2"
	local message="$3"

	if [ -z "${value}" ]; then
		die "${message}"
	fi

	case "${value}" in
		# Deliberately unquoted so the text is matched as a glob pattern.
		${rejectPattern}) die "${message}" ;;
	esac
}
# End boilerplate
# git does all repository work below, so refuse to start without it.
require git "Try: apt-get install git"

# Terminate on CTRL-C: route SIGINT to the ctrl_c handler.
trap 'ctrl_c' INT
# SIGINT handler: leave the script at once. The bare `exit` must remain the
# first command so the status of the last command run is what gets reported.
ctrl_c() { exit; }
# Running with no arguments at all just prints the help text.
if [[ $# -eq 0 ]]; then
	usage
fi

verbosity=0
updateOnly="false"
githubToken=""
target=""

# Consume leading options; the first word that is not a known option ends
# option parsing and is treated as the start of the positional arguments.
while [ $# -gt 0 ]; do
	case "$1" in
		-h|--help)
			usage ;;
		-v)
			verbosity=1
			shift 1
			;;
		-vv)
			verbosity=2
			shift 1
			;;
		-u|--update-only)
			updateOnly="true"
			shift 1
			;;
		--verbosity)
			# Rejects an empty value and anything containing a non-digit.
			check_arg '*[!0-9]*' "$2" "Verbosity is not a positive integer."
			verbosity="$2"
			shift 2
			;;
		--token)
			# Current GitHub tokens (ghp_..., github_pat_...) contain underscores,
			# so "_" is accepted in addition to letters and digits.
			check_arg '*[!0-9a-zA-Z_]*' "$2" "Token is empty or contains illegal characters."
			githubToken="$2"
			shift 2
			;;
		*)
			break ;;
	esac
done

# Exactly one non-empty positional argument (the destination directory) must remain.
if [ $# -ne 1 ] || [ -z "$1" ]; then
	usage
fi

target="$1"

if ! [ -d "${target}" ]; then
	die "${target} is not a directory."
fi

# "--" keeps a directory name that starts with "-" from being parsed as a cd option.
if ! cd -- "${target}"; then
	die "Couldnt cd into ${target}"
fi
# Refresh every entry that already exists in the target directory.
if [ "${verbosity}" -gt 0 ]; then
	printf "Updating local repositories ... \n"
fi

for item in ./*; do
	# An unmatched glob is left as the literal "./*", which does not exist:
	# in that case there is nothing to update.
	if ! [ -e "${item}" ]; then
		break
	fi

	if [ "${verbosity}" -gt 0 ]; then
		printf "Updating %s ... " "${item}"
	fi

	# git stays quiet unless verbosity is 2 or more.
	if [ "${verbosity}" -lt 2 ]; then
		fetchOutputFlag="-q"
	else
		fetchOutputFlag="-v"
	fi
	git -C "${item}" fetch "${fetchOutputFlag}"

	if [ "${verbosity}" -gt 0 ]; then
		printf "Done.\n"
	fi
done

# With --update-only the job ends here; nothing new is cloned.
if [ "${updateOnly}" = "true" ]; then
	exit
fi
# Collect the clone URL of every repository in the standardebooks GitHub
# organisation by walking the paginated REST API, then drop the non-ebook
# repositories (tools, web, manual).
if [ "${verbosity}" -gt 0 ]; then
	printf "Cloning remote repositories ... \n"
	printf "Fetching repository urls ..."
fi

# curl is only needed from this point on, so --update-only runs work without it.
require "curl" "Try: apt-get install curl"

url="https://api.github.com/orgs/standardebooks/repos?per_page=100"
repoUrls=""

# -s: no progress meter. -i: include the response headers in the output,
# because the rate-limit and pagination information is read from them.
curlArgs=(-si)
if [ -n "${githubToken}" ]; then
	curlArgs+=(-H "Authorization: token ${githubToken}")
fi

while true; do
	response=$(curl "${curlArgs[@]}" "${url}") ||
		die "Curl request failed."

	# Header lines end in CRLF; strip the carriage returns so that "$" anchors
	# in the patterns below can match.
	response="${response//$'\r'/}"

	# Header names are case-insensitive (and lowercase over HTTP/2), hence -i.
	# A here-string is used instead of a pipe so that grep -q exiting early
	# cannot turn into a pipefail error.
	if grep -qi "^X-RateLimit-Remaining: 0$" <<< "${response}"; then
		limitReset=$(grep -oiP "^X-RateLimit-Reset: \K[0-9]+$" <<< "${response}") || limitReset=""
		resetTime="the rate limit resets"
		if [ -n "${limitReset}" ]; then
			# The header carries a Unix timestamp; show it as a readable date.
			resetTime=$(date -d @"${limitReset}") || resetTime="the rate limit resets"
		fi
		printf "You have reached your daily allowance for unauthenticated GitHub API requests.\n\
Either wait until %s or use an OAuth token.\n\
You can create a new token at https://github.com/settings/tokens/new and \
pass it to this script with the --token option.\n\
The token does not need any permissions.\n" "${resetTime}" 1>&2
		# This is an error path, so report failure to the caller.
		exit 1
	fi

	# Split the JSON on commas and double quotes and keep the value of every "clone_url" key.
	currentRepoUrls=$(printf "%s" "${response}" | awk 'BEGIN { FS="\""; RS="," }; { if ($2 == "clone_url") {print $4} }')
	repoUrls=$(printf "%s\n%s" "${repoUrls}" "${currentRepoUrls}")

	# Follow the rel="next" entry of the Link header; when there is none this was the last page.
	url=$(grep -oP "<\Khttps://api.github.com/[^>]*(?=>; rel=\"next\",)" <<< "${response}") || break

	if [ "${verbosity}" -gt 0 ]; then
		printf "."
	fi
done

if [ "${verbosity}" -gt 0 ]; then
	printf " Done.\n"
fi

# Drop the non-ebook repositories and blank lines. grep exits non-zero when it
# is given no input at all, which under `set -e` would otherwise end the script
# without any explanation.
repoUrls=$(printf "%s" "${repoUrls}" | grep -v -e "/tools.git\$" -e "/web.git\$" -e "/manual.git\$" | awk 'NF') ||
	die "GitHub returned no repository URLs."
# Clone every repository that is not present yet, then rename the clone after
# the identifier recorded in the book's own metadata.
syncErrors=0

while IFS= read -r repoUrl; do
	[ -n "${repoUrl}" ] || continue

	repoName="${repoUrl##*/}"

	# Already cloned under its GitHub name.
	[ -d "${repoName}" ] && continue

	# For names of 100 characters or more, also treat any directory that merely
	# starts with the repository name as an existing clone.
	# NOTE(review): presumably because such names are cut short on GitHub while
	# the renamed local copy carries the full identifier - confirm.
	if [ "${#repoName}" -ge 100 ]; then
		if dirs=( "${repoName%%.git}"*/ ) && [[ -d ${dirs[0]} ]]; then
			continue
		fi
	fi

	if [ "${verbosity}" -gt 0 ]; then
		printf "Cloning %s ... \n" "${repoUrl}"
	fi

	# git stays quiet unless verbosity is 2 or more.
	if [ "${verbosity}" -lt 2 ]; then
		cloneOutputFlag="-q"
	else
		cloneOutputFlag="-v"
	fi

	# The clone is tested explicitly: under `set -e` an unchecked failure would
	# end the whole run before the problem could be reported.
	if ! git clone "${cloneOutputFlag}" --bare "${repoUrl}" || ! [ -d "${repoName}" ]; then
		printf "Failed to clone %s.\n" "${repoName}" 1>&2
		syncErrors=$((syncErrors + 1))
		continue
	fi

	if [ "${verbosity}" -gt 0 ]; then
		printf "Done.\n"
	fi

	# Read the identifier from the book's content.opf, strip the tags and the
	# URL prefix, and turn the remaining slashes into underscores.
	properName="$(git -C "${repoName}" show HEAD:src/epub/content.opf |
		grep -oE "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+</dc:identifier>" |
		sed -E -e "s/<[^>]+>//g" -e "s|url:https://standardebooks.org/ebooks/||g" -e "s|/|_|g")" || properName=""

	# Without an identifier the target name would be just ".git"; keep the clone
	# under its GitHub name instead.
	if [ -z "${properName}" ]; then
		printf "Could not determine the proper name of %s; leaving it as is.\n" "${repoName}" 1>&2
		syncErrors=$((syncErrors + 1))
		continue
	fi
	properName="${properName}.git"

	if [ "${repoName}" != "${properName}" ]; then
		# mv would place the clone INSIDE an existing directory, nesting one
		# repository in another, so refuse to move in that case.
		if [ -e "${properName}" ]; then
			printf "Cannot move %s to %s: destination already exists.\n" "${repoName}" "${properName}" 1>&2
			syncErrors=$((syncErrors + 1))
			continue
		fi

		if [ "${verbosity}" -gt 0 ]; then
			printf "Moving %s to %s\n" "${repoName}" "${properName}"
		fi

		mv -- "${repoName}" "${properName}"
	fi
done <<< "${repoUrls}"

# Report failure to the caller when any repository could not be processed.
if [ "${syncErrors}" -gt 0 ]; then
	exit 1
fi