#!/usr/bin/env bash
#
# Keep a local directory in sync with the repositories of the "standardebooks"
# GitHub organisation:
#   1. fetch every git repository that already exists in the target directory;
#   2. unless --update-only is given, list the organisation's repositories via
#      the GitHub API and clone the ones that are not present yet;
#   3. rename each fresh clone after the identifier found in its
#      src/epub/content.opf, when that differs from the repository name.
#
# Requires git, curl, awk, a grep that supports -P and a date that supports -d.
set -e
set -o pipefail

# Print a usage summary to stderr and exit with status 1.
usage(){
	cat 1>&2 <<EOF
USAGE
    ${0##*/} [-v|-vv|--verbosity INTEGER] [-u|--update-only] [-b|--bare] [--token TOKEN] DIRECTORY

OPTIONS
    -h, --help            Show this help.
    -v                    Print progress messages (same as --verbosity 1).
    -vv                   Print progress messages and verbose git output (same as --verbosity 2).
    --verbosity INTEGER   Set the verbosity level explicitly.
    -u, --update-only     Only fetch repositories already present in DIRECTORY; do not clone new ones.
    -b, --bare            Clone new repositories with "git clone --mirror".
    --token TOKEN         GitHub token used to authenticate the API requests.

    DIRECTORY             Existing directory that holds (or will hold) the repositories.
EOF
	exit 1
}

# Print an error message ($1) to stderr and exit with status 1.
die(){
	printf "Error: %s\n" "$1" 1>&2
	exit 1
}

# Abort unless the command named $1 is available.
# $2 (optional) is a hint appended to the error message.
require(){
	local suggestion=""

	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	if [ -n "$2" ]; then
		suggestion=" $2"
	fi

	die "$1 is required but not installed.${suggestion}"
}

# Abort with message $3 if value $2 is empty or matches the glob pattern $1.
check_arg(){
	# $1 is deliberately unquoted so that it is used as a pattern
	# shellcheck disable=SC2254
	case "$2" in
		''|$1) die "$3" ;;
	esac
}

# printf wrapper that only prints when progress output was requested.
progress(){
	if [ "${verbosity}" -gt 0 ]; then
		# the format string always comes from a literal in this script
		# shellcheck disable=SC2059
		printf "$@"
	fi
}

# Terminate on CTRL-C, using the conventional 128 + SIGINT exit status.
ctrl_c(){
	exit 130
}
# end functions

trap ctrl_c INT

require "git"

if [[ $# -eq 0 ]]; then
	usage
fi

verbosity=0
updateOnly="false"
githubToken=""
target=""
bare=""

# process each of the parameters one at a time, shifting each time to get the next one
while [ $# -gt 0 ]; do
	case "$1" in
		-h|--help)
			usage
			;;
		-v)
			verbosity=1
			shift 1
			;;
		-vv)
			verbosity=2
			shift 1
			;;
		-u|--update-only)
			updateOnly="true"
			shift 1
			;;
		--verbosity)
			check_arg '*[!0-9]*' "$2" "Verbosity is not a positive integer."
			verbosity="$2"
			shift 2
			;;
		--token)
			check_arg '*[!0-9a-zA-Z_]*' "$2" "Token is empty or contains illegal characters."
			githubToken="$2"
			shift 2
			;;
		-b|--bare)
			bare="--mirror"
			shift 1
			;;
		*)
			# first non-option argument: stop parsing
			break
			;;
	esac
done

# exactly one non-empty positional argument (the target directory) must remain
if [ $# -ne 1 ] || [ -z "$1" ]; then
	usage
fi

target="$1"

if ! [ -d "${target}" ]; then
	die "${target} is not a directory."
fi

if ! cd "${target}"; then
	die "Couldn’t cd into ${target}"
fi

# git output flag shared by fetch and clone: quiet unless verbosity is 2 or more
if [ "${verbosity}" -lt 2 ]; then
	gitVerbosityFlag="-q"
else
	gitVerbosityFlag="-v"
fi

# update any existing repositories
progress "Updating local repositories ... \n"

for item in ./*/; do
	# an unmatched glob stays literal; in that case there is nothing to update
	[ -e "${item}" ] || break

	# if it's not a repository directory, skip it (checked before printing so
	# that no dangling "Updating ..." line is left behind)
	git -C "${item}" rev-parse > /dev/null 2>&1 || continue

	progress "Updating %s ... " "${item}"

	# this works whether the repository is bare or a working directory;
	# a failing fetch aborts the script because of "set -e"
	git -C "${item}" fetch "${gitVerbosityFlag}"

	progress "Done.\n"
done

if [ "${updateOnly}" = "true" ]; then
	exit
fi

# curl is only needed from here on, so --update-only keeps working without it
require "curl"

# clone the remaining repositories
progress "Cloning remote repositories ... \n"
progress "Fetching repository urls ..."

# get all of the repository names from the GitHub API, one "page" at a time
url="https://api.github.com/orgs/standardebooks/repos?per_page=100"
repoUrls=""

curlArgs=(-si)
if [ -n "${githubToken}" ]; then
	curlArgs+=(-H "Authorization: token ${githubToken}")
fi

while true; do
	# get a "page" worth of repository URLs (headers included, because of -i)
	response=$(curl "${curlArgs[@]}" "${url}") || die "Curl request failed."

	# header lines end in CRLF; drop the CRs so that "$" anchors can match
	response="${response//$'\r'/}"

	# header names are case-insensitive (and lower-case over HTTP/2), hence -i
	if grep -qi "^x-ratelimit-remaining: 0$" <<< "${response}"; then
		limitReset=$(grep -oiP "^x-ratelimit-reset: \K[0-9]+$" <<< "${response}") || limitReset=""
		resetTime=$(date -d @"${limitReset}" 2> /dev/null) || resetTime="the rate limit resets"

		printf "You have reached your daily allowance for unauthenticated GitHub API requests.\nEither wait until %s or use an OAuth token.\nYou can create a new token at https://github.com/settings/tokens/new and pass it to this script with the --token option.\nThe token does not need any permissions.\n" "${resetTime}" 1>&2
		exit 1
	fi

	# parse the response to get the current page's URLs
	currentRepoUrls=$(awk 'BEGIN { FS="\""; RS="," }; { if ($2 == "clone_url") {print $4} }' <<< "${response}")

	# add them to the full list in repoUrls
	repoUrls+=$'\n'"${currentRepoUrls}"

	# set the variable to get the next "page"; no "next" link means this was the last one
	url=$(grep -oP "<\Khttps://api.github.com/[^>]*(?=>; rel=\"next\",)" <<< "${response}") || break

	progress "."
done

progress " Done.\n"

# skip the non-ebook repositories and blank lines; a single awk is used because
# "grep -v" exits non-zero on an empty result, which would abort under pipefail
repoUrls=$(awk 'NF && !/\/(tools|web|manual|standard-blackletter|sublime-text-se-plugin)\.git$/' <<< "${repoUrls}")

cloneArgs=("${gitVerbosityFlag}")
if [ -n "${bare}" ]; then
	cloneArgs+=("${bare}")
fi

# extended regex capturing the last part of the identifier in content.opf
identifierPattern='url:https://standardebooks\.org/ebooks/([^<[:space:]]+)'
cloneFailed="false"

# process the list, reading one repository at a time
while IFS= read -r repoUrl; do
	# make sure it's not an empty string
	[ -n "${repoUrl}" ] || continue

	# strip everything prior to the last segment of the name
	repoName="${repoUrl##*/}"
	if [ -z "${bare}" ]; then
		repoName="${repoName%.git}"
	fi

	# if the repo already exists, skip it (handled in the update above)
	if [ -d "${repoName}" ]; then
		continue
	fi

	# if the repository name has been truncated due to GitHub's name length limits,
	# but a local clone with the full name exists, don't attempt to clone it again
	baseName="${repoName%.git}"
	if [ "${#baseName}" -ge 100 ]; then
		dirs=( "${baseName}"*/ )
		if [[ -d "${dirs[0]}" ]]; then
			continue
		fi
	fi

	progress "Cloning %s ... \n" "${repoUrl}"

	# clone the repository, creating either a bare or working directory based on the option;
	# the clone is tested explicitly so that one failure does not abort the whole run
	if ! git clone "${cloneArgs[@]}" "${repoUrl}" || ! [ -d "${repoName}" ]; then
		printf "Failed to clone %s.\n" "${repoName}" 1>&2
		cloneFailed="true"
		continue
	fi

	progress "Done.\n"

	# if the repository doesn't have a metadata file, skip to the next repository
	opf=$(git -C "${repoName}" show HEAD:src/epub/content.opf 2> /dev/null) || continue

	# get the last segment of the identifier from the metadata; without one
	# there is nothing to rename the directory to
	if ! [[ "${opf}" =~ ${identifierPattern} ]]; then
		continue
	fi

	properName="${BASH_REMATCH[1]//\//_}"
	if [ -n "${bare}" ]; then
		properName="${properName}.git"
	fi

	# if for some reason the repository name isn't the same as the identifier (they are identical
	# 99% of the time), rename the directory to be the identifier name
	if [ "${repoName}" != "${properName}" ]; then
		if [ -d "${properName}" ]; then
			progress "Not moving %s to %s: directory exists\n" "${repoName}" "${properName}"
		else
			progress "Moving %s to %s\n" "${repoName}" "${properName}"
			mv -- "${repoName}" "${properName}"
		fi
	fi
done <<< "${repoUrls}"

# report failure to the caller if any clone did not complete
if [ "${cloneFailed}" = "true" ]; then
	exit 1
fi