diff --git a/README.md b/README.md index a0352009..65717de5 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ PHP 7+ is required. ```shell # Install Apache, PHP, PHP-FPM, and various other dependencies. -sudo apt install -y git composer php-fpm php-cli php-gd php-xml php-apcu php-mbstring php-intl apache2 apache2-utils libfcgi0ldbl task-spooler +sudo apt install -y git composer php-fpm php-cli php-gd php-xml php-apcu php-mbstring php-intl apache2 apache2-utils libfcgi0ldbl task-spooler ipv6calc # Create the site root and logs root and clone this repo into it. sudo mkdir /standardebooks.org/ diff --git a/config/apache/standardebooks.org.conf b/config/apache/standardebooks.org.conf index 8ecee0b1..87eca2ee 100644 --- a/config/apache/standardebooks.org.conf +++ b/config/apache/standardebooks.org.conf @@ -60,8 +60,9 @@ Define domain standardebooks.org DocumentRoot /standardebooks.org/web/www ErrorDocument 404 /404 ErrorLog /var/log/local/www-error.log + DirectorySlash Off RewriteEngine on - CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined + CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/web/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined SSLEngine on SSLCertificateFile /etc/letsencrypt/live/${domain}/fullchain.pem @@ -69,13 +70,6 @@ Define domain standardebooks.org Header always set Strict-Transport-Security "max-age=15768000" Header set Content-Security-Policy "default-src 'self';" - # Log downloads - SetEnvIf Request_URI "\.epub$" logdownload - SetEnvIf Request_URI "\.kepub.epub$" logdownload - SetEnvIf Request_URI "\.azw3$" logdownload - CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload - DirectorySlash Off - # Disable .htaccess files AllowOverride none diff --git a/config/apache/standardebooks.test.conf b/config/apache/standardebooks.test.conf index f41443ea..6bcbcf86 100644 --- 
#!/bin/bash
#
# rotate-www-logs
#
# Post-rotation hook for the Apache access log. Apache's `rotatelogs -p` runs
# this script and passes it the filename of the log file it has just created.
# Every *other* rotated log that sits next to that file is split into one file
# per calendar day, anonymized with `ipv6loganon`, and appended to a gzipped
# archive at LOG-DIRECTORY/YYYY-MM/www-access-YYYY-MM-DD.log.gz.
#
# A raw log is deleted only after every day found in it has been archived
# successfully; on any failure it is left in place so no entries are lost.
# (A later run will then re-process it, which may duplicate the days that had
# already been archived; duplicates are preferred over data loss.)
#
# Requires: ipv6loganon (provided by the `ipv6calc` package), GNU date, gzip.

usage(){
  fmt <<EOF
DESCRIPTION
  Split rotated Apache access logs into one file per day, anonymize the client IP addresses, and store the gzipped results in LOG-DIRECTORY/YYYY-MM/

USAGE
  rotate-www-logs NEW-LOG-FILENAME
EOF
  exit
}
die(){ printf "\033[0;7;31mError:\033[0m %s\n" "${1}" 1>&2; exit 1; }
if [ $# -eq 1 ]; then if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then usage; fi fi
# End boilerplate

# Make `grep | ipv6loganon` report failure if either side fails.
set -o pipefail

# Print the distinct "[DD/Mon/YYYY" day stamps found in the log file $1, one per line.
list_log_days(){
  grep --extended-regexp --only-matching "\[[0-9]{1,2}\/[a-zA-Z]{3}\/20[0-9]{2}" "$1" | sort -u
}

# Append the anonymized entries of one day to that day's gzipped archive.
# Arguments: $1 - raw log file to read
#            $2 - day stamp as printed by list_log_days, e.g. "[12/Jan/2020"
#            $3 - directory that holds the YYYY-MM archive directories
# Returns:   0 on success; non-zero if the date cannot be parsed or any
#            archive step fails, so the caller can keep the raw log.
archive_day(){
  local rawLog="$1"
  local grepString="${2//\[/}"   # "12/Jan/2020"
  local directory="$3"
  local logDate logMonth monthDirectory logFile

  # "12/Jan/2020" -> "12 Jan 2020", which GNU date can parse.
  # An unparseable stamp must not fall through: empty date parts would make
  # the recursive chown/chmod below operate on the whole log directory.
  logDate=$(date --date="${grepString//\// }" "+%Y-%m-%d") || return 1
  logMonth="${logDate%-*}"
  monthDirectory="${directory}/${logMonth}"
  logFile="${monthDirectory}/www-access-${logDate}.log"

  mkdir -p "${monthDirectory}" || return 1

  # Is the log file already existing and gzipped? Unpack it so we can append.
  if [ -f "${logFile}.gz" ]; then
    gunzip "${logFile}.gz" || return 1
  fi

  # ipv6loganon is provided by the `ipv6calc` package
  grep --extended-regexp "\[${grepString}" "${rawLog}" | ipv6loganon --anonymize-paranoid >> "${logFile}" || return 1

  gzip --best "${logFile}" || return 1

  # Best effort, as before: ownership/permission fixes do not affect whether
  # the entries were archived.
  chown --preserve-root --recursive www-data:adm "${monthDirectory}"
  chmod --preserve-root --recursive g+w "${monthDirectory}"

  return 0
}

if [ $# -eq 0 ]; then
  usage
fi

# Apache has a habit of starting this script twice, which can stomp on its own files
for pid in $(pidof -x rotate-www-logs); do
  if [ "${pid}" != "$$" ]; then
    # We echo and exit instead of die() because Apache prints stderr to the log, but not stdout. We don't need this logged.
    echo "rotate-www-logs is already running with PID ${pid}"
    exit 1
  fi
done

# Without the anonymizer nothing can be archived. Refuse to run (and do log
# this one) rather than produce empty archives and delete the raw logs.
if ! command -v ipv6loganon > /dev/null; then
  die "ipv6loganon not found; install the ipv6calc package."
fi

# Prevent the loop from entering if no matches are found for the pattern
shopt -s nullglob

filenameBase=$(basename "$1" | sed --regexp-extended "s/\.[0-9]+$//")
directory=$(dirname "$1")

for filename in "${directory}/${filenameBase}".*; do
  # When Apache calls this script, it passes the filename of the new log file it created.
  # Thus, we check here to make sure we don't process and then delete the brand-new log file!
  if [ "${filename}" = "$1" ]; then
    continue
  fi

  # Apache log files can have data for more than one day. Here we pull out entries for different days into different files.
  archived=1
  while IFS= read -r line; do
    # A log without any date stamps yields no usable lines; never treat a
    # blank stamp as a date.
    if [ -z "${line}" ]; then
      continue
    fi

    if ! archive_day "${filename}" "${line}" "${directory}"; then
      archived=0
    fi
  done < <(list_log_days "${filename}")

  if [ "${archived}" -eq 1 ]; then
    rm "${filename}"
  else
    printf "rotate-www-logs: could not archive every entry of %s; leaving it in place\n" "${filename}" 1>&2
  fi
done