mirror of
https://github.com/standardebooks/web.git
synced 2025-07-05 14:20:29 -04:00
Anonymize web logs after rotating and disable explicit download logging in favor of grepping the regular web logs
This commit is contained in:
parent
1ea3b2f28b
commit
1e698f2389
4 changed files with 76 additions and 16 deletions
|
@ -6,7 +6,7 @@ PHP 7+ is required.
|
|||
|
||||
```shell
|
||||
# Install Apache, PHP, PHP-FPM, and various other dependencies.
|
||||
sudo apt install -y git composer php-fpm php-cli php-gd php-xml php-apcu php-mbstring php-intl apache2 apache2-utils libfcgi0ldbl task-spooler
|
||||
sudo apt install -y git composer php-fpm php-cli php-gd php-xml php-apcu php-mbstring php-intl apache2 apache2-utils libfcgi0ldbl task-spooler ipv6calc
|
||||
|
||||
# Create the site root and logs root and clone this repo into it.
|
||||
sudo mkdir /standardebooks.org/
|
||||
|
|
|
@ -60,8 +60,9 @@ Define domain standardebooks.org
|
|||
DocumentRoot /standardebooks.org/web/www
|
||||
ErrorDocument 404 /404
|
||||
ErrorLog /var/log/local/www-error.log
|
||||
DirectorySlash Off
|
||||
RewriteEngine on
|
||||
CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined
|
||||
CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/web/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined
|
||||
|
||||
SSLEngine on
|
||||
SSLCertificateFile /etc/letsencrypt/live/${domain}/fullchain.pem
|
||||
|
@ -69,13 +70,6 @@ Define domain standardebooks.org
|
|||
Header always set Strict-Transport-Security "max-age=15768000"
|
||||
Header set Content-Security-Policy "default-src 'self';"
|
||||
|
||||
# Log downloads
|
||||
SetEnvIf Request_URI "\.epub$" logdownload
|
||||
SetEnvIf Request_URI "\.kepub.epub$" logdownload
|
||||
SetEnvIf Request_URI "\.azw3$" logdownload
|
||||
CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload
|
||||
DirectorySlash Off
|
||||
|
||||
<Directory /standardebooks.org/web/www/>
|
||||
# Disable .htaccess files
|
||||
AllowOverride none
|
||||
|
|
|
@ -60,6 +60,7 @@ Define domain standardebooks.test
|
|||
DocumentRoot /standardebooks.org/web/www
|
||||
ErrorDocument 404 /404
|
||||
ErrorLog /var/log/local/www-error.log
|
||||
DirectorySlash Off
|
||||
RewriteEngine on
|
||||
|
||||
SSLEngine on
|
||||
|
@ -68,13 +69,6 @@ Define domain standardebooks.test
|
|||
Header always set Strict-Transport-Security "max-age=15768000"
|
||||
Header set Content-Security-Policy "default-src 'self';"
|
||||
|
||||
# Log downloads
|
||||
SetEnvIf Request_URI "\.epub$" logdownload
|
||||
SetEnvIf Request_URI "\.kepub.epub$" logdownload
|
||||
SetEnvIf Request_URI "\.azw3$" logdownload
|
||||
CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload
|
||||
DirectorySlash Off
|
||||
|
||||
<Directory /standardebooks.org/web/www/>
|
||||
# Disable .htaccess files
|
||||
AllowOverride none
|
||||
|
|
72
scripts/rotate-www-logs
Executable file
72
scripts/rotate-www-logs
Executable file
|
@ -0,0 +1,72 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Prints the help text on stdout and exits the script with fmt's status.
# The headings stay at column 0 while the text below them is indented: fmt only
# joins successive lines that share the same indentation, so without the indent
# "DESCRIPTION" and "USAGE" would be merged into the paragraph that follows them.
# The heredoc delimiter is quoted so nothing in the help text is ever expanded.
usage(){
	fmt <<'EOF'
DESCRIPTION
	Moves Apache access log files into a by-month subdirectory, and gzip them.

	This script must be run as root, and is generally run by the Apache rotatelogs subprocess as such.

	Log files are moved to <LOG-DIR>/apache/YYYY-MM/

USAGE
	rotate-www-logs NEW-LOG-FILENAME
EOF
	exit
}
|
||||
# Prints "Error: <message>" (with the label in reverse-video red) on stderr and aborts the script with status 1.
# Arguments: $1 - the message to show.
die(){
	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
	exit 1
}
|
||||
# Show the help text when the one and only argument is a help flag; usage() exits the script.
if [ $# -eq 1 ]; then
	case "$1" in
		--help|-h) usage ;;
	esac
fi
|
||||
# End boilerplate
|
||||
|
||||
# NEW-LOG-FILENAME is required: with no arguments at all, print the help text. usage() ends the script itself.
[ $# -gt 0 ] || usage
|
||||
|
||||
# Apache has a habit of starting this script twice, which can stomp on its own files
|
||||
for runningPid in $(pidof -x rotate-www-logs); do
	# Our own PID is part of the list; only a different PID means another copy is running.
	if [ "${runningPid}" = $$ ]; then
		continue
	fi

	# We echo and exit instead of die() because Apache prints stderr to the log, but not stdout. We don't need this logged.
	echo "rotate-www-logs is already running with PID ${runningPid}"
	exit 1
done
|
||||
|
||||
# Prevent the loop from entering if no matches are found for the pattern
shopt -s nullglob

# Archives every already-rotated Apache access log that sits next to the new log file.
# Each rotated log is split by day, piped through ipv6loganon, appended to
# <directory of $1>/YYYY-MM/www-access-YYYY-MM-DD.log (re-gzipped afterwards), and then deleted.
#
# Arguments: $1 - filename of the new log file Apache just created; it is left untouched.
archive_rotated_logs(){
	local newLogFilename="$1"
	local filenameBase directory filename dates line logRawDate logDate logMonth archive

	# ipv6loganon is provided by the `ipv6calc` package. Without it the pipeline below would
	# write nothing while the source log is still deleted, so refuse to start.
	if ! command -v ipv6loganon > /dev/null; then
		die "ipv6loganon is not installed; install the ipv6calc package."
	fi

	# Strip the trailing numeric suffix (presumably the rotatelogs timestamp) to get the prefix shared by all rotated logs.
	filenameBase=$(basename "${newLogFilename}" | sed --regexp-extended "s/\.[0-9]+$//")
	directory=$(dirname "${newLogFilename}")

	# The path is quoted so a directory containing whitespace is not word-split; only the trailing `.*` globs.
	for filename in "${directory}/${filenameBase}".*; do
		# When Apache calls this script, it passes the filename of the new log file it created.
		# Thus, we check here to make sure we don't process and then delete the brand-new log file!
		if [ "${filename}" = "${newLogFilename}" ]; then
			continue
		fi

		# Apache log files can have data for more than one day. Here we pull out entries for different days into different files.
		# (`/` needs no escaping in an ERE; newer GNU grep warns about stray backslashes.)
		dates=$(grep --extended-regexp --only-matching "\[[0-9]{1,2}/[a-zA-Z]{3}/20[0-9]{2}" "${filename}" | sort -u)

		while IFS= read -r line; do
			# A log without any dated entry still feeds one empty line through the here-string.
			# `date -d ""` would resolve that to today and create a bogus archive, so skip it.
			if [ -z "${line}" ]; then
				continue
			fi

			# "[05/Jul/2025" -> "05 Jul 2025", a form `date -d` can parse.
			logRawDate=${line//\[/}
			logRawDate=${logRawDate//\// }

			# Skip text that merely looks like a date. An empty month would otherwise make the
			# recursive chown/chmod below run on the whole log directory.
			if ! logDate=$(date -d"${logRawDate}" "+%Y-%m-%d" 2> /dev/null); then
				continue
			fi

			logMonth=${logDate%-*}
			archive="${directory}/${logMonth}/www-access-${logDate}.log"

			mkdir -p "${directory}/${logMonth}"

			# Is the log file already existing and gzipped?
			if [ -f "${archive}.gz" ]; then
				gunzip "${archive}.gz"
			fi

			# `line` still carries its leading "[" and is matched literally.
			grep --fixed-strings -- "${line}" "${filename}" | ipv6loganon --anonymize-paranoid >> "${archive}"

			gzip --best "${archive}"

			chown --preserve-root --recursive www-data:adm "${directory}/${logMonth}"
			chmod --preserve-root --recursive g+w "${directory}/${logMonth}"
		done <<< "${dates}"

		rm -- "${filename}"
	done
}

# The argument check earlier in the script already guarantees $1; guarding here as well keeps
# an empty name from ever turning the glob above into `./.*`.
if [ $# -gt 0 ]; then
	archive_rotated_logs "$1"
fi
|
Loading…
Add table
Add a link
Reference in a new issue