mirror of
https://github.com/standardebooks/web.git
synced 2025-07-05 14:20:29 -04:00
Anonymize web logs after rotating and disable explicit download logging in favor of grepping the regular web logs
This commit is contained in:
parent
1ea3b2f28b
commit
1e698f2389
4 changed files with 76 additions and 16 deletions
|
@ -6,7 +6,7 @@ PHP 7+ is required.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
# Install Apache, PHP, PHP-FPM, and various other dependencies.
|
# Install Apache, PHP, PHP-FPM, and various other dependencies.
|
||||||
sudo apt install -y git composer php-fpm php-cli php-gd php-xml php-apcu php-mbstring php-intl apache2 apache2-utils libfcgi0ldbl task-spooler
|
sudo apt install -y git composer php-fpm php-cli php-gd php-xml php-apcu php-mbstring php-intl apache2 apache2-utils libfcgi0ldbl task-spooler ipv6calc
|
||||||
|
|
||||||
# Create the site root and logs root and clone this repo into it.
|
# Create the site root and logs root and clone this repo into it.
|
||||||
sudo mkdir /standardebooks.org/
|
sudo mkdir /standardebooks.org/
|
||||||
|
|
|
@ -60,8 +60,9 @@ Define domain standardebooks.org
|
||||||
DocumentRoot /standardebooks.org/web/www
|
DocumentRoot /standardebooks.org/web/www
|
||||||
ErrorDocument 404 /404
|
ErrorDocument 404 /404
|
||||||
ErrorLog /var/log/local/www-error.log
|
ErrorLog /var/log/local/www-error.log
|
||||||
|
DirectorySlash Off
|
||||||
RewriteEngine on
|
RewriteEngine on
|
||||||
CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined
|
CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/web/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined
|
||||||
|
|
||||||
SSLEngine on
|
SSLEngine on
|
||||||
SSLCertificateFile /etc/letsencrypt/live/${domain}/fullchain.pem
|
SSLCertificateFile /etc/letsencrypt/live/${domain}/fullchain.pem
|
||||||
|
@ -69,13 +70,6 @@ Define domain standardebooks.org
|
||||||
Header always set Strict-Transport-Security "max-age=15768000"
|
Header always set Strict-Transport-Security "max-age=15768000"
|
||||||
Header set Content-Security-Policy "default-src 'self';"
|
Header set Content-Security-Policy "default-src 'self';"
|
||||||
|
|
||||||
# Log downloads
|
|
||||||
SetEnvIf Request_URI "\.epub$" logdownload
|
|
||||||
SetEnvIf Request_URI "\.kepub.epub$" logdownload
|
|
||||||
SetEnvIf Request_URI "\.azw3$" logdownload
|
|
||||||
CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload
|
|
||||||
DirectorySlash Off
|
|
||||||
|
|
||||||
<Directory /standardebooks.org/web/www/>
|
<Directory /standardebooks.org/web/www/>
|
||||||
# Disable .htaccess files
|
# Disable .htaccess files
|
||||||
AllowOverride none
|
AllowOverride none
|
||||||
|
|
|
@ -60,6 +60,7 @@ Define domain standardebooks.test
|
||||||
DocumentRoot /standardebooks.org/web/www
|
DocumentRoot /standardebooks.org/web/www
|
||||||
ErrorDocument 404 /404
|
ErrorDocument 404 /404
|
||||||
ErrorLog /var/log/local/www-error.log
|
ErrorLog /var/log/local/www-error.log
|
||||||
|
DirectorySlash Off
|
||||||
RewriteEngine on
|
RewriteEngine on
|
||||||
|
|
||||||
SSLEngine on
|
SSLEngine on
|
||||||
|
@ -68,13 +69,6 @@ Define domain standardebooks.test
|
||||||
Header always set Strict-Transport-Security "max-age=15768000"
|
Header always set Strict-Transport-Security "max-age=15768000"
|
||||||
Header set Content-Security-Policy "default-src 'self';"
|
Header set Content-Security-Policy "default-src 'self';"
|
||||||
|
|
||||||
# Log downloads
|
|
||||||
SetEnvIf Request_URI "\.epub$" logdownload
|
|
||||||
SetEnvIf Request_URI "\.kepub.epub$" logdownload
|
|
||||||
SetEnvIf Request_URI "\.azw3$" logdownload
|
|
||||||
CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload
|
|
||||||
DirectorySlash Off
|
|
||||||
|
|
||||||
<Directory /standardebooks.org/web/www/>
|
<Directory /standardebooks.org/web/www/>
|
||||||
# Disable .htaccess files
|
# Disable .htaccess files
|
||||||
AllowOverride none
|
AllowOverride none
|
||||||
|
|
72
scripts/rotate-www-logs
Executable file
72
scripts/rotate-www-logs
Executable file
|
@ -0,0 +1,72 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Print the help text to stdout, wrapped by `fmt`, then exit the current shell.
# Arguments: $1 - optional exit status (defaults to 0), so callers that show the
#                 help because of a bad invocation can report failure.
# NOTE(review): the help text lines are flush-left in this copy, so `fmt` will join adjacent
# lines into one paragraph. Confirm against the upstream file whether they were indented.
usage(){
	# The delimiter is quoted so the help text is always taken literally (no expansion).
	fmt <<'EOF'
DESCRIPTION
Moves Apache access log files into a by-month subdirectory, and gzip them.

This script must be run as root, and is generally run by the Apache rotatelogs subprocess as such.

Log files are moved to <LOG-DIR>/apache/YYYY-MM/

USAGE
rotate-www-logs NEW-LOG-FILENAME
EOF
	exit "${1:-0}"
}
|
||||||
|
# Report a fatal error on stderr, prefixed with a highlighted "Error:", and abort with status 1.
# Arguments: $1 - the message to print.
die(){
	printf "\033[0;7;31mError:\033[0m %s\n" "${1}" >&2
	exit 1
}
|
||||||
|
# A lone --help / -h argument prints the help text (usage exits the script).
if [ $# -eq 1 ]; then
	case "$1" in
		--help|-h) usage ;;
	esac
fi
# End boilerplate

# Without any argument there is nothing to rotate, so show the help text instead.
[ $# -ne 0 ] || usage
|
||||||
|
|
||||||
|
# Apache has a habit of starting this script twice, which can stomp on its own files.
# Bail out if any other process named rotate-www-logs is found besides this one.
for otherPid in $(pidof -x rotate-www-logs); do
	# Our own PID is expected in the list; only a different one means a second instance.
	if [ "${otherPid}" = "$$" ]; then
		continue
	fi

	# We echo and exit instead of die() because Apache prints stderr to the log, but not stdout. We don't need this logged.
	echo "rotate-www-logs is already running with PID ${otherPid}"
	exit 1
done
|
||||||
|
|
||||||
|
# Split every rotated-out access log that sits next to the new log file ($1) into one
# anonymized, gzipped file per day under <directory>/YYYY-MM/, then delete the raw log.

# Prevent the loop from entering if no matches are found for the pattern
shopt -s nullglob

# An empty filename would turn the glob below into "./.*" and sweep up unrelated dotfiles.
if [ -z "${1-}" ]; then
	die "Expected the filename of the new log file as the first argument."
fi

# ipv6loganon is provided by the `ipv6calc` package. Check for it before touching any log:
# without it the anonymizing pipeline below writes nothing, yet the raw log would still be deleted.
if ! command -v ipv6loganon > /dev/null; then
	die "ipv6loganon not found; install the ipv6calc package."
fi

# Strip the numeric suffix rotatelogs appends, leaving the common base name of all rotated files.
filenameBase=$(basename "$1" | sed --regexp-extended "s/\.[0-9]+$//")
directory=$(dirname "$1")

# Only the trailing ".*" is a glob; the quoted prefix survives spaces and glob characters in the path.
for filename in "${directory}/${filenameBase}".*; do
	# When Apache calls this script, it passes the filename of the new log file it created.
	# Thus, we check here to make sure we don't process and then delete the brand-new log file!
	if [ "${filename}" = "$1" ]; then
		continue
	fi

	# Apache log files can have data for more than one day. Here we pull out entries for different days into different files.
	dates=$(grep --extended-regexp --only-matching "\[[0-9]{1,2}\/[a-zA-Z]{3}\/20[0-9]{2}" "${filename}" | sort -u)

	while IFS= read -r line; do
		# A log without any recognizable date yields a single empty line here. Skip it:
		# `date -d ""` resolves to today and would create a bogus archive.
		if [ -z "${line}" ]; then
			continue
		fi

		# Turn "[05/Jul/2025" into "05 Jul 2025", which `date -d` can parse.
		logRawDate=${line#\[}
		logRawDate=${logRawDate//\// }
		logDate=$(date -d"${logRawDate}" "+%Y-%m-%d")
		logMonth=$(date -d"${logRawDate}" "+%Y-%m")
		archive="${directory}/${logMonth}/www-access-${logDate}.log"

		mkdir -p "${directory}/${logMonth}"

		# Is the log file already existing and gzipped? Unpack it so today's entries can be appended.
		if [ -f "${archive}.gz" ]; then
			gunzip "${archive}.gz"
		fi

		# ${line} still carries its leading "[", so a fixed-string match selects exactly this day's entries.
		grep --fixed-strings -- "${line}" "${filename}" | ipv6loganon --anonymize-paranoid >> "${archive}"

		gzip --best "${archive}"

		chown --preserve-root --recursive www-data:adm "${directory}/${logMonth}"
		chmod --preserve-root --recursive g+w "${directory}/${logMonth}"
	done <<< "${dates}"

	rm -- "${filename}"
done
|
Loading…
Add table
Add a link
Reference in a new issue