diff --git a/README.md b/README.md index dbca1312..3ec11c06 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,52 @@ # Installation -This repository only contains PHP source files, it doesn’t contain configuration for running them on a web server. - PHP 7+ is required. -If you’d like to set up a development environment on your local machine, then you’ll have to configure your own local web server to serve PHP files. +## Installing on Ubuntu 18.04 (Bionic) -You’ll also need to ensure the following: +```shell +# Create the site root and clone this repo into it. +sudo mkdir /standardebooks.org/ +cd /standardebooks.org/ +git clone https://github.com/standardebooks/web/ -- The path `/standardebooks.org/` exists and is the root of this project. (Configurable in `./lib/Constants.php`.) +# Install dependencies using Composer. +cd /standardebooks.org/web/ +composer install -- Your PHP installation must be configured to have `/standardebooks.org/lib/` in its include path. +# Add standardebooks.test to your hosts file. +printf '127.0.0.1\tstandardebooks.test\n' | sudo tee -a /etc/hosts -- [PHP short open tags](https://www.php.net/manual/en/ini.core.php#ini.short-open-tag) must be enabled. +# Install Apache, PHP, PHP-FPM, and various other dependencies. +sudo apt install -y composer php-gd php-xml php-apcu php-intl apache2 apache2-utils libfcgi0ldbl php-fpm php-cli php-mbstring -- [PHP-APCu](http://php.net/manual/en/book.apcu.php), [PHP-intl](http://php.net/manual/en/book.intl.php), and [Composer](https://getcomposer.org/) must be installed. On Ubuntu this can be done with `sudo apt install php-apcu php-intl composer`. +# Create a self-signed SSL certificate for use with the local web site installation. 
+openssl req -x509 -nodes -days 99999 -newkey rsa:4096 -subj "/CN=standardebooks.test" -keyout /standardebooks.org/web/config/ssl/standardebooks.test.key -sha256 -out /standardebooks.org/web/config/ssl/standardebooks.test.crt -- Once Composer is installed, next install the SE Composer dependencies: +# Link and enable the SE Apache configuration file. +sudo ln -s /standardebooks.org/web/config/apache/standardebooks.test.conf /etc/apache2/sites-available/ +sudo a2ensite standardebooks.test +sudo systemctl restart apache2.service - ```bash - cd /standardebooks.org/ - composer install - ``` +# Link and enable the SE PHP-FPM pool. +sudo ln -s /standardebooks.org/web/config/php/fpm/standardebooks.test.conf /etc/php/*/fpm/pool.d/ +sudo systemctl restart "php*-fpm.service" +``` -- The URL `^/ebooks/([^\./]+?)/$` must redirect to `/standardebooks.org/ebooks/author.php?url-path=$1` +If everything went well you should now be able to open your web browser and visit `https://standardebooks.test/`. However, you won’t see any ebooks if you visit `https://standardebooks.test/ebooks/`. 
To install some ebooks, first you have to clone their source from GitHub, then deploy them to your local website using the `./scripts/deploy-ebook-to-www` script: -- The URL `^/ebooks/([^\.]+?)/?$` must redirect to `/standardebooks.org/ebooks/ebook.php?url-path=$1` +```shell +# First, install the SE toolset, which will make the `se build` command-line executable available to the `deploy-ebook-to-www` script: +# https://github.com/standardebooks/tools -- The URL `^/tags/([^\./]+?)/?$` must redirect to `/standardebooks.org/ebooks/index.php?tag=$1` +# Once the toolset is installed, clone a book and deploy it to your local SE site: +mkdir /standardebooks.org/ebooks/ +cd /standardebooks.org/ebooks/ +git clone https://github.com/standardebooks/david-lindsay_a-voyage-to-arcturus +/standardebooks.org/web/scripts/deploy-ebook-to-www david-lindsay_a-voyage-to-arcturus +``` -- The URL `/collections/([^\./]+?)/?$` must redirect to `/standardebooks.org/ebooks/index.php?collection=$1` - -- Your web server should be configured to serve PHP files without the `.php` file extension. (I.e., your web server *internally* redirects `/foo/bar` to `/foo/bar.php`, if `/foo/bar.php` exists.) +If everything went well, `https://standardebooks.test/ebooks/` will show the one ebook you deployed. # Filesystem layout @@ -44,25 +59,25 @@ You’ll also need to ensure the following: /standardebooks.org/ebooks/omar-khayyam_the-rubaiyat-of-omar-khayyam_edward-fitzgerald_edmund-dulac.git/ ```` -- `/standardebooks.org/www/ebooks/` contains a nested hierarchy of deployed ebook files, that are read by the website for display and download. For example, we might have: +- `/standardebooks.org/web/www/ebooks/` contains a nested hierarchy of deployed ebook files, that are read by the website for display and download. 
For example, we might have: ```` - /standardebooks.org/www/ebooks/maurice-leblanc/ - /standardebooks.org/www/ebooks/maurice-leblanc/the-hollow-needle/ - /standardebooks.org/www/ebooks/maurice-leblanc/the-hollow-needle/alexander-teixeira-de-mattos/ - /standardebooks.org/www/ebooks/maurice-leblanc/813/ - /standardebooks.org/www/ebooks/maurice-leblanc/813/alexander-teixeira-de-mattos/ + /standardebooks.org/web/www/ebooks/maurice-leblanc/ + /standardebooks.org/web/www/ebooks/maurice-leblanc/the-hollow-needle/ + /standardebooks.org/web/www/ebooks/maurice-leblanc/the-hollow-needle/alexander-teixeira-de-mattos/ + /standardebooks.org/web/www/ebooks/maurice-leblanc/813/ + /standardebooks.org/web/www/ebooks/maurice-leblanc/813/alexander-teixeira-de-mattos/ ```` These directories contain the full ebook source, as if it was pulled from Git. (But they are not actual Git repositories.) Additionally each one contains a `./dist/` folder containing built ebook files for distribution. - The website pulls all ebook information from what is contained in `/standardebooks.org/www/ebooks/`. It does not inspect `/standardebooks.org/ebooks/`. Therefore it is possible for one or the other to hold different catalogs if they become out of sync. + The website pulls all ebook information from what is contained in `/standardebooks.org/web/www/ebooks/`. It does not inspect `/standardebooks.org/ebooks/`. Therefore it is possible for one or the other to hold different catalogs if they become out of sync. - To automatically populate your server with ebooks from https://github.com/standardebooks/, you can use sync-ebooks and deploy-ebook-to-www in the [scripts](scripts) directory. If you don't want to clone all ebooks, don't use sync-ebooks, and instead clone the books you want into `/standardebooks.org/ebooks` with `git clone --bare`. 
To clone a list of books, you can use `while IFS= read -r line; do git clone --bare "${line}"; done < urllist.txt` + To automatically populate your server with ebooks from https://github.com/standardebooks/, you can use sync-ebooks and deploy-ebook-to-www in the [scripts](scripts) directory. If you don’t want to clone all ebooks, don’t use sync-ebooks, and instead clone the books you want into `/standardebooks.org/ebooks` with `git clone --bare`. To clone a list of books, you can use `while IFS= read -r line; do git clone --bare "${line}"; done < urllist.txt` # Testing -This repository includes [PHPStan](https://github.com/phpstan/phpstan) to statically analyze the codebase and [Safe PHP](https://github.com/thecodingmachine/safe) to replace old functions that don't throw exceptions. +This repository includes [PHPStan](https://github.com/phpstan/phpstan) to statically analyze the codebase and [Safe PHP](https://github.com/thecodingmachine/safe) to replace old functions that don’t throw exceptions. To run PHPStan, execute: @@ -181,7 +196,7 @@ After you have installed both, you can start and manage a VM running a server li - The Vagrant script will install [se](https://github.com/standardebooks/tools) by default. If you don't want that (it pulls in quite a few dependencies), remove the `se-tools` argument in Vagrantfile. -- `se`, if installed in the VM, and /standardebooks.org/scripts are in the VMs path. This means you can easily use them with `vagrant ssh -c` like this: `vagrant ssh -c "sync-ebooks -vv /standardebooks.org/ebooks; deploy-ebook-to-www -v --group www-data /standardebooks.org/ebooks/*"`, which would populate the test server with all available SE ebooks. +- `se`, if installed in the VM, and /standardebooks.org/scripts are in the VMs path. 
This means you can easily use them with `vagrant ssh -c` like this: `vagrant ssh -c "sync-ebooks -vv /standardebooks.org/ebooks; deploy-ebook-to-www -v --group www-data /standardebooks.org/ebooks/*"`, which would populate the test server with all available SE ebooks. - It is safe to re-run the provision script if you did not change the nginx or php configuration files (change the files in the provision script and re-provision instead), so you can use `vagrant up --provision` or `vagrant reload --provision to update the VM, including se-tools and epubcheck, without having to delete it. diff --git a/config/apache/standardebooks.org.conf b/config/apache/standardebooks.org.conf new file mode 100644 index 00000000..5a1b339f --- /dev/null +++ b/config/apache/standardebooks.org.conf @@ -0,0 +1,216 @@ +# Global configuration; see https://securityheaders.com +Header set X-UA-Compatible "IE=edge" +Header set X-Frame-Options "sameorigin" +Header set X-Content-Type-Options "nosniff" +Header set X-Xss-Protection "1; mode=block" +Header set Referrer-Policy "no-referrer-when-downgrade" +ServerTokens prod +ServerSignature off +AddDefaultCharset utf-8 +UseCanonicalName on +LogLevel warn +AddOutputFilterByType deflate image/svg+xml +AddType application/font-woff2 .woff2 +TraceEnable off +Protocols h2 h2c http/1.1 + +# Set up caching directives for infrequently changed files +ExpiresActive On +ExpiresByType application/font-woff "access plus 1 month" +ExpiresByType application/font-woff2 "access plus 1 month" +ExpiresByType application/javascript "access plus 1 month" +ExpiresByType image/gif "access plus 1 month" +ExpiresByType image/png "access plus 1 month" +ExpiresByType image/jpeg "access plus 1 month" +ExpiresByType image/svg+xml "access plus 1 month" +ExpiresByType image/vnd.microsoft.icon "access plus 1 month" +ExpiresByType image/x-icon "access plus 1 month" +ExpiresByType text/css "access plus 1 month" + +# These lines are a workaround for an Apache bug that prevents 
mod_deflate, etags, and ExpiresByType working at the same time. +# This is probably still broken in 18.04. See https://stackoverflow.com/questions/896974/apache-is-not-sending-304-response-if-mod-deflate-and-addoutputfilterbytype-is +FileETag All +RequestHeader edit "If-None-Match" "^\"(.*)-gzip\"$" "\"$1\"" +Header edit "ETag" "^\"(.*[^g][^z][^i][^p])\"$" "\"$1-gzip\"" + +# SSL hardening; see https://mozilla.github.io/server-side-tls/ssl-config-generator/ +SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1 +SSLCipherSuite ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256 +SSLHonorCipherOrder on +SSLCompression off +SSLSessionTickets off +SSLStaplingCache shmcb:/var/run/ocsp(128000) +# SSL Stapling should be off for testing to prevent errors in log files, and on for live +SSLUseStapling on +SSLStaplingResponderTimeout 5 +SSLStaplingReturnResponderErrors off + +Define domain standardebooks.org + + + ServerName ${domain} + ServerAlias www.${domain} + RedirectPermanent / https://${domain}/ + + + + ServerName ${domain} + ServerAlias www.${domain} + DocumentRoot /standardebooks.org/web/www + ErrorDocument 404 /404 + ErrorLog /var/log/local/www-error.log + RewriteEngine on + CustomLog "|/usr/bin/rotatelogs -f -p /standardebooks.org/scripts/rotate-www-logs /var/log/local/apache/www-access.log 86400" combined + + SSLEngine on + SSLCertificateFile /etc/letsencrypt/live/${domain}/fullchain.pem + SSLCertificateKeyFile /etc/letsencrypt/live/${domain}/privkey.pem + Header always set Strict-Transport-Security "max-age=15768000" + + # CSP still causes a lot of problems with Firefox (can't use inline CSS debugger, etc.) so disable for now. 
+ # Header set Content-Security-Policy "default-src 'self';" + + # # Below is required to fix a Firefox bug with CSP and SVG images; see https://pokeinthe.io/2016/04/09/black-icons-with-svg-and-csp/ + # + # Header set Content-Security-Policy "default-src 'none'; frame-ancestors 'none'; style-src 'self' 'unsafe-inline';" + # + + # Log downloads + SetEnvIf Request_URI "\.epub$" logdownload + SetEnvIf Request_URI "\.kepub.epub$" logdownload + SetEnvIf Request_URI "\.epub3$" logdownload + SetEnvIf Request_URI "\.azw3$" logdownload + CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload + + + # Disable .htaccess files + AllowOverride none + + # Disable unneeded options + Options none + + # Allow access to www/ + Require all granted + + # Pass HTTP Authorization headers to PHP-FPM + CGIPassAuth on + + + AddType application/epub+zip .epub .epub3 + AddType application/x-mobi8-ebook .azw3 + + + # Serve distributables using the "download" dialog instead of opening in-browser + # Note: the trailing e in the Header directive is required + SetEnvIf Request_URI ^/ebooks/.+?/dist/(.+)$ FILENAME=$1 + Header set "Content-Disposition" "attachment; filename=%{FILENAME}e" + + + + DirectoryIndex index.xml + + + # We explicitly set the content-type for items in the /vocab/ directory, because Apache doesn't set it for us, + # and we need a content-type header when using the "nosniff" header. 
See https://bugzilla.mozilla.org/show_bug.cgi?id=1547076 + + Header set Content-Type "text/plain" + + + # Enable HTTP CORS so that browser-based readers like Readium can access opds and ebooks + # See https://github.com/standardebooks/tools/issues/2 + + Header set Access-Control-Allow-Origin "*" + + + # Remove www from requests + RewriteCond %{HTTP_HOST} ^www\.(.+) [NC] + RewriteRule ^ https://%1%{REQUEST_URI} [R=301,L] + + # PHP-FPM configuration + # See https://serverfault.com/questions/450628/apache-2-4-php-fpm-proxypassmatch/510784 + + # Forward all PHP requests to the php-fpm pool for this domain. + + SetHandler "proxy:unix:/run/php/${domain}.sock|fcgi://${domain}" + + + # Set some proxy properties. + + ProxySet connectiontimeout=5 timeout=240 + + + # In case of 404, serve the 404 page specified by ErrorDocument, not the default FPM error page. + # Note that we can't use `ProxyErrorOverride on` because that catches ALL 4xx and 5xx HTTP headers + # and serves the default Apache page for them. 
+ RewriteCond %{REQUEST_FILENAME} \.php$ + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} !-f + RewriteRule (.*) - [H=text/html] + + # Received: /filename.php and /filename.php exists in filesystem; Result: 301 redirect to /filename and restart request + RewriteCond %{REQUEST_FILENAME} \.php$ + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} -f + RewriteRule ^/(.+)\.php$ /$1 [R=301,L] + + # Received: /filename and /filename.php exists in filesystem; Result: change /filename to /filename.php and continue processing + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} !-f + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} !-d + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI}.php -f + RewriteRule ^([^\.]+)$ $1.php [QSA] + # End PHP-FPM configuration + + # Received: /filename and /filename.xml exists in filesystem; Result: rewrite to /filename.xml and end request + RewriteCond %{DOCUMENT_ROOT}%{REQUEST_FILENAME}.xml -f + RewriteRule (.*) $1.xml [L] + + # Favicon rewrites + RewriteRule ^/(apple-touch|android-chrome|favicon|mstile|safari-pinned|browserconfig|manifest)([^/]+)$ /images/favicons/$1$2 [L] + + # List of specific URL rewrites + RewriteRule ^/ebooks/aristotle/the-nicomachean-ethics(.+)$ /ebooks/aristotle/nicomachean-ethics$1 [R=301,L] + RewriteRule ^/ebooks/sir-arthur-conan-doyle(.+)$ /ebooks/arthur-conan-doyle$1 [R=301,L] + RewriteRule ^/alices-adventures-in-wonderland.+$ /ebooks/lewis-carroll/alices-adventures-in-wonderland/$1 [R=301,L] + RewriteRule ^/ebooks/philip-k-dick/short-stories(.+)$ /ebooks/philip-k-dick/short-fiction$1 [R=301,L] + RewriteRule ^/ebooks/benjamin-disraeli/sibyl(.+)$ /ebooks/benjamin-disraeli/sybil$1 [R=301,L] + RewriteRule ^/ebooks/lewis-carroll/alices-adventures-in-wonderland/dist/(.+)$ /ebooks/lewis-carroll/alices-adventures-in-wonderland/john-tenniel/dist/$1 [R=301,L] + RewriteRule ^/ebooks/lewis-carroll/through-the-looking-glass/dist/(.+)$ /ebooks/lewis-carroll/through-the-looking-glass/john-tenniel/dist/$1 [R=301,L] + RewriteRule 
^/ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company$ /ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company_george-tolstoy [R=301,L] + RewriteRule ^/ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company/(.+?)$ /ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company_george-tolstoy/$1 [R=301,L] + RewriteRule ^/ebooks/h-g-wells/tales-of-space-and-time(.+)$ /ebooks/h-g-wells/short-fiction$1 [R=301,L] + + # Remove newline characters inserted by accident in some email clients + RewriteRule ^(.*)\r\n[\ ]?(.*)$ $1$2 [R=301,N] + RewriteRule ^(.*)/r/n[\ ]?(.*)$ $1$2 [R=301,N] + RewriteRule ^(.*)/[rn]$ $1 [R=301,N] + + # Redirect cover images with caching sha's to the root image + # We do this because some sites like Google cache the cover image path, so changing it results in lots of 404s + RewriteRule ^/images/covers/(.+?)\-[a-z0-9]{8}\-(cover|hero)(@2x)?\.jpg$ /images/covers/$1-$2$3.jpg + + # Force a trailing slash on author directories + RewriteRule ^/(tags|collections|ebooks)/([^\./]+?)$ /$1/$2/ [R=301,L] + RewriteRule ^/ebooks/([^\./]+?)/$ /ebooks/author.php?url-path=$1 [QSA] + RewriteRule ^/tags/([^\./]+?)/$ /ebooks/index.php?tag=$1 [QSA] + RewriteRule ^/collections/([^\./]+?)/$ /ebooks/index.php?collection=$1 [QSA] + + # Prevent this rule from firing if we're getting a distribution file + RewriteCond %{REQUEST_FILENAME} !^/ebooks/.+?/dist/.+$ + RewriteCond %{REQUEST_FILENAME} !^/ebooks/.+?/src/.+$ + RewriteRule ^/ebooks/([^\.]+?)/?$ /ebooks/ebook.php?url-path=$1 + + + + ServerName standardebooks.com + ServerAlias www.standardebooks.com + RedirectPermanent / https://${domain}/ + + + + ServerName standardebooks.com + ServerAlias www.standardebooks.com + RedirectPermanent / https://${domain}/ + + SSLEngine on + SSLCertificateFile /etc/letsencrypt/live/standardebooks.com/fullchain.pem + SSLCertificateKeyFile 
/etc/letsencrypt/live/standardebooks.com/privkey.pem + Header always set Strict-Transport-Security "max-age=15768000" + diff --git a/config/apache/standardebooks.test.conf b/config/apache/standardebooks.test.conf new file mode 100644 index 00000000..1a62b920 --- /dev/null +++ b/config/apache/standardebooks.test.conf @@ -0,0 +1,198 @@ +# Global configuration; see https://securityheaders.com +Header set X-UA-Compatible "IE=edge" +Header set X-Frame-Options "sameorigin" +Header set X-Content-Type-Options "nosniff" +Header set X-Xss-Protection "1; mode=block" +Header set Referrer-Policy "no-referrer-when-downgrade" +ServerTokens prod +ServerSignature off +AddDefaultCharset utf-8 +UseCanonicalName on +LogLevel warn +AddOutputFilterByType deflate image/svg+xml +AddType application/font-woff2 .woff2 +TraceEnable off +Protocols h2 h2c http/1.1 + +# Set up caching directives for infrequently changed files +ExpiresActive On +ExpiresByType application/font-woff "access plus 1 month" +ExpiresByType application/font-woff2 "access plus 1 month" +ExpiresByType application/javascript "access plus 1 month" +ExpiresByType image/gif "access plus 1 month" +ExpiresByType image/png "access plus 1 month" +ExpiresByType image/jpeg "access plus 1 month" +ExpiresByType image/svg+xml "access plus 1 month" +ExpiresByType image/vnd.microsoft.icon "access plus 1 month" +ExpiresByType image/x-icon "access plus 1 month" +ExpiresByType text/css "access plus 1 month" + +# These lines are a workaround for an Apache bug that prevents mod_deflate, etags, and ExpiresByType working at the same time. +# This is probably still broken in 18.04. 
See https://stackoverflow.com/questions/896974/apache-is-not-sending-304-response-if-mod-deflate-and-addoutputfilterbytype-is +FileETag All +RequestHeader edit "If-None-Match" "^\"(.*)-gzip\"$" "\"$1\"" +Header edit "ETag" "^\"(.*[^g][^z][^i][^p])\"$" "\"$1-gzip\"" + +# SSL hardening; see https://mozilla.github.io/server-side-tls/ssl-config-generator/ +SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1 +SSLCipherSuite ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256 +SSLHonorCipherOrder on +SSLCompression off +SSLSessionTickets off +SSLStaplingCache shmcb:/var/run/ocsp(128000) +# SSL Stapling should be off for testing to prevent errors in log files, and on for live +SSLUseStapling off +SSLStaplingResponderTimeout 5 +SSLStaplingReturnResponderErrors off + +Define domain standardebooks.test + + + ServerName ${domain} + ServerAlias www.${domain} + RedirectPermanent / https://${domain}/ + + + + ServerName ${domain} + ServerAlias www.${domain} + DocumentRoot /standardebooks.org/web/www + ErrorDocument 404 /404 + ErrorLog /var/log/local/www-error.log + RewriteEngine on + + SSLEngine on + SSLCertificateFile /standardebooks.org/web/config/ssl/${domain}.crt + SSLCertificateKeyFile /standardebooks.org/web/config/ssl/${domain}.key + Header always set Strict-Transport-Security "max-age=15768000" + + # CSP still causes a lot of problems with Firefox (can't use inline CSS debugger, etc.) so disable for now. 
+ # Header set Content-Security-Policy "default-src 'self';" + + # # Below is required to fix a Firefox bug with CSP and SVG images; see https://pokeinthe.io/2016/04/09/black-icons-with-svg-and-csp/ + # + # Header set Content-Security-Policy "default-src 'none'; frame-ancestors 'none'; style-src 'self' 'unsafe-inline';" + # + + # Log downloads + SetEnvIf Request_URI "\.epub$" logdownload + SetEnvIf Request_URI "\.kepub.epub$" logdownload + SetEnvIf Request_URI "\.epub3$" logdownload + SetEnvIf Request_URI "\.azw3$" logdownload + CustomLog /var/log/local/downloads.log "%h [%{%Y-%m-%d %H:%M:%S %Z}t] \"%r\" %>s %b" env=logdownload + + + # Disable .htaccess files + AllowOverride none + + # Disable unneeded options + Options none + + # Allow access to www/ + Require all granted + + # Pass HTTP Authorization headers to PHP-FPM + CGIPassAuth on + + + AddType application/epub+zip .epub .epub3 + AddType application/x-mobi8-ebook .azw3 + + + # Serve distributables using the "download" dialog instead of opening in-browser + # Note: the trailing e in the Header directive is required + SetEnvIf Request_URI ^/ebooks/.+?/dist/(.+)$ FILENAME=$1 + Header set "Content-Disposition" "attachment; filename=%{FILENAME}e" + + + + DirectoryIndex index.xml + + + # We explicitly set the content-type for items in the /vocab/ directory, because Apache doesn't set it for us, + # and we need a content-type header when using the "nosniff" header. 
See https://bugzilla.mozilla.org/show_bug.cgi?id=1547076 + + Header set Content-Type "text/plain" + + + # Enable HTTP CORS so that browser-based readers like Readium can access opds and ebooks + # See https://github.com/standardebooks/tools/issues/2 + + Header set Access-Control-Allow-Origin "*" + + + # Remove www from requests + RewriteCond %{HTTP_HOST} ^www\.(.+) [NC] + RewriteRule ^ https://%1%{REQUEST_URI} [R=301,L] + + # PHP-FPM configuration + # See https://serverfault.com/questions/450628/apache-2-4-php-fpm-proxypassmatch/510784 + + # Forward all PHP requests to the php-fpm pool for this domain. + + SetHandler "proxy:unix:/run/php/${domain}.sock|fcgi://${domain}" + + + # Set some proxy properties. + + ProxySet connectiontimeout=5 timeout=240 + + + # In case of 404, serve the 404 page specified by ErrorDocument, not the default FPM error page. + # Note that we can't use `ProxyErrorOverride on` because that catches ALL 4xx and 5xx HTTP headers + # and serves the default Apache page for them. 
+ RewriteCond %{REQUEST_FILENAME} \.php$ + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} !-f + RewriteRule (.*) - [H=text/html] + + # Received: /filename.php and /filename.php exists in filesystem; Result: 301 redirect to /filename and restart request + RewriteCond %{REQUEST_FILENAME} \.php$ + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} -f + RewriteRule ^/(.+)\.php$ /$1 [R=301,L] + + # Received: /filename and /filename.php exists in filesystem; Result: change /filename to /filename.php and continue processing + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} !-f + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI} !-d + RewriteCond %{DOCUMENT_ROOT}/%{REQUEST_URI}.php -f + RewriteRule ^([^\.]+)$ $1.php [QSA] + # End PHP-FPM configuration + + # Received: /filename and /filename.xml exists in filesystem; Result: rewrite to /filename.xml and end request + RewriteCond %{DOCUMENT_ROOT}%{REQUEST_FILENAME}.xml -f + RewriteRule (.*) $1.xml [L] + + # Favicon rewrites + RewriteRule ^/(apple-touch|android-chrome|favicon|mstile|safari-pinned|browserconfig|manifest)([^/]+)$ /images/favicons/$1$2 [L] + + # List of specific URL rewrites + RewriteRule ^/ebooks/aristotle/the-nicomachean-ethics(.+)$ /ebooks/aristotle/nicomachean-ethics$1 [R=301,L] + RewriteRule ^/ebooks/sir-arthur-conan-doyle(.+)$ /ebooks/arthur-conan-doyle$1 [R=301,L] + RewriteRule ^/alices-adventures-in-wonderland.+$ /ebooks/lewis-carroll/alices-adventures-in-wonderland/$1 [R=301,L] + RewriteRule ^/ebooks/philip-k-dick/short-stories(.+)$ /ebooks/philip-k-dick/short-fiction$1 [R=301,L] + RewriteRule ^/ebooks/benjamin-disraeli/sibyl(.+)$ /ebooks/benjamin-disraeli/sybil$1 [R=301,L] + RewriteRule ^/ebooks/lewis-carroll/alices-adventures-in-wonderland/dist/(.+)$ /ebooks/lewis-carroll/alices-adventures-in-wonderland/john-tenniel/dist/$1 [R=301,L] + RewriteRule ^/ebooks/lewis-carroll/through-the-looking-glass/dist/(.+)$ /ebooks/lewis-carroll/through-the-looking-glass/john-tenniel/dist/$1 [R=301,L] + RewriteRule 
^/ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company$ /ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company_george-tolstoy [R=301,L] + RewriteRule ^/ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company/(.+?)$ /ebooks/nikolai-gogol/short-fiction/claud-field_isabel-f-hapgood_vizetelly-and-company_george-tolstoy/$1 [R=301,L] + RewriteRule ^/ebooks/h-g-wells/tales-of-space-and-time(.+)$ /ebooks/h-g-wells/short-fiction$1 [R=301,L] + + # Remove newline characters inserted by accident in some email clients + RewriteRule ^(.*)\r\n[\ ]?(.*)$ $1$2 [R=301,N] + RewriteRule ^(.*)/r/n[\ ]?(.*)$ $1$2 [R=301,N] + RewriteRule ^(.*)/[rn]$ $1 [R=301,N] + + # Redirect cover images with caching sha's to the root image + # We do this because some sites like Google cache the cover image path, so changing it results in lots of 404s + RewriteRule ^/images/covers/(.+?)\-[a-z0-9]{8}\-(cover|hero)(@2x)?\.jpg$ /images/covers/$1-$2$3.jpg + + # Force a trailing slash on author directories + RewriteRule ^/(tags|collections|ebooks)/([^\./]+?)$ /$1/$2/ [R=301,L] + RewriteRule ^/ebooks/([^\./]+?)/$ /ebooks/author.php?url-path=$1 [QSA] + RewriteRule ^/tags/([^\./]+?)/$ /ebooks/index.php?tag=$1 [QSA] + RewriteRule ^/collections/([^\./]+?)/$ /ebooks/index.php?collection=$1 [QSA] + + # Prevent this rule from firing if we're getting a distribution file + RewriteCond %{REQUEST_FILENAME} !^/ebooks/.+?/dist/.+$ + RewriteCond %{REQUEST_FILENAME} !^/ebooks/.+?/src/.+$ + RewriteRule ^/ebooks/([^\.]+?)/?$ /ebooks/ebook.php?url-path=$1 + diff --git a/config/php/fpm/standardebooks.org.conf b/config/php/fpm/standardebooks.org.conf new file mode 100644 index 00000000..e7867cec --- /dev/null +++ b/config/php/fpm/standardebooks.org.conf @@ -0,0 +1,18 @@ +[standardebooks.org] +user = www-data +group = www-data + +listen = /run/php/standardebooks.org.sock +listen.owner = www-data +listen.group = www-data + +pm = 
ondemand +pm.max_children = 20 +pm.process_idle_timeout = 30s +pm.max_requests = 200 + +request_slowlog_timeout = 10s +slowlog = /var/log/local/php-fpm-slow.log +catch_workers_output = yes + +php_admin_value[include_path] = /standardebooks.org/web/lib diff --git a/config/php/fpm/standardebooks.test.conf b/config/php/fpm/standardebooks.test.conf new file mode 100644 index 00000000..230c05f3 --- /dev/null +++ b/config/php/fpm/standardebooks.test.conf @@ -0,0 +1,18 @@ +[standardebooks.test] +user = www-data +group = www-data + +listen = /run/php/standardebooks.test.sock +listen.owner = www-data +listen.group = www-data + +pm = ondemand +pm.max_children = 10 +pm.process_idle_timeout = 30s +pm.max_requests = 200 + +request_slowlog_timeout = 10s +slowlog = /var/log/local/php-fpm-slow.log +catch_workers_output = yes + +php_admin_value[include_path] = /standardebooks.org/web/lib:/standardebooks.org/web/vendor diff --git a/config/phpstan/phpstan.neon b/config/phpstan/phpstan.neon new file mode 100644 index 00000000..5a46ff53 --- /dev/null +++ b/config/phpstan/phpstan.neon @@ -0,0 +1,14 @@ +# This is the config file for PHPStan when run from the command line. 
+ +includes: + - ../../vendor/thecodingmachine/phpstan-safe-rule/phpstan-safe-rule.neon + +parameters: + ignoreErrors: + # Ignore errors caused by Template static class reflection + - '#Call to an undefined static method Template::[a-zA-Z0-9\\_]+\(\)\.#' + level: + 7 + paths: + - %rootDir%/../../../lib + - %rootDir%/../../../www diff --git a/config/ssl/standardebooks.test.crt b/config/ssl/standardebooks.test.crt new file mode 100644 index 00000000..c41b12ca --- /dev/null +++ b/config/ssl/standardebooks.test.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDIjCCAgoCCQDYdm2EbEmyajANBgkqhkiG9w0BAQsFADBSMQswCQYDVQQGEwJV +UzELMAkGA1UECAwCSUwxGDAWBgNVBAoMD1N0YW5kYXJkIEVib29rczEcMBoGA1UE +AwwTc3RhbmRhcmRlYm9va3MudGVzdDAgFw0xODA0MDUxNzMwNTVaGA8yMjkyMDEx +ODE3MzA1NVowUjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAklMMRgwFgYDVQQKDA9T +dGFuZGFyZCBFYm9va3MxHDAaBgNVBAMME3N0YW5kYXJkZWJvb2tzLnRlc3QwggEi +MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDyS4SPT6XwrCRFH1ZBEvLyVze6 +C85DJBRkfMU4EQDmepkRea+YuGPvc/UaopT2keAvmnfyxIeJHlp6pWW4dCNGnCv/ +GTVak/st+TvXT7D9PYNkLiQH9ZfP/8Yhf+/D9C3D57dAHXtqScpbIS81hBO9pxB3 +Z8de975BehrmOfzKan+nGSg3H7j3NVqa4+mt/3u50ma6nqDt2upTF65DLk6DGsqL +gBbHsQ4qkbvhaTRqNf3dQMVlwf0iK1MSzbPuIE09Nu8ggzDXo2IkEZD9te6NacLT +N6uk+3lN2YHSCxHyD4biLNjbpX7pXxK61Sh8hk/chJiIAakifGX8X4zjXDIpAgMB +AAEwDQYJKoZIhvcNAQELBQADggEBALJ9pfuDiRqaD87TaEgn+tliV27bXJqbEH8f +QgWEah3tQ152ej4HisLOkzvr3Q2FtmS+d5J8tLDgMnzL224C77y6GMVQr1tJzLv6 +2ayQqq2upg5ZSML8rLPP0d2QAuyfxDpyKm8YuidVyx/3ET/w8KGYPi3XxLBLkVOv +BL44XRcx596TrJeX8jgW7gXXvO9uEMPBzQeq7LTfpSy7gEpmuOeSaSNFbW7jJpnO +2OdvMO3yNwky3Nf91LCt4QQmawfIHLT9zeFBq9x0wzSbimtTKD0vRAh/JVBK2CoY +qZlCuaq1ilZUoRm+lr1cnSrLDWUpuzOxuhPTmMNhDT8XSrjISYI= +-----END CERTIFICATE----- diff --git a/config/ssl/standardebooks.test.key b/config/ssl/standardebooks.test.key new file mode 100644 index 00000000..7f2d8ade --- /dev/null +++ b/config/ssl/standardebooks.test.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- 
+MIIEogIBAAKCAQEA8kuEj0+l8KwkRR9WQRLy8lc3ugvOQyQUZHzFOBEA5nqZEXmv +mLhj73P1GqKU9pHgL5p38sSHiR5aeqVluHQjRpwr/xk1WpP7Lfk710+w/T2DZC4k +B/WXz//GIX/vw/Qtw+e3QB17aknKWyEvNYQTvacQd2fHXve+QXoa5jn8ymp/pxko +Nx+49zVamuPprf97udJmup6g7drqUxeuQy5OgxrKi4AWx7EOKpG74Wk0ajX93UDF +ZcH9IitTEs2z7iBNPTbvIIMw16NiJBGQ/bXujWnC0zerpPt5TdmB0gsR8g+G4izY +26V+6V8SutUofIZP3ISYiAGpInxl/F+M41wyKQIDAQABAoIBABlW+IgcZlhjSG/K +NTUdDDd4E6XrcijnbxKLUfzpGlUAYV3IgirQrfLvB6U/d6DJ5M5Vs1G6aApZGFFX +VuoRi1bpIljNhBzy+QP32uBLv1DczA8AHFiQwM3KDCiYvrSZpuekJjAYZT3adcVO +FSfdpkrhXAGA4wL1mfozEPke+2cXxCQT2+ya7HcryEXeKCX3ioAmNkRnPMbalmWs +h5fwPLU/zYEGdyjpy/v1GJOupCv/TdO23sW6a6P6QmX7e3JZL2UMP2Db1W3H5nrb +rhuVkdPv5emWG5BRKYNiYutdDM3LAhCTvofwZ/HTf2XgTZtFtjY9u53OrNeuuRfD +EPSTc5kCgYEA/ThSNta8pCJFsSovxJXVX9YJuS+bhkEhCkwR78yLGSOf+bneAfQU +psl73FBzMHgsInGNNdfu6HhEdOUr2SPc/rTmPfcXmVMB1lou0qWXsdP6o5Omi+/G +94F6A14wW409P5D7M4SzmQpcscz9Eo3PQEiRWZqc3e7rqPKtrTrb7aMCgYEA9PR9 +4qShykbYhn5IgBNSDnSHEzAen/AOKmt15b0WOUbPMxgm5ISw6b3S2L1Jr5WUD3L+ +/ZDq2/NpSBhY0YiTxOy6ASAJktOKTehusmtnKtCG4fAouUbrYZTy3hQ9AzrFlbGj +0niGe30FxOb7loIzO6c5no/IWIWti0TaHFQmBcMCgYA4vU8GxLU+XgOZwCc1+JD4 +SYCiQz2pJpzdrs/yGLYuauIk1fYeH/Aks08SveAhwzu4eBM5NmptIs3MF7HslPGn +DSfuSJi8HXJy8oA9aSeFjEdEMgiGoLLHsPaDHK7fRNWtkbIICDsqFr1QhetLJgWD +lP8kwdVWkXuyvrYV8/Nl2wKBgAffG3R/CdvwLuETiczj0g043WZWc4V8N1hRn7ZE +P0ST6/CeZBGP2geI3A+u9YE480sTzPHlCEcNKWLxtVj2ZJk9ajTDDMu1FIqwX25I +WUZc8RitmQFrMAEwwiU0zUkfSF4pGTSYPuoU+Gx9Z7RMds0B8K4RvMRrd0o0ajXi +1l31AoGAfG7CP4gkPBjQDsXHJDVLdit+/V35M28+3q6U7YRa9oqM6jyfUbspKWsZ +gk3H3KKgeoyshZmZc3gIOcQLsRR9vniRdlLZuVato1EI4PzRRkX6oYfYLmRI9QT1 +ZphRiF/4xHsdh0/KVLXQOGiwDvdKP+3hH7TX9Qm9z4yDES7FLHA= +-----END RSA PRIVATE KEY----- diff --git a/lib/Constants.php b/lib/Constants.php index c94a23fb..68e54f78 100644 --- a/lib/Constants.php +++ b/lib/Constants.php @@ -29,8 +29,9 @@ const AVERAGE_READING_WORDS_PER_MINUTE = 275; // No trailing slash on any of the below constants. 
const SITE_URL = 'https://standardebooks.org'; const SITE_ROOT = '/standardebooks.org'; -const TEMPLATES_PATH = SITE_ROOT . '/templates'; +const TEMPLATES_PATH = SITE_ROOT . '/web/templates'; const REPOS_PATH = SITE_ROOT . '/ebooks'; +const EBOOKS_DIST_PATH = SITE_ROOT . '/web/www/ebooks/'; const GITHUB_SECRET_FILE_PATH = SITE_ROOT . '/config/secrets/se-vcs-bot@github.com'; // Set in the GitHub organization global webhook settings. const GITHUB_WEBHOOK_LOG_FILE_PATH = '/var/log/local/webhooks-github.log'; // Must be writable by `www-data` Unix user. diff --git a/lib/Ebook.php b/lib/Ebook.php index 0192ca0c..8947341c 100644 --- a/lib/Ebook.php +++ b/lib/Ebook.php @@ -51,7 +51,7 @@ class Ebook{ public function __construct(string $wwwFilesystemPath){ // First, construct a source repo path from our WWW filesystem path. - $this->RepoFilesystemPath = str_replace(SITE_ROOT . '/www/ebooks/', '', $wwwFilesystemPath); + $this->RepoFilesystemPath = str_replace(EBOOKS_DIST_PATH, '', $wwwFilesystemPath); $this->RepoFilesystemPath = SITE_ROOT . '/ebooks/' . str_replace('/', '_', $this->RepoFilesystemPath) . '.git'; if(!is_dir($this->RepoFilesystemPath)){ // On dev systems we might not have the bare repos, so make an adjustment @@ -71,7 +71,7 @@ class Ebook{ } $this->WwwFilesystemPath = $wwwFilesystemPath; - $this->Url = str_replace(SITE_ROOT . '/www', '', $this->WwwFilesystemPath); + $this->Url = str_replace(SITE_ROOT . '/web/www', '', $this->WwwFilesystemPath); $rawMetadata = file_get_contents($wwwFilesystemPath . '/src/epub/content.opf') ?: ''; diff --git a/lib/Library.php b/lib/Library.php index 3fe2447e..f67c5837 100644 --- a/lib/Library.php +++ b/lib/Library.php @@ -106,22 +106,27 @@ class Library{ $ebooks = apcu_fetch('ebooks'); } catch(Safe\Exceptions\ApcuException $ex){ - foreach(explode("\n", trim(shell_exec('find ' . SITE_ROOT . '/www/ebooks/ -name "content.opf"') ?? 
'')) as $filename){ - $ebookWwwFilesystemPath = preg_replace('|/src/.+|ius', '', $filename) ?: ''; - try{ - $ebook = apcu_fetch('ebook-' . $ebookWwwFilesystemPath); - } - catch(Safe\Exceptions\ApcuException $ex){ + foreach(explode("\n", trim(shell_exec('find ' . EBOOKS_DIST_PATH . ' -name "content.opf"') ?? '')) as $filename){ + if(trim($filename) != ''){ + $ebookWwwFilesystemPath = preg_replace('|/src/.+|ius', '', $filename) ?: ''; + $ebook = null; try{ - $ebook = new Ebook($ebookWwwFilesystemPath); - apcu_store('ebook-' . $ebookWwwFilesystemPath, $ebook); + $ebook = apcu_fetch('ebook-' . $ebookWwwFilesystemPath); } - catch(InvalidEbookException $ieEx){ - // Do nothing if one specific ebook is causing problems + catch(Safe\Exceptions\ApcuException $ex){ + try{ + $ebook = new Ebook($ebookWwwFilesystemPath); + apcu_store('ebook-' . $ebookWwwFilesystemPath, $ebook); + } + catch(InvalidEbookException $ieEx){ + // Do nothing if one specific ebook is causing problems + } + } + + if($ebook !== null){ + $ebooks[] = $ebook; } } - - $ebooks[] = $ebook; } apcu_store('ebooks', $ebooks); @@ -173,7 +178,7 @@ class Library{ catch(Safe\Exceptions\ApcuException $ex){ $ebooks = []; - foreach(explode("\n", trim(shell_exec('find ' . SITE_ROOT . '/www/ebooks/ -name "content.opf"') ?? '')) as $filename){ + foreach(explode("\n", trim(shell_exec('find ' . EBOOKS_DIST_PATH . ' -name "content.opf"') ?? '')) as $filename){ try{ $ebookWwwFilesystemPath = preg_replace('|/src/.+|ius', '', $filename) ?? ''; try{ @@ -207,7 +212,7 @@ class Library{ catch(Safe\Exceptions\ApcuException $ex){ $ebooks = []; - foreach(explode("\n", trim(shell_exec('find ' . SITE_ROOT . '/www/ebooks/ -name "content.opf"') ?? '')) as $filename){ + foreach(explode("\n", trim(shell_exec('find ' . EBOOKS_DIST_PATH . ' -name "content.opf"') ?? '')) as $filename){ try{ $ebookWwwFilesystemPath = preg_replace('|/src/.+|ius', '', $filename) ?? 
''; try{ diff --git a/scripts/deploy-ebook-to-www b/scripts/deploy-ebook-to-www index f6a4c759..394009bb 100755 --- a/scripts/deploy-ebook-to-www +++ b/scripts/deploy-ebook-to-www @@ -20,7 +20,7 @@ require(){ command -v "$1" > /dev/null 2>&1 || { suggestion=""; if [ -n "$2" ]; verbose="false" group="se" -webRoot="/standardebooks.org" +webRoot="/standardebooks.org/web" webUrl="https://standardebooks.org" if [ $# -eq 0 ]; then diff --git a/scripts/generate-opds.php b/scripts/generate-opds.php index 7c8ccc6c..b2e218ee 100755 --- a/scripts/generate-opds.php +++ b/scripts/generate-opds.php @@ -1,7 +1,7 @@ &1', $output, $returnCode); + exec('sudo -H -u se-vcs-bot /standardebooks.org/web/scripts/deploy-ebook-to-www ' . escapeshellarg($dir) . ' 2>&1', $output, $returnCode); if($returnCode != 0){ Logger::WriteGithubWebhookLogEntry($requestId, 'Error deploying ebook to web. Output: ' . implode("\n", $output)); throw new WebhookException('Couldn\'t process ebook.', $post);