mirror of
https://github.com/standardebooks/web.git
synced 2025-07-13 01:52:02 -04:00
Record and summarize Ebook downloads in the DB (#498)
This commit is contained in:
parent
61b8ca27b1
commit
475c437126
12 changed files with 289 additions and 4 deletions
9
config/sql/se/EbookDownloadSummaries.sql
Normal file
9
config/sql/se/EbookDownloadSummaries.sql
Normal file
|
@ -0,0 +1,9 @@
|
|||
-- One row per ebook per calendar day, holding that day's aggregated download counts.
CREATE TABLE IF NOT EXISTS `EbookDownloadSummaries` (
  -- The ebook the counts belong to.
  `EbookId` int(10) unsigned NOT NULL,
  -- The calendar day being summarized.
  `Date` date NOT NULL,
  -- Downloads on that day that were not classified as bots.
  `DownloadCount` int(10) unsigned NOT NULL DEFAULT 0,
  -- Downloads on that day that were classified as bots.
  `BotDownloadCount` int(10) unsigned NOT NULL DEFAULT 0,
  -- At most one summary row per ebook per day.
  UNIQUE INDEX `idxUnique` (`EbookId`, `Date`),
  INDEX `index1` (`Date`, `EbookId`, `DownloadCount`),
  INDEX `index2` (`EbookId`, `DownloadCount`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
7
config/sql/se/EbookDownloads.sql
Normal file
7
config/sql/se/EbookDownloads.sql
Normal file
|
@ -0,0 +1,7 @@
|
|||
-- One row per individual ebook download request.
CREATE TABLE IF NOT EXISTS `EbookDownloads` (
  -- The ebook that was downloaded.
  `EbookId` int(10) unsigned NOT NULL,
  -- When the download happened; defaults to the insert time.
  `Created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  -- The client address, if known.
  `IpAddress` inet6 NULL,
  -- The client's user agent string, if any was sent.
  `UserAgent` mediumtext NULL,
  INDEX `idxCreated` (`Created`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
|
@ -27,6 +27,8 @@ CREATE TABLE IF NOT EXISTS `Ebooks` (
|
|||
`IndexableText` text NULL,
|
||||
`IndexableAuthors` text NOT NULL,
|
||||
`IndexableCollections` text NULL,
|
||||
`DownloadsPast30Days` int(10) unsigned NOT NULL DEFAULT 0,
|
||||
`DownloadsTotal` int(10) unsigned NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (`EbookId`),
|
||||
UNIQUE KEY `index1` (`Identifier`),
|
||||
KEY `index2` (`EbookCreated`),
|
||||
|
|
|
@ -83,6 +83,8 @@ final class Ebook{
|
|||
/** When the database row was updated. */
|
||||
public DateTimeImmutable $Updated;
|
||||
public ?int $TextSinglePageByteCount = null;
|
||||
public int $DownloadsPast30Days = 0;
|
||||
public int $DownloadsTotal = 0;
|
||||
|
||||
/** @var array<GitCommit> $_GitCommits */
|
||||
protected array $_GitCommits;
|
||||
|
@ -1421,6 +1423,14 @@ final class Ebook{
|
|||
$error->Add(new Exceptions\InvalidEbookTextSinglePageByteCountException('Invalid Ebook TextSinglePageByteCount: ' . $this->TextSinglePageByteCount));
|
||||
}
|
||||
|
||||
if(isset($this->DownloadsPast30Days) && $this->DownloadsPast30Days < 0){
|
||||
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid Ebook DownloadsPast30Days: ' . $this->DownloadsPast30Days));
|
||||
}
|
||||
|
||||
if(isset($this->DownloadsTotal) && $this->DownloadsTotal < 0){
|
||||
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid Ebook DownloadsTotal: ' . $this->DownloadsTotal));
|
||||
}
|
||||
|
||||
if(sizeof($this->Authors) == 0){
|
||||
$error->Add(new Exceptions\EbookAuthorRequiredException());
|
||||
}
|
||||
|
@ -1819,7 +1829,7 @@ final class Ebook{
|
|||
AdvancedEpubUrl, KepubUrl, Azw3Url, DistCoverUrl, Title, FullTitle, AlternateTitle,
|
||||
Description, LongDescription, Language, WordCount, ReadingEase, GitHubUrl, WikipediaUrl,
|
||||
EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText, IndexableAuthors,
|
||||
IndexableCollections)
|
||||
IndexableCollections, DownloadsPast30Days, DownloadsTotal)
|
||||
values (?,
|
||||
?,
|
||||
?,
|
||||
|
@ -1844,6 +1854,8 @@ final class Ebook{
|
|||
?,
|
||||
?,
|
||||
?,
|
||||
?,
|
||||
?,
|
||||
?)
|
||||
returning EbookId
|
||||
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
|
||||
|
@ -1851,7 +1863,8 @@ final class Ebook{
|
|||
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
|
||||
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
|
||||
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
|
||||
$this->IndexableAuthors, $this->IndexableCollections]);
|
||||
$this->IndexableAuthors, $this->IndexableCollections, $this->DownloadsPast30Days,
|
||||
$this->DownloadsTotal]);
|
||||
|
||||
try{
|
||||
$this->AddTags();
|
||||
|
@ -1918,7 +1931,9 @@ final class Ebook{
|
|||
TextSinglePageByteCount = ?,
|
||||
IndexableText = ?,
|
||||
IndexableAuthors = ?,
|
||||
IndexableCollections = ?
|
||||
IndexableCollections = ?,
|
||||
DownloadsPast30Days = ?,
|
||||
DownloadsTotal = ?
|
||||
where
|
||||
EbookId = ?
|
||||
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
|
||||
|
@ -1926,7 +1941,8 @@ final class Ebook{
|
|||
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
|
||||
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
|
||||
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
|
||||
$this->IndexableAuthors, $this->IndexableCollections,
|
||||
$this->IndexableAuthors, $this->IndexableCollections, $this->DownloadsPast30Days,
|
||||
$this->DownloadsTotal,
|
||||
$this->EbookId]);
|
||||
}
|
||||
catch(Exceptions\DuplicateDatabaseKeyException){
|
||||
|
@ -2144,6 +2160,18 @@ final class Ebook{
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Record one download of this ebook as a new `EbookDownload` row.
 *
 * @param ?string $ipAddress The address the request came from, if known.
 * @param ?string $userAgent The user agent string of the requesting client, if any.
 *
 * @throws Exceptions\InvalidEbookDownloadException
 */
public function AddDownload(?string $ipAddress, ?string $userAgent): void{
	$download = new EbookDownload();

	// Client details first, then tie the record to this ebook.
	$download->UserAgent = $userAgent;
	$download->IpAddress = $ipAddress;
	$download->EbookId = $this->EbookId;

	$download->Create();
}
|
||||
|
||||
public function Delete(): void{
|
||||
$this->RemoveTags();
|
||||
$this->RemoveLocSubjects();
|
||||
|
|
89
lib/EbookDownload.php
Normal file
89
lib/EbookDownload.php
Normal file
|
@ -0,0 +1,89 @@
|
|||
<?
|
||||
|
||||
use Safe\DateTimeImmutable;
|
||||
|
||||
/**
 * A single recorded download of an ebook, stored in the `EbookDownloads` table.
 */
class EbookDownload{
	public int $EbookId;
	public DateTimeImmutable $Created;
	public ?string $IpAddress;
	public ?string $UserAgent;

	/**
	 * Guess from the user agent whether this download came from an automated client.
	 *
	 * A missing or very short (under 20 bytes) user agent counts as a bot, as does any user agent containing one of a fixed list of keywords.
	 *
	 * @return bool `true` if the download looks automated, `false` otherwise.
	 */
	public function IsBot(): bool{
		if(empty($this->UserAgent) || strlen($this->UserAgent) < 20){
			return true;
		}

		$botKeywords = [
			'bot', 'crawl', 'spider', 'slurp', 'chatgpt', 'search',
			'python', 'java', 'curl', 'wget', 'scrape'
		];

		foreach($botKeywords as $keyword){
			// The keywords are lowercase but real user agents are mixed case (`Wget/1.21`, `Python-urllib/3.11`, `ChatGPT-User`), so the match must ignore case.
			if(stripos($this->UserAgent, $keyword) !== false){
				return true;
			}
		}

		return false;
	}

	/**
	 * Check this object and normalize `IpAddress` and `UserAgent` before it is saved.
	 *
	 * Unset or empty values become `null`, IPv4 addresses are rewritten in IPv4-mapped IPv6 form, and strings that are not IP addresses are discarded.
	 *
	 * @throws Exceptions\InvalidEbookDownloadException If `EbookId` has not been set.
	 */
	public function Validate(): void{
		$error = new Exceptions\InvalidEbookDownloadException();

		if(!isset($this->EbookId)){
			$error->Add(new Exceptions\EbookDownloadEbookIdRequiredException());
		}

		// `isset()` is `false` both for `null` and for a typed property that was never assigned, so these checks cannot trigger an "accessed before initialization" error.
		if(!isset($this->IpAddress) || $this->IpAddress == ''){
			$this->IpAddress = null;
		}

		if(!isset($this->UserAgent) || $this->UserAgent == ''){
			$this->UserAgent = null;
		}

		if($this->IpAddress !== null){
			if(filter_var($this->IpAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)){
				// The `IpAddress` column expects IPv6 address strings.
				$this->IpAddress = '::ffff:' . $this->IpAddress;
			}
			elseif(!filter_var($this->IpAddress, FILTER_VALIDATE_IP)){
				// Not an IP address at all. The column is nullable, so drop the value instead of letting the insert fail on it.
				$this->IpAddress = null;
			}
		}

		if($error->HasExceptions){
			throw $error;
		}
	}

	/**
	 * Validate this download, stamp `Created` with `NOW`, and insert it into `EbookDownloads`.
	 *
	 * @throws Exceptions\InvalidEbookDownloadException
	 */
	public function Create(): void{
		$this->Validate();

		$this->Created = NOW;

		Db::Query('
			INSERT into EbookDownloads (EbookId, Created, IpAddress, UserAgent)
			values (?,
				?,
				?,
				?)
		', [$this->EbookId, $this->Created, $this->IpAddress, $this->UserAgent]);
	}

	/**
	 * Get every download whose `Created` falls on the calendar day of `$date`.
	 *
	 * The range runs from midnight of that day (inclusive) to midnight of the next day (exclusive).
	 *
	 * @param DateTimeImmutable $date Any moment within the day of interest; its time of day is ignored.
	 *
	 * @return array<EbookDownload>
	 */
	public static function GetAllByDate(DateTimeImmutable $date): array{
		$startDate = $date->setTime(0, 0, 0);
		$endDate = $date->setTime(0, 0, 0)->modify('+1 day');

		return Db::Query('
			SELECT *
			from EbookDownloads
			where Created >= ?
			and Created < ?
		', [$startDate, $endDate], EbookDownload::class);
	}
}
|
52
lib/EbookDownloadSummary.php
Normal file
52
lib/EbookDownloadSummary.php
Normal file
|
@ -0,0 +1,52 @@
|
|||
<?
|
||||
|
||||
use Safe\DateTimeImmutable;
|
||||
|
||||
/**
 * The download totals for one ebook on one day, stored in the `EbookDownloadSummaries` table.
 */
class EbookDownloadSummary{
	public int $EbookId;
	public DateTimeImmutable $Date;
	/** Downloads not classified as bots. */
	public int $DownloadCount = 0;
	/** Downloads classified as bots. */
	public int $BotDownloadCount = 0;

	/**
	 * Start an empty summary (both counts at zero) for the given ebook and day.
	 */
	public function __construct(int $ebookId, DateTimeImmutable $date){
		$this->Date = $date;
		$this->EbookId = $ebookId;
	}

	/**
	 * Ensure neither count is negative.
	 *
	 * @throws Exceptions\InvalidEbookDownloadSummaryException
	 */
	public function Validate(): void{
		$validationError = new Exceptions\InvalidEbookDownloadSummaryException();

		if(0 > $this->DownloadCount){
			$validationError->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid EbookDownloadSummary DownloadCount: ' . $this->DownloadCount));
		}

		if(0 > $this->BotDownloadCount){
			$validationError->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid EbookDownloadSummary BotDownloadCount: ' . $this->BotDownloadCount));
		}

		if(!$validationError->HasExceptions){
			return;
		}

		throw $validationError;
	}

	/**
	 * Validate and save this summary. If a row already exists for the same unique key, its two counts are overwritten with the new values.
	 *
	 * @throws Exceptions\InvalidEbookDownloadSummaryException
	 */
	public function Create(): void{
		$this->Validate();

		$parameters = [$this->EbookId, $this->Date, $this->DownloadCount, $this->BotDownloadCount];

		Db::Query('
			INSERT into EbookDownloadSummaries (EbookId, Date, DownloadCount, BotDownloadCount)
			values (?,
				?,
				?,
				?)
			on duplicate key update
				DownloadCount = value(DownloadCount),
				BotDownloadCount = value(BotDownloadCount)
		', $parameters);
	}
}
|
7
lib/Exceptions/EbookDownloadEbookIdRequiredException.php
Normal file
7
lib/Exceptions/EbookDownloadEbookIdRequiredException.php
Normal file
|
@ -0,0 +1,7 @@
|
|||
<?
|
||||
namespace Exceptions;
|
||||
|
||||
/**
 * Signals that an `EbookDownload` has no `EbookId`.
 */
class EbookDownloadEbookIdRequiredException extends AppException{
	/** @var string $message The default message for this exception. */
	protected $message = 'EbookDownload EbookId required.';
}
|
5
lib/Exceptions/InvalidEbookDownloadCountException.php
Normal file
5
lib/Exceptions/InvalidEbookDownloadCountException.php
Normal file
|
@ -0,0 +1,5 @@
|
|||
<?
|
||||
namespace Exceptions;
|
||||
|
||||
/**
 * Signals an invalid download count; the message is supplied by whoever constructs it.
 */
class InvalidEbookDownloadCountException extends AppException{
}
|
7
lib/Exceptions/InvalidEbookDownloadException.php
Normal file
7
lib/Exceptions/InvalidEbookDownloadException.php
Normal file
|
@ -0,0 +1,7 @@
|
|||
<?
|
||||
namespace Exceptions;
|
||||
|
||||
/**
 * Validation error for an `EbookDownload`.
 */
class InvalidEbookDownloadException extends ValidationException{
	/** @var string $message The default message for this exception. */
	protected $message = 'EbookDownload is invalid.';
}
|
7
lib/Exceptions/InvalidEbookDownloadSummaryException.php
Normal file
7
lib/Exceptions/InvalidEbookDownloadSummaryException.php
Normal file
|
@ -0,0 +1,7 @@
|
|||
<?
|
||||
namespace Exceptions;
|
||||
|
||||
/**
 * Validation error for an `EbookDownloadSummary`.
 */
class InvalidEbookDownloadSummaryException extends ValidationException{
	/** @var string $message The default message for this exception. */
	protected $message = 'EbookDownloadSummary is invalid.';
}
|
59
scripts/recompute-ebook-downloads
Executable file
59
scripts/recompute-ebook-downloads
Executable file
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/php
|
||||
<?
|
||||
require_once('/standardebooks.org/web/lib/Core.php');
|
||||
|
||||
// Step 1: Rebuild the per-day, per-ebook summaries for every day that still has raw download rows.
$dateRows = Db::Query('
	SELECT distinct date(Created) as DownloadDate
	from EbookDownloads
');

foreach($dateRows as $dateRow){
	$day = $dateRow->DownloadDate;

	// Summaries for this day, keyed by `EbookId`.
	$summariesByEbookId = [];

	foreach(EbookDownload::GetAllByDate($day) as $download){
		$id = $download->EbookId;

		// Create the summary the first time an ebook is seen on this day.
		$summariesByEbookId[$id] ??= new EbookDownloadSummary($id, $day);

		if(!$download->IsBot()){
			$summariesByEbookId[$id]->DownloadCount++;
		}
		else{
			$summariesByEbookId[$id]->BotDownloadCount++;
		}
	}

	foreach($summariesByEbookId as $daySummary){
		$daySummary->Create();
	}
}

// Step 2: Refresh the 30-day count on every ebook. Ebooks with no summary rows in the window are reset to 0.
Db::Query('
	UPDATE Ebooks e
	left join (
		select EbookId, sum(DownloadCount) AS DownloadsPast30Days
		from EbookDownloadSummaries
		where Date >= curdate() - interval 29 day
		group by EbookId
	) s on e.EbookId = s.EbookId
	set e.DownloadsPast30Days = coalesce(s.DownloadsPast30Days, 0)
');

// Step 3: Refresh the all-time count on every ebook in the same way.
Db::Query('
	UPDATE Ebooks e
	left join (
		select EbookId, sum(DownloadCount) AS DownloadsTotal
		from EbookDownloadSummaries
		group by EbookId
	) s on e.EbookId = s.EbookId
	set e.DownloadsTotal = coalesce(s.DownloadsTotal, 0)
');

// Step 4: Remove raw download rows older than 60 days.
Db::Query('
	DELETE from EbookDownloads
	where Created < utc_timestamp() - interval 60 day
');
|
|
@ -34,6 +34,19 @@ try{
|
|||
throw new Exceptions\InvalidFileException();
|
||||
}
|
||||
|
||||
/** @var string|null $ipAddress */
|
||||
$ipAddress = $_SERVER['REMOTE_ADDR'] ?? null;
|
||||
|
||||
/** @var string|null $userAgent */
|
||||
$userAgent = $_SERVER['HTTP_USER_AGENT'] ?? null;
|
||||
|
||||
try{
|
||||
$ebook->AddDownload($ipAddress, $userAgent);
|
||||
}
|
||||
catch(Exceptions\InvalidEbookDownloadException){
|
||||
// Pass. Allow the download to continue even if it isn't recorded.
|
||||
}
|
||||
|
||||
if($skipThankYouPage){
|
||||
// Download the file directly, without showing the thank you page.
|
||||
$downloadUrl = $ebook->GetDownloadUrl($format);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue