Record and summarize Ebook downloads in the DB (#498)

This commit is contained in:
Mike Colagrosso 2025-05-22 10:23:24 -06:00 committed by GitHub
parent 61b8ca27b1
commit 475c437126
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 289 additions and 4 deletions

View file

@ -0,0 +1,9 @@
-- One row per ebook per calendar day, holding that day's aggregated download counts.
-- Bot downloads are tallied separately from all other downloads.
CREATE TABLE IF NOT EXISTS `EbookDownloadSummaries` (
	`EbookId` int(10) unsigned NOT NULL,
	`Date` date NOT NULL,
	`DownloadCount` int(10) unsigned NOT NULL DEFAULT 0,
	`BotDownloadCount` int(10) unsigned NOT NULL DEFAULT 0,
	-- At most one summary row may exist per ebook per day.
	UNIQUE INDEX `idxUnique` (`EbookId`, `Date`),
	INDEX `index1` (`Date`, `EbookId`, `DownloadCount`),
	INDEX `index2` (`EbookId`, `DownloadCount`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

View file

@ -0,0 +1,7 @@
-- Raw download log: one row per recorded ebook download.
CREATE TABLE IF NOT EXISTS `EbookDownloads` (
	`EbookId` int(10) unsigned NOT NULL,
	-- When the download was recorded; defaults to the time of insertion.
	`Created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
	-- Stored as an IPv6 value; may be NULL when the address is unknown.
	`IpAddress` inet6 NULL,
	`UserAgent` mediumtext NULL,
	-- Supports range lookups of downloads by creation time.
	INDEX `idxCreated` (`Created`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

View file

@ -27,6 +27,8 @@ CREATE TABLE IF NOT EXISTS `Ebooks` (
`IndexableText` text NULL, `IndexableText` text NULL,
`IndexableAuthors` text NOT NULL, `IndexableAuthors` text NOT NULL,
`IndexableCollections` text NULL, `IndexableCollections` text NULL,
`DownloadsPast30Days` int(10) unsigned NOT NULL DEFAULT 0,
`DownloadsTotal` int(10) unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (`EbookId`), PRIMARY KEY (`EbookId`),
UNIQUE KEY `index1` (`Identifier`), UNIQUE KEY `index1` (`Identifier`),
KEY `index2` (`EbookCreated`), KEY `index2` (`EbookCreated`),

View file

@ -83,6 +83,8 @@ final class Ebook{
/** When the database row was updated. */ /** When the database row was updated. */
public DateTimeImmutable $Updated; public DateTimeImmutable $Updated;
public ?int $TextSinglePageByteCount = null; public ?int $TextSinglePageByteCount = null;
public int $DownloadsPast30Days = 0;
public int $DownloadsTotal = 0;
/** @var array<GitCommit> $_GitCommits */ /** @var array<GitCommit> $_GitCommits */
protected array $_GitCommits; protected array $_GitCommits;
@ -1421,6 +1423,14 @@ final class Ebook{
$error->Add(new Exceptions\InvalidEbookTextSinglePageByteCountException('Invalid Ebook TextSinglePageByteCount: ' . $this->TextSinglePageByteCount)); $error->Add(new Exceptions\InvalidEbookTextSinglePageByteCountException('Invalid Ebook TextSinglePageByteCount: ' . $this->TextSinglePageByteCount));
} }
if(isset($this->DownloadsPast30Days) && $this->DownloadsPast30Days < 0){
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid Ebook DownloadsPast30Days: ' . $this->DownloadsPast30Days));
}
if(isset($this->DownloadsTotal) && $this->DownloadsTotal < 0){
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid Ebook DownloadsTotal: ' . $this->DownloadsTotal));
}
if(sizeof($this->Authors) == 0){ if(sizeof($this->Authors) == 0){
$error->Add(new Exceptions\EbookAuthorRequiredException()); $error->Add(new Exceptions\EbookAuthorRequiredException());
} }
@ -1819,7 +1829,7 @@ final class Ebook{
AdvancedEpubUrl, KepubUrl, Azw3Url, DistCoverUrl, Title, FullTitle, AlternateTitle, AdvancedEpubUrl, KepubUrl, Azw3Url, DistCoverUrl, Title, FullTitle, AlternateTitle,
Description, LongDescription, Language, WordCount, ReadingEase, GitHubUrl, WikipediaUrl, Description, LongDescription, Language, WordCount, ReadingEase, GitHubUrl, WikipediaUrl,
EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText, IndexableAuthors, EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText, IndexableAuthors,
IndexableCollections) IndexableCollections, DownloadsPast30Days, DownloadsTotal)
values (?, values (?,
?, ?,
?, ?,
@ -1844,6 +1854,8 @@ final class Ebook{
?, ?,
?, ?,
?, ?,
?,
?,
?) ?)
returning EbookId returning EbookId
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl, ', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
@ -1851,7 +1863,8 @@ final class Ebook{
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription, $this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl, $this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText, $this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
$this->IndexableAuthors, $this->IndexableCollections]); $this->IndexableAuthors, $this->IndexableCollections, $this->DownloadsPast30Days,
$this->DownloadsTotal]);
try{ try{
$this->AddTags(); $this->AddTags();
@ -1918,7 +1931,9 @@ final class Ebook{
TextSinglePageByteCount = ?, TextSinglePageByteCount = ?,
IndexableText = ?, IndexableText = ?,
IndexableAuthors = ?, IndexableAuthors = ?,
IndexableCollections = ? IndexableCollections = ?,
DownloadsPast30Days = ?,
DownloadsTotal = ?
where where
EbookId = ? EbookId = ?
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl, ', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
@ -1926,7 +1941,8 @@ final class Ebook{
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription, $this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl, $this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText, $this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
$this->IndexableAuthors, $this->IndexableCollections, $this->IndexableAuthors, $this->IndexableCollections, $this->DownloadsPast30Days,
$this->DownloadsTotal,
$this->EbookId]); $this->EbookId]);
} }
catch(Exceptions\DuplicateDatabaseKeyException){ catch(Exceptions\DuplicateDatabaseKeyException){
@ -2144,6 +2160,18 @@ final class Ebook{
} }
} }
/**
 * Records one download of this ebook as a new `EbookDownload` row.
 *
 * @param ?string $ipAddress The downloader's IP address, or `null` if unknown.
 * @param ?string $userAgent The downloader's user agent string, or `null` if unknown.
 *
 * @throws Exceptions\InvalidEbookDownloadException If the new `EbookDownload` fails validation.
 */
public function AddDownload(?string $ipAddress, ?string $userAgent): void{
	$download = new EbookDownload();

	$download->UserAgent = $userAgent;
	$download->IpAddress = $ipAddress;
	$download->EbookId = $this->EbookId;

	$download->Create();
}
public function Delete(): void{ public function Delete(): void{
$this->RemoveTags(); $this->RemoveTags();
$this->RemoveLocSubjects(); $this->RemoveLocSubjects();

89
lib/EbookDownload.php Normal file
View file

@ -0,0 +1,89 @@
<?
use Safe\DateTimeImmutable;
/**
 * A single recorded download of an ebook, stored as one row in the `EbookDownloads` table.
 */
class EbookDownload{
	public int $EbookId;
	public DateTimeImmutable $Created;
	public ?string $IpAddress;
	public ?string $UserAgent;

	/**
	 * Heuristically decide whether this download came from an automated client.
	 *
	 * A missing user agent, or one shorter than 20 bytes, is treated as a bot. Otherwise the user agent is searched, ignoring case, for a list of keywords commonly found in bot user agents.
	 *
	 * @return bool `true` if the download looks automated, `false` otherwise.
	 */
	public function IsBot(): bool{
		if(empty($this->UserAgent) || strlen($this->UserAgent) < 20){
			return true;
		}

		$botKeywords = [
			'bot', 'crawl', 'spider', 'slurp', 'chatgpt', 'search',
			'python', 'java', 'curl', 'wget', 'scrape'
		];

		foreach($botKeywords as $keyword){
			// The match must ignore case: real-world agents are often capitalized, e.g. `Python-urllib`, `Java/1.8`, `Wget/1.21`, `ChatGPT-User`.
			if(stripos($this->UserAgent, $keyword) !== false){
				return true;
			}
		}

		return false;
	}

	/**
	 * Normalize and validate this object before it is written to the database.
	 *
	 * Empty or unassigned `IpAddress` and `UserAgent` values become `null`, and an IPv4 address is rewritten as an IPv4-mapped IPv6 address.
	 *
	 * @throws Exceptions\InvalidEbookDownloadException If `EbookId` is not set.
	 */
	public function Validate(): void{
		$error = new Exceptions\InvalidEbookDownloadException();

		if(!isset($this->EbookId)){
			$error->Add(new Exceptions\EbookDownloadEbookIdRequiredException());
		}

		// `??` also covers a typed property that was never assigned; reading such a property directly would raise an `Error`.
		if(($this->IpAddress ?? '') === ''){
			$this->IpAddress = null;
		}

		if(($this->UserAgent ?? '') === ''){
			$this->UserAgent = null;
		}

		// The `IpAddress` column expects IPv6 address strings.
		if($this->IpAddress !== null && filter_var($this->IpAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false){
			$this->IpAddress = '::ffff:' . $this->IpAddress;
		}

		if($error->HasExceptions){
			throw $error;
		}
	}

	/**
	 * Validate this download, stamp it with the current time, and insert it into `EbookDownloads`.
	 *
	 * @throws Exceptions\InvalidEbookDownloadException If validation fails.
	 */
	public function Create(): void{
		$this->Validate();

		$this->Created = NOW;

		Db::Query('
			INSERT into EbookDownloads (EbookId, Created, IpAddress, UserAgent)
			values (?,
				?,
				?,
				?)
		', [$this->EbookId, $this->Created, $this->IpAddress, $this->UserAgent]);
	}

	/**
	 * Get every download recorded on the calendar day of `$date`.
	 *
	 * The range is half-open: from midnight of that day up to, but not including, midnight of the next day.
	 *
	 * @return array<EbookDownload>
	 */
	public static function GetAllByDate(DateTimeImmutable $date): array{
		$startDate = $date->setTime(0, 0, 0);
		$endDate = $startDate->modify('+1 day');

		return Db::Query('
			SELECT *
			from EbookDownloads
			where Created >= ?
				and Created < ?
		', [$startDate, $endDate], EbookDownload::class);
	}
}

View file

@ -0,0 +1,52 @@
<?
use Safe\DateTimeImmutable;
/**
 * The aggregated download counts of one ebook for one calendar day, stored as a row in `EbookDownloadSummaries`.
 */
class EbookDownloadSummary{
	public int $EbookId;
	public DateTimeImmutable $Date;
	public int $DownloadCount = 0;
	public int $BotDownloadCount = 0;

	/**
	 * Start a new summary for the given ebook and day, with both counters at zero.
	 */
	public function __construct(int $ebookId, DateTimeImmutable $date){
		$this->Date = $date;
		$this->EbookId = $ebookId;
	}

	/**
	 * Check that neither counter is negative.
	 *
	 * @throws Exceptions\InvalidEbookDownloadSummaryException If either counter is negative.
	 */
	public function Validate(): void{
		$errors = new Exceptions\InvalidEbookDownloadSummaryException();

		if($this->DownloadCount < 0){
			$errors->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid EbookDownloadSummary DownloadCount: ' . $this->DownloadCount));
		}

		if($this->BotDownloadCount < 0){
			$errors->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid EbookDownloadSummary BotDownloadCount: ' . $this->BotDownloadCount));
		}

		if(!$errors->HasExceptions){
			return;
		}

		throw $errors;
	}

	/**
	 * Validate this summary and write it to the database.
	 *
	 * If a row already exists for the same ebook and date, its counters are replaced with the values of this object.
	 *
	 * @throws Exceptions\InvalidEbookDownloadSummaryException If validation fails.
	 */
	public function Create(): void{
		$this->Validate();

		$parameters = [$this->EbookId, $this->Date, $this->DownloadCount, $this->BotDownloadCount];

		Db::Query('
			INSERT into EbookDownloadSummaries (EbookId, Date, DownloadCount, BotDownloadCount)
			values (?,
				?,
				?,
				?)
			on duplicate key update
				DownloadCount = value(DownloadCount),
				BotDownloadCount = value(BotDownloadCount)
		', $parameters);
	}
}

View file

@ -0,0 +1,7 @@
<?
namespace Exceptions;
/**
 * Signals that an `EbookDownload` was validated without its `EbookId` being set.
 */
class EbookDownloadEbookIdRequiredException extends AppException{
/** @var string $message */
protected $message = 'EbookDownload EbookId required.';
}

View file

@ -0,0 +1,5 @@
<?
namespace Exceptions;
/**
 * Signals an invalid (negative) download count; the message is supplied by the code that raises it.
 */
class InvalidEbookDownloadCountException extends AppException{
}

View file

@ -0,0 +1,7 @@
<?
namespace Exceptions;
/**
 * Collects the validation errors found while validating an `EbookDownload`.
 */
class InvalidEbookDownloadException extends ValidationException{
/** @var string $message */
protected $message = 'EbookDownload is invalid.';
}

View file

@ -0,0 +1,7 @@
<?
namespace Exceptions;
/**
 * Collects the validation errors found while validating an `EbookDownloadSummary`.
 */
class InvalidEbookDownloadSummaryException extends ValidationException{
/** @var string $message */
protected $message = 'EbookDownloadSummary is invalid.';
}

View file

@ -0,0 +1,59 @@
#!/usr/bin/php
<?
require_once('/standardebooks.org/web/lib/Core.php');
// Step 1: Rebuild the daily per-ebook summaries for every day that still has raw rows in `EbookDownloads`.
$downloadDates = Db::Query('
	SELECT distinct date(Created) as DownloadDate
	from EbookDownloads
');

foreach($downloadDates as $date){
	// NOTE(review): this assumes `Db::Query()` returns `DownloadDate` as a `DateTimeImmutable`, which `EbookDownload::GetAllByDate()` and the `EbookDownloadSummary` constructor require. Confirm.
	$downloadDate = $date->DownloadDate;

	// Summaries for this day, keyed by `EbookId`.
	$summaries = [];

	$ebookDownloads = EbookDownload::GetAllByDate($downloadDate);
	foreach($ebookDownloads as $ebookDownload){
		$ebookId = $ebookDownload->EbookId;
		if(!isset($summaries[$ebookId])){
			$summaries[$ebookId] = new EbookDownloadSummary($ebookId, $downloadDate);
		}

		// Bot downloads are tallied separately so they don't inflate the counts shown for an ebook.
		if($ebookDownload->IsBot()){
			$summaries[$ebookId]->BotDownloadCount++;
		}
		else{
			$summaries[$ebookId]->DownloadCount++;
		}
	}

	// `Create()` replaces the counts of an existing summary for the same ebook and date.
	foreach($summaries as $summary){
		$summary->Create();
	}
}

// Step 2: Refresh the cached counters on `Ebooks`. The left join resets ebooks without any summary rows to zero.
// The window is today plus the 29 preceding days, i.e. 30 days in total.
Db::Query('
	UPDATE Ebooks e
	left join (
		select EbookId, sum(DownloadCount) AS DownloadsPast30Days
		from EbookDownloadSummaries
		where Date >= curdate() - interval 29 day
		group by EbookId
	) s on e.EbookId = s.EbookId
	set e.DownloadsPast30Days = coalesce(s.DownloadsPast30Days, 0)
');

Db::Query('
	UPDATE Ebooks e
	left join (
		select EbookId, sum(DownloadCount) AS DownloadsTotal
		from EbookDownloadSummaries
		group by EbookId
	) s on e.EbookId = s.EbookId
	set e.DownloadsTotal = coalesce(s.DownloadsTotal, 0)
');

// Step 3: Prune old raw rows, always on a whole-day boundary.
// Cutting at a time of day would leave the oldest remaining day only partially populated. The next run would then re-summarize that day from the surviving rows and overwrite its correct summary with a lower count.
// `curdate()` is used so that the boundary falls on the same day boundaries as `date(Created)` above.
Db::Query('
	DELETE from EbookDownloads
	where Created < curdate() - interval 60 day
');

View file

@ -34,6 +34,19 @@ try{
throw new Exceptions\InvalidFileException(); throw new Exceptions\InvalidFileException();
} }
// Record this download. Both values come from the request and may be absent.
/** @var string|null $userAgent */
$userAgent = $_SERVER['HTTP_USER_AGENT'] ?? null;

/** @var string|null $ipAddress */
$ipAddress = $_SERVER['REMOTE_ADDR'] ?? null;

try{
	$ebook->AddDownload($ipAddress, $userAgent);
}
catch(Exceptions\InvalidEbookDownloadException){
	// Recording is best-effort: a download that can't be recorded must still be served.
}
if($skipThankYouPage){ if($skipThankYouPage){
// Download the file directly, without showing the thank you page. // Download the file directly, without showing the thank you page.
$downloadUrl = $ebook->GetDownloadUrl($format); $downloadUrl = $ebook->GetDownloadUrl($format);