mirror of
https://github.com/standardebooks/web.git
synced 2025-07-13 01:52:02 -04:00
Record and summarize Ebook downloads in the DB (#498)
This commit is contained in:
parent
61b8ca27b1
commit
475c437126
12 changed files with 289 additions and 4 deletions
9
config/sql/se/EbookDownloadSummaries.sql
Normal file
9
config/sql/se/EbookDownloadSummaries.sql
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
CREATE TABLE IF NOT EXISTS `EbookDownloadSummaries` (
|
||||||
|
`EbookId` int(10) unsigned NOT NULL,
|
||||||
|
`Date` date NOT NULL,
|
||||||
|
`DownloadCount` int(10) unsigned NOT NULL DEFAULT 0,
|
||||||
|
`BotDownloadCount` int(10) unsigned NOT NULL DEFAULT 0,
|
||||||
|
UNIQUE INDEX `idxUnique` (`EbookId`, `Date`),
|
||||||
|
INDEX `index1` (Date, EbookId, DownloadCount),
|
||||||
|
INDEX `index2` (EbookId, DownloadCount)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
7
config/sql/se/EbookDownloads.sql
Normal file
7
config/sql/se/EbookDownloads.sql
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
CREATE TABLE IF NOT EXISTS `EbookDownloads` (
|
||||||
|
`EbookId` int(10) unsigned NOT NULL,
|
||||||
|
`Created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
`IpAddress` inet6 NULL,
|
||||||
|
`UserAgent` mediumtext NULL,
|
||||||
|
INDEX `idxCreated` (`Created`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
|
@ -27,6 +27,8 @@ CREATE TABLE IF NOT EXISTS `Ebooks` (
|
||||||
`IndexableText` text NULL,
|
`IndexableText` text NULL,
|
||||||
`IndexableAuthors` text NOT NULL,
|
`IndexableAuthors` text NOT NULL,
|
||||||
`IndexableCollections` text NULL,
|
`IndexableCollections` text NULL,
|
||||||
|
`DownloadsPast30Days` int(10) unsigned NOT NULL DEFAULT 0,
|
||||||
|
`DownloadsTotal` int(10) unsigned NOT NULL DEFAULT 0,
|
||||||
PRIMARY KEY (`EbookId`),
|
PRIMARY KEY (`EbookId`),
|
||||||
UNIQUE KEY `index1` (`Identifier`),
|
UNIQUE KEY `index1` (`Identifier`),
|
||||||
KEY `index2` (`EbookCreated`),
|
KEY `index2` (`EbookCreated`),
|
||||||
|
|
|
@ -83,6 +83,8 @@ final class Ebook{
|
||||||
/** When the database row was updated. */
|
/** When the database row was updated. */
|
||||||
public DateTimeImmutable $Updated;
|
public DateTimeImmutable $Updated;
|
||||||
public ?int $TextSinglePageByteCount = null;
|
public ?int $TextSinglePageByteCount = null;
|
||||||
|
public int $DownloadsPast30Days = 0;
|
||||||
|
public int $DownloadsTotal = 0;
|
||||||
|
|
||||||
/** @var array<GitCommit> $_GitCommits */
|
/** @var array<GitCommit> $_GitCommits */
|
||||||
protected array $_GitCommits;
|
protected array $_GitCommits;
|
||||||
|
@ -1421,6 +1423,14 @@ final class Ebook{
|
||||||
$error->Add(new Exceptions\InvalidEbookTextSinglePageByteCountException('Invalid Ebook TextSinglePageByteCount: ' . $this->TextSinglePageByteCount));
|
$error->Add(new Exceptions\InvalidEbookTextSinglePageByteCountException('Invalid Ebook TextSinglePageByteCount: ' . $this->TextSinglePageByteCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(isset($this->DownloadsPast30Days) && $this->DownloadsPast30Days < 0){
|
||||||
|
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid Ebook DownloadsPast30Days: ' . $this->DownloadsPast30Days));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(isset($this->DownloadsTotal) && $this->DownloadsTotal < 0){
|
||||||
|
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid Ebook DownloadsTotal: ' . $this->DownloadsTotal));
|
||||||
|
}
|
||||||
|
|
||||||
if(sizeof($this->Authors) == 0){
|
if(sizeof($this->Authors) == 0){
|
||||||
$error->Add(new Exceptions\EbookAuthorRequiredException());
|
$error->Add(new Exceptions\EbookAuthorRequiredException());
|
||||||
}
|
}
|
||||||
|
@ -1819,7 +1829,7 @@ final class Ebook{
|
||||||
AdvancedEpubUrl, KepubUrl, Azw3Url, DistCoverUrl, Title, FullTitle, AlternateTitle,
|
AdvancedEpubUrl, KepubUrl, Azw3Url, DistCoverUrl, Title, FullTitle, AlternateTitle,
|
||||||
Description, LongDescription, Language, WordCount, ReadingEase, GitHubUrl, WikipediaUrl,
|
Description, LongDescription, Language, WordCount, ReadingEase, GitHubUrl, WikipediaUrl,
|
||||||
EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText, IndexableAuthors,
|
EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText, IndexableAuthors,
|
||||||
IndexableCollections)
|
IndexableCollections, DownloadsPast30Days, DownloadsTotal)
|
||||||
values (?,
|
values (?,
|
||||||
?,
|
?,
|
||||||
?,
|
?,
|
||||||
|
@ -1844,6 +1854,8 @@ final class Ebook{
|
||||||
?,
|
?,
|
||||||
?,
|
?,
|
||||||
?,
|
?,
|
||||||
|
?,
|
||||||
|
?,
|
||||||
?)
|
?)
|
||||||
returning EbookId
|
returning EbookId
|
||||||
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
|
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
|
||||||
|
@ -1851,7 +1863,8 @@ final class Ebook{
|
||||||
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
|
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
|
||||||
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
|
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
|
||||||
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
|
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
|
||||||
$this->IndexableAuthors, $this->IndexableCollections]);
|
$this->IndexableAuthors, $this->IndexableCollections, $this->DownloadsPast30Days,
|
||||||
|
$this->DownloadsTotal]);
|
||||||
|
|
||||||
try{
|
try{
|
||||||
$this->AddTags();
|
$this->AddTags();
|
||||||
|
@ -1918,7 +1931,9 @@ final class Ebook{
|
||||||
TextSinglePageByteCount = ?,
|
TextSinglePageByteCount = ?,
|
||||||
IndexableText = ?,
|
IndexableText = ?,
|
||||||
IndexableAuthors = ?,
|
IndexableAuthors = ?,
|
||||||
IndexableCollections = ?
|
IndexableCollections = ?,
|
||||||
|
DownloadsPast30Days = ?,
|
||||||
|
DownloadsTotal = ?
|
||||||
where
|
where
|
||||||
EbookId = ?
|
EbookId = ?
|
||||||
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
|
', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl,
|
||||||
|
@ -1926,7 +1941,8 @@ final class Ebook{
|
||||||
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
|
$this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription,
|
||||||
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
|
$this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl,
|
||||||
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
|
$this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText,
|
||||||
$this->IndexableAuthors, $this->IndexableCollections,
|
$this->IndexableAuthors, $this->IndexableCollections, $this->DownloadsPast30Days,
|
||||||
|
$this->DownloadsTotal,
|
||||||
$this->EbookId]);
|
$this->EbookId]);
|
||||||
}
|
}
|
||||||
catch(Exceptions\DuplicateDatabaseKeyException){
|
catch(Exceptions\DuplicateDatabaseKeyException){
|
||||||
|
@ -2144,6 +2160,18 @@ final class Ebook{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws Exceptions\InvalidEbookDownloadException
|
||||||
|
*/
|
||||||
|
public function AddDownload(?string $ipAddress, ?string $userAgent): void{
|
||||||
|
$ebookDownload = new EbookDownload();
|
||||||
|
$ebookDownload->EbookId = $this->EbookId;
|
||||||
|
$ebookDownload->IpAddress = $ipAddress;
|
||||||
|
$ebookDownload->UserAgent = $userAgent;
|
||||||
|
|
||||||
|
$ebookDownload->Create();
|
||||||
|
}
|
||||||
|
|
||||||
public function Delete(): void{
|
public function Delete(): void{
|
||||||
$this->RemoveTags();
|
$this->RemoveTags();
|
||||||
$this->RemoveLocSubjects();
|
$this->RemoveLocSubjects();
|
||||||
|
|
89
lib/EbookDownload.php
Normal file
89
lib/EbookDownload.php
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
<?
|
||||||
|
|
||||||
|
use Safe\DateTimeImmutable;
|
||||||
|
|
||||||
|
class EbookDownload{
|
||||||
|
public int $EbookId;
|
||||||
|
public DateTimeImmutable $Created;
|
||||||
|
public ?string $IpAddress;
|
||||||
|
public ?string $UserAgent;
|
||||||
|
|
||||||
|
public function IsBot(): bool{
|
||||||
|
if(empty($this->UserAgent) || strlen($this->UserAgent) < 20){
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
$botKeywords = [
|
||||||
|
'bot', 'crawl', 'spider', 'slurp', 'chatgpt', 'search',
|
||||||
|
'python', 'java', 'curl', 'wget', 'scrape'
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach($botKeywords as $keyword){
|
||||||
|
if(strpos($this->UserAgent, $keyword) !== false){
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws Exceptions\InvalidEbookDownloadException
|
||||||
|
*/
|
||||||
|
public function Validate(): void{
|
||||||
|
$error = new Exceptions\InvalidEbookDownloadException();
|
||||||
|
|
||||||
|
if(!isset($this->EbookId)){
|
||||||
|
$error->Add(new Exceptions\EbookDownloadEbookIdRequiredException());
|
||||||
|
}
|
||||||
|
|
||||||
|
if($this->IpAddress == ''){
|
||||||
|
$this->IpAddress = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if($this->UserAgent == ''){
|
||||||
|
$this->UserAgent = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The `IpAddress` column expects IPv6 address strings.
|
||||||
|
if(is_string($this->IpAddress) && filter_var($this->IpAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)){
|
||||||
|
$this->IpAddress = '::ffff:' . $this->IpAddress;
|
||||||
|
}
|
||||||
|
|
||||||
|
if($error->HasExceptions){
|
||||||
|
throw $error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws Exceptions\InvalidEbookDownloadException
|
||||||
|
*/
|
||||||
|
public function Create(): void{
|
||||||
|
$this->Validate();
|
||||||
|
|
||||||
|
$this->Created = NOW;
|
||||||
|
|
||||||
|
Db::Query('
|
||||||
|
INSERT into EbookDownloads (EbookId, Created, IpAddress, UserAgent)
|
||||||
|
values (?,
|
||||||
|
?,
|
||||||
|
?,
|
||||||
|
?)
|
||||||
|
', [$this->EbookId, $this->Created, $this->IpAddress, $this->UserAgent]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array<EbookDownload>
|
||||||
|
*/
|
||||||
|
public static function GetAllByDate(DateTimeImmutable $date): array{
|
||||||
|
$startDate = $date->setTime(0, 0, 0);
|
||||||
|
$endDate = $date->setTime(0, 0, 0)->modify('+1 day');
|
||||||
|
|
||||||
|
return Db::Query('
|
||||||
|
SELECT *
|
||||||
|
from EbookDownloads
|
||||||
|
where Created >= ?
|
||||||
|
and Created < ?
|
||||||
|
', [$startDate, $endDate], EbookDownload::class);
|
||||||
|
}
|
||||||
|
}
|
52
lib/EbookDownloadSummary.php
Normal file
52
lib/EbookDownloadSummary.php
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
<?
|
||||||
|
|
||||||
|
use Safe\DateTimeImmutable;
|
||||||
|
|
||||||
|
class EbookDownloadSummary{
|
||||||
|
public int $EbookId;
|
||||||
|
public DateTimeImmutable $Date;
|
||||||
|
public int $DownloadCount = 0;
|
||||||
|
public int $BotDownloadCount = 0;
|
||||||
|
|
||||||
|
public function __construct(int $ebookId, DateTimeImmutable $date){
|
||||||
|
$this->EbookId = $ebookId;
|
||||||
|
$this->Date = $date;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws Exceptions\InvalidEbookDownloadSummaryException
|
||||||
|
*/
|
||||||
|
public function Validate(): void{
|
||||||
|
$error = new Exceptions\InvalidEbookDownloadSummaryException();
|
||||||
|
|
||||||
|
if($this->DownloadCount < 0){
|
||||||
|
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid EbookDownloadSummary DownloadCount: ' . $this->DownloadCount));
|
||||||
|
}
|
||||||
|
|
||||||
|
if($this->BotDownloadCount < 0){
|
||||||
|
$error->Add(new Exceptions\InvalidEbookDownloadCountException('Invalid EbookDownloadSummary BotDownloadCount: ' . $this->BotDownloadCount));
|
||||||
|
}
|
||||||
|
|
||||||
|
if($error->HasExceptions){
|
||||||
|
throw $error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws Exceptions\InvalidEbookDownloadSummaryException
|
||||||
|
*/
|
||||||
|
public function Create(): void{
|
||||||
|
$this->Validate();
|
||||||
|
|
||||||
|
Db::Query('
|
||||||
|
INSERT into EbookDownloadSummaries (EbookId, Date, DownloadCount, BotDownloadCount)
|
||||||
|
values (?,
|
||||||
|
?,
|
||||||
|
?,
|
||||||
|
?)
|
||||||
|
on duplicate key update
|
||||||
|
DownloadCount = value(DownloadCount),
|
||||||
|
BotDownloadCount = value(BotDownloadCount)
|
||||||
|
', [$this->EbookId, $this->Date, $this->DownloadCount, $this->BotDownloadCount]);
|
||||||
|
}
|
||||||
|
}
|
7
lib/Exceptions/EbookDownloadEbookIdRequiredException.php
Normal file
7
lib/Exceptions/EbookDownloadEbookIdRequiredException.php
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
<?
|
||||||
|
namespace Exceptions;
|
||||||
|
|
||||||
|
class EbookDownloadEbookIdRequiredException extends AppException{
|
||||||
|
/** @var string $message */
|
||||||
|
protected $message = 'EbookDownload EbookId required.';
|
||||||
|
}
|
5
lib/Exceptions/InvalidEbookDownloadCountException.php
Normal file
5
lib/Exceptions/InvalidEbookDownloadCountException.php
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
<?
|
||||||
|
namespace Exceptions;
|
||||||
|
|
||||||
|
class InvalidEbookDownloadCountException extends AppException{
|
||||||
|
}
|
7
lib/Exceptions/InvalidEbookDownloadException.php
Normal file
7
lib/Exceptions/InvalidEbookDownloadException.php
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
<?
|
||||||
|
namespace Exceptions;
|
||||||
|
|
||||||
|
class InvalidEbookDownloadException extends ValidationException{
|
||||||
|
/** @var string $message */
|
||||||
|
protected $message = 'EbookDownload is invalid.';
|
||||||
|
}
|
7
lib/Exceptions/InvalidEbookDownloadSummaryException.php
Normal file
7
lib/Exceptions/InvalidEbookDownloadSummaryException.php
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
<?
|
||||||
|
namespace Exceptions;
|
||||||
|
|
||||||
|
class InvalidEbookDownloadSummaryException extends ValidationException{
|
||||||
|
/** @var string $message */
|
||||||
|
protected $message = 'EbookDownloadSummary is invalid.';
|
||||||
|
}
|
59
scripts/recompute-ebook-downloads
Executable file
59
scripts/recompute-ebook-downloads
Executable file
|
@ -0,0 +1,59 @@
|
||||||
|
#!/usr/bin/php
|
||||||
|
<?
|
||||||
|
require_once('/standardebooks.org/web/lib/Core.php');
|
||||||
|
|
||||||
|
$downloadDates = Db::Query('
|
||||||
|
SELECT distinct date(Created) as DownloadDate
|
||||||
|
from EbookDownloads
|
||||||
|
');
|
||||||
|
|
||||||
|
foreach($downloadDates as $date){
|
||||||
|
$downloadDate = $date->DownloadDate;
|
||||||
|
$summaries = [];
|
||||||
|
|
||||||
|
$ebookDownloads = EbookDownload::GetAllByDate($downloadDate);
|
||||||
|
foreach($ebookDownloads as $ebookDownload){
|
||||||
|
$ebookId = $ebookDownload->EbookId;
|
||||||
|
|
||||||
|
if(!isset($summaries[$ebookId])){
|
||||||
|
$summaries[$ebookId] = new EbookDownloadSummary($ebookId, $downloadDate);
|
||||||
|
}
|
||||||
|
|
||||||
|
if($ebookDownload->IsBot()){
|
||||||
|
$summaries[$ebookId]->BotDownloadCount++;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
$summaries[$ebookId]->DownloadCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach($summaries as $summary){
|
||||||
|
$summary->Create();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Db::Query('
|
||||||
|
UPDATE Ebooks e
|
||||||
|
left join (
|
||||||
|
select EbookId, sum(DownloadCount) AS DownloadsPast30Days
|
||||||
|
from EbookDownloadSummaries
|
||||||
|
where Date >= curdate() - interval 29 day
|
||||||
|
group by EbookId
|
||||||
|
) s on e.EbookId = s.EbookId
|
||||||
|
set e.DownloadsPast30Days = coalesce(s.DownloadsPast30Days, 0)
|
||||||
|
');
|
||||||
|
|
||||||
|
Db::Query('
|
||||||
|
UPDATE Ebooks e
|
||||||
|
left join (
|
||||||
|
select EbookId, sum(DownloadCount) AS DownloadsTotal
|
||||||
|
from EbookDownloadSummaries
|
||||||
|
group by EbookId
|
||||||
|
) s on e.EbookId = s.EbookId
|
||||||
|
set e.DownloadsTotal = coalesce(s.DownloadsTotal, 0)
|
||||||
|
');
|
||||||
|
|
||||||
|
Db::Query('
|
||||||
|
DELETE from EbookDownloads
|
||||||
|
where Created < utc_timestamp() - interval 60 day
|
||||||
|
');
|
|
@ -34,6 +34,19 @@ try{
|
||||||
throw new Exceptions\InvalidFileException();
|
throw new Exceptions\InvalidFileException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @var string|null $ipAddress */
|
||||||
|
$ipAddress = $_SERVER['REMOTE_ADDR'] ?? null;
|
||||||
|
|
||||||
|
/** @var string|null $userAgent */
|
||||||
|
$userAgent = $_SERVER['HTTP_USER_AGENT'] ?? null;
|
||||||
|
|
||||||
|
try{
|
||||||
|
$ebook->AddDownload($ipAddress, $userAgent);
|
||||||
|
}
|
||||||
|
catch(Exceptions\InvalidEbookDownloadException){
|
||||||
|
// Pass. Allow the download to continue even if it isn't recorded.
|
||||||
|
}
|
||||||
|
|
||||||
if($skipThankYouPage){
|
if($skipThankYouPage){
|
||||||
// Download the file directly, without showing the thank you page.
|
// Download the file directly, without showing the thank you page.
|
||||||
$downloadUrl = $ebook->GetDownloadUrl($format);
|
$downloadUrl = $ebook->GetDownloadUrl($format);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue