diff --git a/config/sql/se/Ebooks.sql b/config/sql/se/Ebooks.sql index 9eda89e9..795abf7d 100644 --- a/config/sql/se/Ebooks.sql +++ b/config/sql/se/Ebooks.sql @@ -25,8 +25,13 @@ CREATE TABLE IF NOT EXISTS `Ebooks` ( `EbookUpdated` datetime NULL, `TextSinglePageByteCount` bigint unsigned NULL, `IndexableText` text NOT NULL, + `IndexableAuthors` text NOT NULL, + `IndexableCollections` text NULL, PRIMARY KEY (`EbookId`), UNIQUE KEY `index1` (`Identifier`), KEY `index2` (`EbookCreated`), - FULLTEXT `idxSearch` (`IndexableText`) + FULLTEXT `idxSearch` (`IndexableText`), + FULLTEXT `idxSearchTitle` (`Title`), + FULLTEXT `idxSearchAuthors` (`IndexableAuthors`), + FULLTEXT `idxSearchCollections` (`IndexableCollections`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; diff --git a/lib/Constants.php b/lib/Constants.php index bb9ff970..3352f5ea 100644 --- a/lib/Constants.php +++ b/lib/Constants.php @@ -44,6 +44,10 @@ const EBOOKS_MAX_STRING_LENGTH = 250; const EBOOKS_MAX_LONG_STRING_LENGTH = 500; const EBOOK_SINGLE_PAGE_SIZE_WARNING = 3 * 1024 * 1024; // 3145728 bytes. +const EBOOK_SEARCH_WEIGHT_TITLE = 10; +const EBOOK_SEARCH_WEIGHT_AUTHORS = 8; +const EBOOK_SEARCH_WEIGHT_COLLECTIONS = 3; + const ARTWORK_THUMBNAIL_HEIGHT = 350; const ARTWORK_THUMBNAIL_WIDTH = 350; const ARTWORK_PER_PAGE = 20; diff --git a/lib/Ebook.php b/lib/Ebook.php index 57a5d78b..700e91b8 100644 --- a/lib/Ebook.php +++ b/lib/Ebook.php @@ -44,6 +44,8 @@ use function Safe\shell_exec; * @property string $TextSinglePageUrl * @property string $TextSinglePageSizeFormatted * @property string $IndexableText + * @property string $IndexableAuthors + * @property ?string $IndexableCollections * @property ?EbookPlaceholder $EbookPlaceholder * @property array $Projects * @property array $PastProjects @@ -128,6 +130,8 @@ final class Ebook{ protected string $_TextSinglePageUrl; protected string $_TextSinglePageSizeFormatted; protected string $_IndexableText; + protected string $_IndexableAuthors; + protected ?string $_IndexableCollections = null; protected ?EbookPlaceholder $_EbookPlaceholder = null; /** @var array $_Projects */ protected array $_Projects; @@ -732,13 +736,40 @@ final class Ebook{ } } - // Remove diacritics and non-alphanumeric characters. - $this->_IndexableText = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($this->_IndexableText))); + $this->_IndexableText = Formatter::RemoveDiacriticsAndNonalphanumerics($this->_IndexableText); } return $this->_IndexableText; } + protected function GetIndexableAuthors(): string{ + if(!isset($this->_IndexableAuthors)){ + $this->_IndexableAuthors = ''; + + foreach($this->Authors as $author){ + $this->_IndexableAuthors .= ' ' . $author->Name; + } + + $this->_IndexableAuthors = Formatter::RemoveDiacriticsAndNonalphanumerics($this->_IndexableAuthors); + } + + return $this->_IndexableAuthors; + } + + protected function GetIndexableCollections(): ?string{ + if(!isset($this->_IndexableCollections)){ + foreach($this->CollectionMemberships as $collectionMembership){ + $this->_IndexableCollections .= ' ' . $collectionMembership->Collection->Name; + } + + if(isset($this->_IndexableCollections)){ + $this->_IndexableCollections = Formatter::RemoveDiacriticsAndNonalphanumerics($this->_IndexableCollections); + } + } + + return $this->_IndexableCollections; + } + protected function GetEbookPlaceholder(): ?EbookPlaceholder{ if(!isset($this->_EbookPlaceholder)){ if(!isset($this->EbookId)){ @@ -1561,6 +1592,22 @@ final class Ebook{ $error->Add(new Exceptions\EbookIndexableTextRequiredException()); } + if(isset($this->IndexableAuthors)){ + $this->IndexableAuthors = trim($this->IndexableAuthors ?? ''); + + if($this->IndexableAuthors == ''){ + $error->Add(new Exceptions\EbookIndexableAuthorsRequiredException()); + } + } + else{ + $error->Add(new Exceptions\EbookIndexableAuthorsRequiredException()); + } + + $this->IndexableCollections = trim($this->IndexableCollections ?? ''); + if($this->IndexableCollections == ''){ + $this->IndexableCollections = null; + } + if(isset($this->EbookPlaceholder)){ try{ $this->EbookPlaceholder->Validate(); @@ -1897,7 +1944,8 @@ final class Ebook{ INSERT into Ebooks (Identifier, WwwFilesystemPath, RepoFilesystemPath, KindleCoverUrl, EpubUrl, AdvancedEpubUrl, KepubUrl, Azw3Url, DistCoverUrl, Title, FullTitle, AlternateTitle, Description, LongDescription, Language, WordCount, ReadingEase, GitHubUrl, WikipediaUrl, - EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText) + EbookCreated, EbookUpdated, TextSinglePageByteCount, IndexableText, IndexableAuthors, + IndexableCollections) values (?, ?, ?, @@ -1920,12 +1968,15 @@ final class Ebook{ ?, ?, ?, + ?, + ?, ?) ', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl, $this->AdvancedEpubUrl, $this->KepubUrl, $this->Azw3Url, $this->DistCoverUrl, $this->Title, $this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription, $this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl, - $this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText]); + $this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText, + $this->IndexableAuthors, $this->IndexableCollections]); $this->EbookId = Db::GetLastInsertedId(); @@ -1990,7 +2041,9 @@ final class Ebook{ EbookCreated = ?, EbookUpdated = ?, TextSinglePageByteCount = ?, - IndexableText = ? + IndexableText = ?, + IndexableAuthors = ?, + IndexableCollections = ? where EbookId = ? ', [$this->Identifier, $this->WwwFilesystemPath, $this->RepoFilesystemPath, $this->KindleCoverUrl, $this->EpubUrl, @@ -1998,6 +2051,7 @@ final class Ebook{ $this->FullTitle, $this->AlternateTitle, $this->Description, $this->LongDescription, $this->Language, $this->WordCount, $this->ReadingEase, $this->GitHubUrl, $this->WikipediaUrl, $this->EbookCreated, $this->EbookUpdated, $this->TextSinglePageByteCount, $this->IndexableText, + $this->IndexableAuthors, $this->IndexableCollections, $this->EbookId]); } catch(Exceptions\DuplicateDatabaseKeyException){ @@ -2380,6 +2434,7 @@ final class Ebook{ public static function GetAllByFilter(string $query = null, array $tags = [], Enums\EbookSortType $sort = null, int $page = 1, int $perPage = EBOOKS_PER_PAGE, Enums\EbookReleaseStatusFilter $releaseStatusFilter = Enums\EbookReleaseStatusFilter::All): array{ $limit = $perPage; $offset = (($page - 1) * $perPage); + $relevanceScoreField = ''; $joinContributors = ''; $joinTags = ''; $params = []; @@ -2424,10 +2479,22 @@ final class Ebook{ } if($query !== null && $query != ''){ - $query = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($query))); - $query = sprintf('"%s"', $query); // Require an exact match via double quotes. - $whereCondition .= ' and match(e.IndexableText) against(? in boolean mode) '; + // Preserve quotes in the query so the user can enter, e.g., "war and peace" for an exact match. + $query = trim(preg_replace('|[^a-zA-Z0-9" ]|ius', ' ', Formatter::RemoveDiacritics($query))); + + $relevanceScoreField = ', ( + match(e.Title) against (?) * ' . EBOOK_SEARCH_WEIGHT_TITLE . ' + + match(e.IndexableAuthors) against (?) * ' . EBOOK_SEARCH_WEIGHT_AUTHORS . ' + + match(e.IndexableCollections) against (?) * ' . EBOOK_SEARCH_WEIGHT_COLLECTIONS . ' + + match(e.IndexableText) against (?) + ) as relevance_score '; + + $whereCondition .= ' and match(e.IndexableText) against(?) '; $params[] = $query; + + if($sort == null || $sort == Enums\EbookSortType::Relevance || $sort == Enums\EbookSortType::Newest){ + $orderBy = 'relevance_score desc, e.EbookCreated desc'; + } } try{ @@ -2439,11 +2506,17 @@ final class Ebook{ ' . $whereCondition . ' ', $params); + if($relevanceScoreField != ''){ + // `relevance_score` is at the beginning of the query, so these params must go at the start of the array. + array_unshift($params, $query, $query, $query, $query); + } + $params[] = $limit; $params[] = $offset; $ebooks = Db::Query(' SELECT distinct e.* + ' . $relevanceScoreField . ' from Ebooks e ' . $joinContributors . ' ' . $joinTags . ' diff --git a/lib/Enums/EbookSortType.php b/lib/Enums/EbookSortType.php index c247951e..324e744e 100644 --- a/lib/Enums/EbookSortType.php +++ b/lib/Enums/EbookSortType.php @@ -6,4 +6,5 @@ enum EbookSortType: string{ case AuthorAlpha = 'author-alpha'; case ReadingEase = 'reading-ease'; case Length = 'length'; + case Relevance = 'relevance'; } diff --git a/lib/Exceptions/EbookIndexableAuthorsRequiredException.php b/lib/Exceptions/EbookIndexableAuthorsRequiredException.php new file mode 100644 index 00000000..2f383085 --- /dev/null +++ b/lib/Exceptions/EbookIndexableAuthorsRequiredException.php @@ -0,0 +1,7 @@ +