From 658db66c2bb18c83d78873f82470adee6575f3b2 Mon Sep 17 00:00:00 2001 From: Alex Cabal Date: Tue, 8 Dec 2020 14:27:01 -0600 Subject: [PATCH] Include ToC entries in search results for short and poetry compilations --- lib/Ebook.php | 23 +++++++++++++++++++++++ templates/SearchForm.php | 2 +- www/ebooks/index.php | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/Ebook.php b/lib/Ebook.php index 2bd683af..a4137232 100644 --- a/lib/Ebook.php +++ b/lib/Ebook.php @@ -57,6 +57,7 @@ class Ebook{ public $ModifiedTimestamp; public $TextUrl; public $TextSinglePageUrl; + public $TocEntries = null; // A list of non-Roman ToC entries ONLY IF the work has the 'shorts' or 'poetry' tag, null otherwise public function __construct(string $wwwFilesystemPath){ // First, construct a source repo path from our WWW filesystem path. @@ -194,8 +195,23 @@ class Ebook{ } // Get SE tags + $includeToc = false; foreach($xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [] as $tag){ $this->Tags[] = new Tag($tag); + + if($tag == 'Shorts' || $tag == 'Poetry'){ + $includeToc = true; + } + } + + // Fill the ToC if necessary + if($includeToc){ + $this->TocEntries = []; + $tocDom = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($wwwFilesystemPath . '/toc.xhtml') ?: '')); + $tocDom->registerXPathNamespace('epub', 'http://www.idpf.org/2007/ops'); + foreach($tocDom->xpath('/html/body//nav[@epub:type="toc"]//a[not(contains(@epub:type, "z3998:roman")) and not(text() = "Titlepage" or text() = "Imprint" or text() = "Colophon" or text() = "Endnotes" or text() = "Uncopyright") and not(contains(@href, "halftitle"))]') as $item){ + $this->TocEntries[] = (string)$item; + } } // Get SE collections @@ -423,6 +439,7 @@ class Ebook{ public function Contains(string $query): bool{ // When searching an ebook, we search the title, alternate title, author(s), SE tags, series data, and LoC tags. + // Also, if the ebook is shorts or poetry, search the ToC as well. $searchString = $this->FullTitle ?? $this->Title; @@ -444,6 +461,12 @@ class Ebook{ $searchString .= ' ' . $tag; } + if($this->TocEntries !== null){ + foreach($this->TocEntries as $item){ + $searchString .= ' ' . $item; + } + } + // Remove diacritics and non-alphanumeric characters $searchString = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($searchString)) ?? ''); $query = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($query)) ?? ''); diff --git a/templates/SearchForm.php b/templates/SearchForm.php index c1f11324..35ab3bc0 100644 --- a/templates/SearchForm.php +++ b/templates/SearchForm.php @@ -7,7 +7,7 @@ $allSelected = sizeof($tags) == 0 || in_array('all', $tags); - + diff --git a/www/ebooks/index.php b/www/ebooks/index.php index 43e62bd8..dc08de10 100644 --- a/www/ebooks/index.php +++ b/www/ebooks/index.php @@ -121,7 +121,7 @@ try{ } } - $queryString = preg_replace('/^&/ius', '', $queryString); + $queryString = Formatter::ToPlainText(preg_replace('/^&/ius', '', $queryString)); } catch(\Exception $ex){ http_response_code(404);