Include ToC entries in search results for short and poetry compilations

This commit is contained in:
Alex Cabal 2020-12-08 14:27:01 -06:00
parent 95ee5265bc
commit 658db66c2b
3 changed files with 25 additions and 2 deletions

View file

@ -57,6 +57,7 @@ class Ebook{
public $ModifiedTimestamp; public $ModifiedTimestamp;
public $TextUrl; public $TextUrl;
public $TextSinglePageUrl; public $TextSinglePageUrl;
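public $TocEntries = null; // A list of ToC entry titles, excluding Roman-numeral entries and boilerplate sections (titlepage, imprint, colophon, endnotes, uncopyright, halftitle), populated ONLY IF the work has the 'Shorts' or 'Poetry' tag; null otherwise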
public function __construct(string $wwwFilesystemPath){ public function __construct(string $wwwFilesystemPath){
// First, construct a source repo path from our WWW filesystem path. // First, construct a source repo path from our WWW filesystem path.
@ -194,8 +195,23 @@ class Ebook{
} }
// Get SE tags // Get SE tags
$includeToc = false;
foreach($xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [] as $tag){ foreach($xml->xpath('/package/metadata/meta[@property="se:subject"]') ?: [] as $tag){
$this->Tags[] = new Tag($tag); $this->Tags[] = new Tag($tag);
if($tag == 'Shorts' || $tag == 'Poetry'){
$includeToc = true;
}
}
// Fill the ToC if necessary
if($includeToc){
$this->TocEntries = [];
$tocDom = new SimpleXMLElement(str_replace('xmlns=', 'ns=', file_get_contents($wwwFilesystemPath . '/toc.xhtml') ?: ''));
$tocDom->registerXPathNamespace('epub', 'http://www.idpf.org/2007/ops');
foreach($tocDom->xpath('/html/body//nav[@epub:type="toc"]//a[not(contains(@epub:type, "z3998:roman")) and not(text() = "Titlepage" or text() = "Imprint" or text() = "Colophon" or text() = "Endnotes" or text() = "Uncopyright") and not(contains(@href, "halftitle"))]') as $item){
$this->TocEntries[] = (string)$item;
}
} }
// Get SE collections // Get SE collections
@ -423,6 +439,7 @@ class Ebook{
public function Contains(string $query): bool{ public function Contains(string $query): bool{
// When searching an ebook, we search the title, alternate title, author(s), SE tags, series data, and LoC tags. // When searching an ebook, we search the title, alternate title, author(s), SE tags, series data, and LoC tags.
// Also, if the ebook is tagged Shorts or Poetry, search its ToC entries as well.
$searchString = $this->FullTitle ?? $this->Title; $searchString = $this->FullTitle ?? $this->Title;
@ -444,6 +461,12 @@ class Ebook{
$searchString .= ' ' . $tag; $searchString .= ' ' . $tag;
} }
if($this->TocEntries !== null){
foreach($this->TocEntries as $item){
$searchString .= ' ' . $item;
}
}
// Remove diacritics and non-alphanumeric characters // Remove diacritics and non-alphanumeric characters
$searchString = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($searchString)) ?? ''); $searchString = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($searchString)) ?? '');
$query = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($query)) ?? ''); $query = trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', Formatter::RemoveDiacritics($query)) ?? '');

View file

@ -7,7 +7,7 @@ $allSelected = sizeof($tags) == 0 || in_array('all', $tags);
<option value="all">All</option> <option value="all">All</option>
<? foreach(Library::GetTags() as $tag){ <? foreach(Library::GetTags() as $tag){
$lcTag = mb_strtolower($tag); ?> $lcTag = mb_strtolower($tag); ?>
<option value="<?= $lcTag ?>"<? if(!$allSelected && in_array($lcTag, $tags)){ ?> selected="selected"<? } ?>><?= $tag ?></option> <option value="<?= Formatter::ToPlainText($lcTag) ?>"<? if(!$allSelected && in_array($lcTag, $tags)){ ?> selected="selected"<? } ?>><?= Formatter::ToPlainText($tag) ?></option>
<? } ?> <? } ?>
</select> </select>
</label> </label>

View file

@ -121,7 +121,7 @@ try{
} }
} }
$queryString = preg_replace('/^&amp;/ius', '', $queryString); $queryString = Formatter::ToPlainText(preg_replace('/^&amp;/ius', '', $queryString));
} }
catch(\Exception $ex){ catch(\Exception $ex){
http_response_code(404); http_response_code(404);