mirror of
https://github.com/standardebooks/web.git
synced 2025-07-15 19:06:49 -04:00
Initial commit
This commit is contained in:
commit
28c8a3f0ba
136 changed files with 13350 additions and 0 deletions
482
lib/Ebook.php
Normal file
482
lib/Ebook.php
Normal file
|
@ -0,0 +1,482 @@
|
|||
<?
|
||||
/**
 * A single ebook, populated from three places: its deployed directory under the web root,
 * the short history of its git repository, and the metadata in `src/epub/content.opf`.
 *
 * All work happens in the constructor; afterwards the public properties are read directly.
 */
class Ebook{
	public $WwwFilesystemPath; // Directory passed to the constructor
	public $RepoFilesystemPath; // SITE_ROOT . '/ebooks/<path with / replaced by _>.git', or the same without '.git' if that directory doesn't exist
	public $Url; // $WwwFilesystemPath with the SITE_ROOT . '/www' prefix removed
	public $KindleCoverUrl; // The following five URLs stay null if no matching file exists in /dist/
	public $EpubUrl;
	public $Epub3Url;
	public $KepubUrl;
	public $Azw3Url;
	public $HasDownloads; // True if any of the four ebook file URLs above is set
	public $GitCommits = []; // Array of GitCommits, newest first, at most 5
	public $Tags = []; // Array of strings from <meta property="meta-auth">
	public $LocTags = []; // Array of strings from <dc:subject>
	public $Identifier; // Contents of the first <dc:identifier>
	public $UrlSafeIdentifier; // $Identifier without the site prefix and with / replaced by _
	public $HeroImageUrl;
	public $HeroImage2xUrl;
	public $CoverImageUrl;
	public $CoverImage2xUrl;
	public $Title;
	public $FullTitle;
	public $Description;
	public $LongDescription;
	public $Language;
	public $WordCount; // int; 0 if not present in the metadata
	public $ReadingEase; // float; 0 if not present in the metadata
	public $ReadingEaseDescription;
	public $ReadingTime; // Human-readable string like "2 hours 5 minutes"
	public $GitHubUrl;
	public $WikipediaUrl;
	public $SourceUrls = []; // Array of ['source' => one of the SOURCE_* constants, 'url' => string]
	public $Authors = []; // Array of Contributors
	public $AuthorsHtml;
	public $AuthorsUrl; // This is a single URL even if there are multiple authors; for example, /ebooks/karl-marx_friedrich-engels/
	public $Illustrators = []; // Array of Contributors
	public $Translators = []; // Array of Contributors
	public $Contributors = []; // Array of Contributors
	public $ContributorsHtml; // Stays null if there are no contributors, translators, or illustrators
	public $TitleWithCreditsHtml = '';
	public $Timestamp; // DateTime built from <dc:date>

	/**
	 * Load an ebook from its directory under the web root.
	 *
	 * @param string $wwwFilesystemPath The ebook's directory, expected to be below SITE_ROOT . '/www/ebooks/'.
	 *
	 * @throws InvalidEbookException If the web directory, the repo directory, or content.opf is missing,
	 *                               or if no commits can be read from the repo.
	 * @throws EbookParsingException If <dc:identifier>, <dc:title>, or <dc:creator> is missing.
	 */
	public function __construct($wwwFilesystemPath){
		// First, construct a source repo path from our WWW filesystem path.
		$this->RepoFilesystemPath = str_replace(SITE_ROOT . '/www/ebooks/', '', $wwwFilesystemPath);
		$this->RepoFilesystemPath = SITE_ROOT . '/ebooks/' . str_replace('/', '_', $this->RepoFilesystemPath) . '.git';

		if(!is_dir($this->RepoFilesystemPath)){ // On dev systems we might not have the bare repos, so make an adjustment
			$this->RepoFilesystemPath = preg_replace('/\.git$/ius', '', $this->RepoFilesystemPath);
		}

		if(!is_dir($wwwFilesystemPath)){
			throw new InvalidEbookException('Invalid www filesystem path: ' . $wwwFilesystemPath);
		}

		if(!is_dir($this->RepoFilesystemPath)){
			throw new InvalidEbookException('Invalid repo filesystem path: ' . $this->RepoFilesystemPath);
		}

		if(!is_file($wwwFilesystemPath . '/src/epub/content.opf')){
			throw new InvalidEbookException('Invalid content.opf file: ' . $wwwFilesystemPath . '/src/epub/content.opf');
		}

		$this->WwwFilesystemPath = $wwwFilesystemPath;
		$this->Url = str_replace(SITE_ROOT . '/www', '', $this->WwwFilesystemPath);

		$rawMetadata = file_get_contents($wwwFilesystemPath . '/src/epub/content.opf');

		// Get the SE identifier.
		preg_match('|<dc:identifier[^>]*?>(.+?)</dc:identifier>|ius', $rawMetadata, $matches);
		if(sizeof($matches) != 2){
			throw new EbookParsingException('Invalid <dc:identifier> element.');
		}
		$this->Identifier = $matches[1];

		$this->UrlSafeIdentifier = str_replace(['url:https://standardebooks.org/ebooks/', '/'], ['', '_'], $this->Identifier);

		// Generate the download URLs. `*.epub` would also match `*.kepub.epub`, so Kobo files are
		// excluded from the plain epub lookup instead of relying on glob()'s sort order.
		$this->KindleCoverUrl = $this->FindDistUrl('*_EBOK_portrait.jpg');
		$this->EpubUrl = $this->FindDistUrl('*.epub', '.kepub.epub');
		$this->Epub3Url = $this->FindDistUrl('*.epub3');
		$this->KepubUrl = $this->FindDistUrl('*.kepub.epub');
		$this->Azw3Url = $this->FindDistUrl('*.azw3');

		$this->HasDownloads = $this->EpubUrl || $this->Epub3Url || $this->KepubUrl || $this->Azw3Url;

		// Fill in the short history of this repo.
		// shell_exec() returns null if git fails or prints nothing, so cast before splitting.
		$gitLog = shell_exec('cd ' . escapeshellarg($this->RepoFilesystemPath) . ' && git log -n5 --pretty=format:"%ct %s"');

		foreach(explode("\n", (string)$gitLog) as $entry){
			$parts = explode(' ', $entry, 2);

			// Skip blank or malformed lines; each valid line is "<unix timestamp> <subject>".
			if(sizeof($parts) != 2 || $parts[0] === ''){
				continue;
			}

			$this->GitCommits[] = new GitCommit($parts[0], $parts[1]);
		}

		if(sizeof($this->GitCommits) == 0){
			// The cover URLs below are derived from the latest commit, so we can't continue without one.
			throw new InvalidEbookException('No git history found in repo: ' . $this->RepoFilesystemPath);
		}

		// Get cover image URLs. The hash of the latest commit is part of the file name, so the URLs change with each new commit.
		$hash = substr(sha1($this->GitCommits[0]->Timestamp->format('U') . ' ' . $this->GitCommits[0]->Message), 0, 8);
		$imageUrlPrefix = '/images/covers/' . $this->UrlSafeIdentifier . '-' . $hash;
		$this->CoverImageUrl = $imageUrlPrefix . '-cover.jpg';
		$this->CoverImage2xUrl = $imageUrlPrefix . '-cover@2x.jpg';
		$this->HeroImageUrl = $imageUrlPrefix . '-hero.jpg';
		$this->HeroImage2xUrl = $imageUrlPrefix . '-hero@2x.jpg';

		// Now do some heavy XML lifting!
		// Renaming `xmlns=` drops the default namespace so the XPath queries below can use unprefixed element names.
		$xml = new SimpleXmlElement(str_replace('xmlns=', 'ns=', $rawMetadata));
		$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

		$this->Title = $this->NullIfEmpty($xml->xpath('/package/metadata/dc:title'));
		if($this->Title === null){
			throw new EbookParsingException('Invalid <dc:title> element.');
		}

		$this->Title = str_replace('\'', '’', $this->Title);

		$this->FullTitle = $this->NullIfEmpty($xml->xpath('/package/metadata/dc:title[@id="fulltitle"]'));

		// A missing <dc:date> yields an empty string, which DateTime treats as "now".
		$this->Timestamp = new \DateTime((string)($xml->xpath('/package/metadata/dc:date')[0] ?? ''));

		// Get SE tags
		foreach($xml->xpath('/package/metadata/meta[@property="meta-auth"]') as $tag){
			$this->Tags[] = (string)$tag;
		}

		// Get LoC tags
		foreach($xml->xpath('/package/metadata/dc:subject') as $tag){
			$this->LocTags[] = (string)$tag;
		}

		// Figure out authors and contributors.
		foreach($xml->xpath('/package/metadata/dc:creator') as $author){
			$id = (string)$author->attributes()->id;

			$this->Authors[] = new Contributor(
				(string)$author,
				(string)$this->GetRefinement($xml, 'file-as', $id), // Authors always get a string sort name, possibly empty
				$this->GetRefinement($xml, 'se:name.person.full-name', $id),
				$this->GetRefinement($xml, 'se:url.encyclopedia.wikipedia', $id)
			);
		}

		if(sizeof($this->Authors) == 0){
			throw new EbookParsingException('Invalid <dc:creator> element.');
		}

		$this->AuthorsUrl = preg_replace('|url:https://standardebooks.org/ebooks/([^/]+)/.*|ius', '/ebooks/\1/', $this->Identifier);

		foreach($xml->xpath('/package/metadata/dc:contributor') as $contributor){
			$id = (string)$contributor->attributes()->id;

			// A contributor may have several roles; they get one Contributor object per role.
			foreach($xml->xpath('/package/metadata/meta[@property="role"][@refines="#' . $id . '"]') as $role){
				$c = new Contributor(
					(string)$contributor,
					$this->GetRefinement($xml, 'file-as', $id),
					$this->GetRefinement($xml, 'se:name.person.full-name', $id),
					$this->GetRefinement($xml, 'se:url.encyclopedia.wikipedia', $id)
				);

				switch((string)$role){
					case 'trl':
						$this->Translators[] = $c;
						break;
					case 'ill':
						$this->Illustrators[] = $c;
						break;
					case 'ctb':
						$this->Contributors[] = $c;
						break;
				}
			}
		}

		// Some basic data.
		$this->Description = $this->NullIfEmpty($xml->xpath('/package/metadata/dc:description'));
		$this->Language = $this->NullIfEmpty($xml->xpath('/package/metadata/dc:language'));
		$this->LongDescription = $this->NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:long-description"]'));

		// The parentheses matter: a cast binds tighter than `??`, so without them the fallback is never used.
		$this->WordCount = (int)($xml->xpath('/package/metadata/meta[@property="se:word-count"]')[0] ?? 0);
		$this->ReadingEase = (float)($xml->xpath('/package/metadata/meta[@property="se:reading-ease.flesch"]')[0] ?? 0);
		$this->ReadingEaseDescription = $this->DescribeReadingEase($this->ReadingEase);

		// Figure out the reading time.
		$this->ReadingTime = $this->FormatReadingTime((int)ceil($this->WordCount / AVERAGE_READING_WORDS_PER_MINUTE));

		// Figure out ancillary links.

		// First the Wikipedia URLs.
		$this->WikipediaUrl = $this->NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:url.encyclopedia.wikipedia"][not(@refines)]'));

		// Next the page scan source URLs.
		foreach($xml->xpath('/package/metadata/dc:source') as $element){
			$sourceUrl = (string)$element;
			$this->SourceUrls[] = ['source' => $this->ClassifySourceUrl($sourceUrl), 'url' => $sourceUrl];
		}

		// Next the GitHub URLs.
		$this->GitHubUrl = $this->NullIfEmpty($xml->xpath('/package/metadata/meta[@property="se:url.vcs.github"][not(@refines)]'));

		// Put together the full contributor string.
		$titleContributors = '';
		if(sizeof($this->Contributors) > 0){
			$list = $this->GenerateContributorList($this->Contributors);
			$titleContributors .= '. With ' . $list;
			$this->ContributorsHtml .= ' with ' . $list . ';';
		}

		if(sizeof($this->Translators) > 0){
			$list = $this->GenerateContributorList($this->Translators);
			$titleContributors .= '. Translated by ' . $list;
			$this->ContributorsHtml .= ' translated by ' . $list . ';';
		}

		if(sizeof($this->Illustrators) > 0){
			$list = $this->GenerateContributorList($this->Illustrators);
			$titleContributors .= '. Illustrated by ' . $list;
			$this->ContributorsHtml .= ' illustrated by ' . $list . ';';
		}

		if($this->ContributorsHtml !== null){
			// Turn " with A; translated by B;" into "With A; translated by B."
			$this->ContributorsHtml = ucfirst(rtrim(trim($this->ContributorsHtml), ';')) . '.';
		}

		$this->AuthorsHtml = $this->GenerateContributorList($this->Authors);

		// Now the complete title with credits.
		$this->TitleWithCreditsHtml = Formatter::ToPlainText($this->Title) . ', by ' . $this->AuthorsHtml . $titleContributors;
	}

	/**
	 * Check whether this ebook matches a search query.
	 *
	 * The query is matched as one case-insensitive substring against the title (the full title if there is one),
	 * author names, SE tags, and LoC tags, after both sides are reduced to ASCII letters, digits, and spaces.
	 *
	 * @param string $query The raw search string.
	 *
	 * @return bool True if the normalized query occurs in the normalized search text.
	 */
	public function Contains(string $query): bool{
		// When searching an ebook, we search the title, author(s), SE tags, and LoC tags.
		$searchString = (string)($this->FullTitle ?? $this->Title);

		foreach($this->Authors as $author){
			$searchString .= ' ' . $author->Name;
		}

		foreach($this->Tags as $tag){
			$searchString .= ' ' . $tag;
		}

		foreach($this->LocTags as $tag){
			$searchString .= ' ' . $tag;
		}

		return mb_stripos($this->NormalizeForSearch($searchString), $this->NormalizeForSearch($query)) !== false;
	}

	/**
	 * Describe this ebook as a schema.org `Book` in JSON-LD.
	 *
	 * @return string Pretty-printed JSON.
	 */
	public function GenerateJsonLd(): string{
		$output = new stdClass();
		$output->{'@context'} = 'https://schema.org';
		$output->{'@type'} = 'Book';
		$output->bookFormat = 'EBook';

		$organizationObject = new stdClass();
		$organizationObject->{'@type'} = 'Organization';
		$organizationObject->name = 'Standard Ebooks';
		$organizationObject->logo = 'https://standardebooks.org/images/logo-full.svg';
		$organizationObject->url = 'https://standardebooks.org';
		$output->publisher = $organizationObject;

		$output->name = $this->Title;
		$output->image = SITE_URL . $this->Url . '/dist/cover.jpg';
		$output->thumbnailUrl = SITE_URL . $this->Url . '/dist/cover-thumbnail.jpg';
		$output->url = SITE_URL . $this->Url;
		$output->{'@id'} = SITE_URL . $this->Url;
		$output->description = $this->Description;
		$output->inLanguage = $this->Language;

		if($this->WikipediaUrl){
			$output->sameAs = $this->WikipediaUrl;
		}

		$output->author = [];
		foreach($this->Authors as $contributor){
			$output->author[] = $this->GenerateContributorJsonLd($contributor);
		}

		// One MediaObject per available download, in this fixed order.
		$output->encoding = [];
		$downloads = ['epub' => $this->EpubUrl, 'kepub' => $this->KepubUrl, 'epub3' => $this->Epub3Url, 'azw3' => $this->Azw3Url];
		foreach($downloads as $format => $url){
			if($url){
				$encodingObject = new stdClass();
				$encodingObject->{'@type'} = 'MediaObject';
				$encodingObject->encodingFormat = $format;
				$encodingObject->contentUrl = SITE_URL . $url;
				$output->encoding[] = $encodingObject;
			}
		}

		// Translators and illustrators are only included if there are any.
		if(sizeof($this->Translators) > 0){
			$output->translator = [];
			foreach($this->Translators as $contributor){
				$output->translator[] = $this->GenerateContributorJsonLd($contributor);
			}
		}

		if(sizeof($this->Illustrators) > 0){
			$output->illustrator = [];
			foreach($this->Illustrators as $contributor){
				$output->illustrator[] = $this->GenerateContributorJsonLd($contributor);
			}
		}

		return json_encode($output, JSON_PRETTY_PRINT);
	}

	/**
	 * Describe one contributor as a schema.org `Person`.
	 *
	 * @param Contributor $contributor
	 *
	 * @return stdClass Object with `@type` and `name`, plus `sameAs` and `alternateName` when available.
	 */
	private function GenerateContributorJsonLd(Contributor $contributor): stdClass{
		$object = new stdClass();
		$object->{'@type'} = 'Person';
		$object->name = $contributor->Name;

		if($contributor->WikipediaUrl){
			$object->sameAs = $contributor->WikipediaUrl;
		}

		if($contributor->FullName){
			$object->alternateName = $contributor->FullName;
		}

		return $object;
	}

	/**
	 * Join contributor names into an HTML list like "A", "A and B", or "A, B, and C".
	 * Names are linked to Wikipedia when the contributor has a Wikipedia URL.
	 *
	 * @param array $contributors An array of Contributor objects.
	 *
	 * @return string The HTML; empty if the array is empty.
	 */
	private function GenerateContributorList(array $contributors): string{
		$string = '';
		$total = sizeof($contributors);
		$i = 0;

		foreach($contributors as $contributor){
			if($contributor->WikipediaUrl){
				$string .= '<a href="' . Formatter::ToPlainText($contributor->WikipediaUrl) .'">' . Formatter::ToPlainText($contributor->Name) . '</a>';
			}
			else{
				$string .= Formatter::ToPlainText($contributor->Name);
			}

			if($i == $total - 2 && $total > 2){
				// Second to last of three or more
				$string .= ', and ';
			}
			elseif($i == $total - 2){
				// First of exactly two
				$string .= ' and ';
			}
			elseif($i != $total - 1){
				$string .= ', ';
			}

			$i++;
		}

		return $string;
	}

	/**
	 * Helper function when getting values from SimpleXml.
	 *
	 * @param array $elements The result of an XPath query.
	 *
	 * @return string|null The first element as a string, or null if there is no first element or it's the empty string.
	 */
	private function NullIfEmpty(array $elements){ // Can't use type hinting until PHP 7.1 which supports nullable return types
		if(isset($elements[0])){
			$str = (string)$elements[0];
			if($str !== ''){
				return $str;
			}
		}

		return null;
	}

	/**
	 * Get the value of the <meta> element with the given property that refines the element with the given ID.
	 *
	 * @param SimpleXMLElement $xml The parsed content.opf.
	 * @param string $property Value of the `property` attribute to look for.
	 * @param string $id ID of the refined element, without the leading #.
	 *
	 * @return string|null The value, or null if there is none or it's empty.
	 */
	private function GetRefinement(SimpleXMLElement $xml, string $property, string $id){
		return $this->NullIfEmpty($xml->xpath('/package/metadata/meta[@property="' . $property . '"][@refines="#' . $id . '"]'));
	}

	/**
	 * Get the site URL of the first file in this ebook's /dist/ directory that matches a glob pattern.
	 *
	 * @param string $pattern Glob pattern for the file name.
	 * @param string $excludedSuffix If not empty, file names ending in this suffix are skipped.
	 *
	 * @return string|null The URL, or null if nothing matches.
	 */
	private function FindDistUrl(string $pattern, string $excludedSuffix = ''){
		$paths = glob($this->WwwFilesystemPath . '/dist/' . $pattern);

		if($paths === false){ // glob() returns false on error
			return null;
		}

		foreach($paths as $path){
			$filename = basename($path);

			if($excludedSuffix !== '' && substr($filename, -strlen($excludedSuffix)) === $excludedSuffix){
				continue;
			}

			return $this->Url . '/dist/' . $filename;
		}

		return null;
	}

	/**
	 * Turn a Flesch reading ease score into words.
	 * The bands are contiguous: a score on a boundary other than 90 belongs to the harder band.
	 *
	 * @param float $score
	 *
	 * @return string
	 */
	private function DescribeReadingEase(float $score): string{
		if($score >= 90){
			return 'very easy';
		}

		if($score > 79){
			return 'easy';
		}

		if($score > 69){
			return 'fairly easy';
		}

		if($score > 59){
			return 'average difficulty';
		}

		if($score > 49){
			return 'fairly difficult';
		}

		if($score > 39){
			return 'difficult';
		}

		return 'very difficult';
	}

	/**
	 * Format a number of minutes like "45 minutes", "1 hour", or "2 hours 5 minutes".
	 *
	 * @param int $totalMinutes
	 *
	 * @return string
	 */
	private function FormatReadingTime(int $totalMinutes): string{
		if($totalMinutes < 60){
			return $totalMinutes . ' minute' . ($totalMinutes != 1 ? 's' : '');
		}

		$hours = intdiv($totalMinutes, 60);
		$minutes = $totalMinutes % 60;

		$string = $hours . ' hour' . ($hours != 1 ? 's' : '');

		if($minutes != 0){
			$string .= ' ' . $minutes . ' minute' . ($minutes != 1 ? 's' : '');
		}

		return $string;
	}

	/**
	 * Decide which SOURCE_* constant a transcription or page scan URL belongs to.
	 *
	 * @param string $url
	 *
	 * @return mixed One of the SOURCE_* constants; SOURCE_OTHER if the URL isn't recognized.
	 */
	private function ClassifySourceUrl(string $url){
		// Checked in order; the first fragment found in the URL wins.
		$knownSources = [
			'//www.gutenberg.org/' => SOURCE_PROJECT_GUTENBERG,
			'//archive.org/' => SOURCE_INTERNET_ARCHIVE,
			'hathitrust.org/' => SOURCE_HATHI_TRUST,
			'wikisource.org/' => SOURCE_WIKISOURCE,
			'books.google.com/' => SOURCE_GOOGLE_BOOKS
		];

		foreach($knownSources as $fragment => $source){
			if(mb_stripos($url, $fragment) !== false){
				return $source;
			}
		}

		return SOURCE_OTHER;
	}

	/**
	 * Remove diacritics and non-alphanumeric characters from a string for searching.
	 *
	 * @param string $text
	 *
	 * @return string The trimmed text with every character other than ASCII letters, digits, and spaces replaced by a space.
	 */
	private function NormalizeForSearch(string $text): string{
		// iconv() may raise a notice and return false if it can't convert the text; in that case we search an empty string.
		return trim(preg_replace('|[^a-zA-Z0-9 ]|ius', ' ', (string)@iconv('UTF-8', 'ASCII//TRANSLIT', $text)));
	}
}
|
||||
?>
|
Loading…
Add table
Add a link
Reference in a new issue