web/scripts/upsert-to-cover-art-database

172 lines
6.1 KiB
PHP
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/php
<?
require_once('/standardebooks.org/web/lib/Core.php');
// Parse the command line: -v / --verbose is a flag, and each of the three long options takes a value.
$options = getopt('v', ['repoDir:', 'workDir:', 'ebookWwwFilesystemPath:', 'verbose']);

// Options that were not passed become false, which the required-argument check further down treats as missing.
$repoDir = $options['repoDir'] ?? false;
$workDir = $options['workDir'] ?? false;
$ebookWwwFilesystemPath = $options['ebookWwwFilesystemPath'] ?? false;

// Either spelling of the verbose flag turns on progress output.
$verbose = isset($options['v']) || isset($options['verbose']);
/**
 * Converts the first SimpleXMLElement of an XPath result to plain text, dropping any inner tags.
 *
 * For example, an element whose contents are:
 * '<abbr>Mr.</abbr> Smith'
 * becomes:
 * 'Mr. Smith'
 *
 * XML character entities are decoded after the tags are removed, so '&amp;' in the source
 * comes back as '&' and the result is plain text rather than markup.
 *
 * @param array<SimpleXMLElement>|false|null $elements The value returned by `SimpleXMLElement::xpath()`.
 *
 * @return ?string The text of the first element, or `null` if there is no first element or it cannot be serialized.
 */
function StripInnerTags($elements): ?string{
	if($elements === false || !isset($elements[0])){
		return null;
	}

	$markup = $elements[0]->asXML();
	if($markup === false){
		// Serialization failed; treat this the same as a missing element instead of returning an empty string.
		return null;
	}

	// `strip_tags()` removes the markup but leaves entities encoded, so decode them afterwards.
	return html_entity_decode(strip_tags($markup), ENT_QUOTES | ENT_XML1, 'UTF-8');
}
// All three path options are required; print the usage line and fail if any is missing or empty.
if(!($repoDir && $workDir && $ebookWwwFilesystemPath)){
	print("Expected usage: upsert-to-cover-art-database [-v] --repoDir <dir> --workDir <dir> --ebookWwwFilesystemPath <path>\n");
	exit(1);
}

if($verbose){
	// Echo the options back so that a log shows what this run operated on.
	print(
		"\nrepoDir: $repoDir\n"
		. "workDir: $workDir\n"
		. "ebookWwwFilesystemPath: $ebookWwwFilesystemPath\n"
	);
}
// Run Git from inside the ebook's repository.
chdir($repoDir);

// Read the metadata file as committed at HEAD.
$opfSource = shell_exec("git show HEAD:src/epub/content.opf");

// The default namespace declaration is renamed before parsing, presumably so that the XPath queries below can use unprefixed element names.
$opf = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $opfSource));
$opf->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

// Some ebooks have an artist-1 and artist-2 instead of a single artist. Fall back to artist-1, which isn't ideal, but is usually correct.
$artistName = StripInnerTags($opf->xpath('/package/metadata/dc:contributor[@id="artist"]'))
	?? StripInnerTags($opf->xpath('/package/metadata/dc:contributor[@id="artist-1"]'));

if($artistName === null){
	print($repoDir . " Error: Could not find artist name in content.opf\n");
	// NOTE(review): `exit()` with a string prints the string and exits with status 0; confirm that callers expect that.
	exit($repoDir . " Error: missing artistName\n");
}
// The artwork's name and completion year are read from the deployed ebook's colophon, which must exist and be non-empty.
$colophonPath = $ebookWwwFilesystemPath . '/text/colophon.xhtml';
if(!file_exists($colophonPath)){
	exit($repoDir . ' Error: no text/colophon.xhtml at ' . $ebookWwwFilesystemPath . "\n");
}

$colophonSource = file_get_contents($colophonPath);
if(empty($colophonSource)){
	exit($repoDir . ' Error: empty colophon at ' . $ebookWwwFilesystemPath . "\n");
}

// Look for "a painting completed <word> <year>" in the raw markup; the year stays null when the phrase is absent.
preg_match('|a painting completed \w+ (\d+)|ius', $colophonSource, $yearMatch);
$completedYear = (sizeof($yearMatch) == 2) ? (int)$yearMatch[1] : null;

// The default namespace declaration is renamed before parsing, presumably so that the XPath query can use unprefixed element names.
$colophon = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $colophonSource));
$artworkName = StripInnerTags($colophon->xpath('/html/body/main/section/p/i[@epub:type="se:name.visual-art.painting"]'));

if($artworkName === null){
	print($repoDir . " Error: Could not find artwork name in colophon.xhtml\n");
	// NOTE(review): `exit()` with a string prints the string and exits with status 0; confirm that callers expect that.
	exit($repoDir . " Error: missing artworkName");
}
// Artworks are looked up by the URL-safe forms of the artist name and the artwork name.
$artistUrlName = Formatter::MakeUrlSafe($artistName);
$artworkUrlName = Formatter::MakeUrlSafe($artworkName);

if($verbose){
	print(
		"artistName: $artistName\n"
		. "artistUrlName: $artistUrlName\n"
		. "completedYear: $completedYear\n"
		. "artworkName: $artworkName\n"
		. "artworkUrlName: $artworkUrlName\n"
	);
}

// A null $artwork after the lookup means that no matching artwork was found.
$artwork = null;
try{
	$artwork = Artwork::GetByUrlAndIsApproved($artistUrlName, $artworkUrlName);
}
catch(Exceptions\ArtworkNotFoundException){
	// Not found is an expected outcome here; leave $artwork as null and carry on.
}
if($artwork === null){
	// No matching artwork exists yet: build a new record from the ebook's metadata and its cover source image.
	if($verbose){
		// $repoDir is passed as an argument rather than concatenated into the format string, so a '%' in the path cannot break the formatting.
		printf("%s No existing artwork found at %s/%s, inserting new artwork.\n", $repoDir, $artistUrlName, $artworkUrlName);
	}

	// The ebook colophon provides the artist's name, but not their death year.
	// Prefer matching an existing artist to creating a new record with a null death year if possible.
	$artist = Artist::FindMatch($artistName);
	if($artist === null){
		$artist = new Artist();
		$artist->Name = $artistName;
	}

	$artwork = new Artwork();
	$artwork->Artist = $artist;
	$artwork->Name = $artworkName;
	$artwork->CompletedYear = $completedYear;
	$artwork->CompletedYearIsCirca = false;
	$artwork->Created = new DateTime();
	$artwork->Status = COVER_ARTWORK_STATUS_IN_USE;
	$artwork->EbookWwwFilesystemPath = $ebookWwwFilesystemPath;
	$artwork->MimeType = ImageMimeType::JPG;

	$coverSourceFile = tempnam($workDir, 'cover.source.');
	if($coverSourceFile === false){
		// Without this check the paths below would collapse to '.jpg', '.png' and '.tif' in the current directory, which is the repository.
		exit($repoDir . ' Error: could not create a temporary file in ' . $workDir . "\n");
	}

	// The file handed to Artwork::Create() is always a JPEG at this path.
	$coverSourceJpg = $coverSourceFile . '.jpg';

	// Search for JPEG, PNG, and TIFF source files, in that order.
	// Every path is shell-quoted so that a work directory containing spaces or shell metacharacters is handled correctly.
	exec('git show HEAD:images/cover.source.jpg > ' . escapeshellarg($coverSourceJpg), $shellOutput, $resultCode);
	if($resultCode !== 0){
		// No JPEG; try the formats that must be converted to JPEG first.
		$foundSource = false;

		foreach(['png', 'tif'] as $extension){
			$candidateFile = $coverSourceFile . '.' . $extension;

			exec('git show HEAD:images/cover.source.' . $extension . ' > ' . escapeshellarg($candidateFile), $shellOutput, $resultCode);
			if($resultCode !== 0){
				// Not committed in this format; try the next one.
				continue;
			}

			// Found a source image; convert it to JPEG.
			exec('convert ' . escapeshellarg($candidateFile) . " -resize '3750x>' -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG " . escapeshellarg($coverSourceJpg), $shellOutput, $resultCode);
			if($resultCode !== 0){
				// NOTE(review): `exit()` with a string prints the string and exits with status 0; confirm that callers expect that.
				exit($repoDir . " Error: Failed to convert images/cover.source." . $extension . " to JPEG\n");
			}

			$foundSource = true;
			break;
		}

		if(!$foundSource){
			exit($repoDir . " Error: no images/cover.source.jpg or images/cover.source.png or images/cover.source.tif\n");
		}
	}

	// Present the JPEG to Artwork::Create() in the same array shape as an uploaded file.
	$uploadedFile = ['tmp_name' => $coverSourceJpg, 'error' => UPLOAD_ERR_OK];
	$artwork->Create($uploadedFile);
}
else{
	// A matching artwork already exists: check it against the colophon, then mark it as in use by this ebook.
	if($verbose){
		printf("%s Existing artwork found at %s/%s, updating its status.\n", $repoDir, $artistUrlName, $artworkUrlName);
	}

	if($artwork->CompletedYear != $completedYear){
		printf("%s Error: Existing database artwork completed year, %d, does not match ebook colophon completed year, %d. Not updating database.\n", $repoDir, $artwork->CompletedYear, $completedYear);
		exit($repoDir . " Error: completed year\n");
	}

	if($artwork->Status === COVER_ARTWORK_STATUS_IN_USE){
		printf("%s Error: Existing database artwork already marked as 'in_use' by ebook '%s'. Not updating database.\n", $repoDir, $artwork->EbookWwwFilesystemPath);
		exit($repoDir . " Error: in_use\n");
	}

	$artwork->MarkInUse($ebookWwwFilesystemPath);
}