mirror of
https://github.com/standardebooks/web.git
synced 2025-07-07 15:20:32 -04:00
172 lines
6.1 KiB
PHP
Executable file
172 lines
6.1 KiB
PHP
Executable file
#!/usr/bin/php
|
||
<?
|
||
require_once('/standardebooks.org/web/lib/Core.php');
|
||
|
||
// Command-line options: three long options that each take a value, plus a verbosity switch.
$longopts = [
	'repoDir:',
	'workDir:',
	'ebookWwwFilesystemPath:',
	'verbose'
];
$options = getopt('v', $longopts);

// An option that was not supplied is recorded as `false`.
$repoDir = $options['repoDir'] ?? false;
$workDir = $options['workDir'] ?? false;
$ebookWwwFilesystemPath = $options['ebookWwwFilesystemPath'] ?? false;

// Verbose output is switched on by either `-v` or `--verbose`.
$verbose = isset($options['v']) || isset($options['verbose']);
|
||
|
||
/**
 * Converts the first SimpleXMLElement of an XPath result with inner tags like this:
 * '<abbr>Mr.</abbr> Smith'
 * to:
 * 'Mr. Smith'
 *
 * XML entities in the text (for example `&amp;`) are decoded, so the result is plain text and not escaped markup.
 *
 * @param array<SimpleXMLElement>|false|null $elements The value returned by `SimpleXMLElement::xpath()`.
 *
 * @return ?string The plain text of the first element, or `null` if there is no first element or it cannot be serialized.
 */
function StripInnerTags($elements): ?string{
	// Covers a failed XPath evaluation (`false` or `null`) as well as an empty result set.
	if(!isset($elements[0])){
		return null;
	}

	$markup = $elements[0]->asXML();
	if($markup === false){
		return null;
	}

	// `asXML()` returns serialized XML, so characters like `&` come back as entities.
	// Strip the tags first, then decode, so that an escaped `<` is never mistaken for a tag.
	return html_entity_decode(strip_tags($markup), ENT_QUOTES | ENT_XML1, 'UTF-8');
}
|
||
|
||
// All three path options are required; if any of them is missing or falsy, print the usage line and stop with status 1.
if(!($repoDir && $workDir && $ebookWwwFilesystemPath)){
	print("Expected usage: upsert-to-cover-art-database [-v] --repoDir <dir> --workDir <dir> --ebookWwwFilesystemPath <path>\n");
	exit(1);
}

// In verbose mode, echo the options that were received.
if($verbose){
	print("\nrepoDir: " . $repoDir . "\n");
	print('workDir: ' . $workDir . "\n");
	print('ebookWwwFilesystemPath: ' . $ebookWwwFilesystemPath . "\n");
}
|
||
|
||
// Run the Git commands from inside the ebook's repository.
if(!chdir($repoDir)){
	// Carrying on would run `git show` in whatever directory the script was started from.
	print($repoDir . " Error: Could not change directory to repoDir\n");
	exit(1);
}

// Read the metadata file as committed at HEAD, not the working-tree copy.
$contentOpf = shell_exec('git show HEAD:src/epub/content.opf');
if(!is_string($contentOpf) || trim($contentOpf) === ''){
	// `shell_exec()` returns null or false when the command fails or produces no output.
	print($repoDir . " Error: Could not read src/epub/content.opf from HEAD\n");
	exit(1);
}

// The default namespace declaration is renamed before parsing; presumably this is so the un-prefixed XPath steps below (`/package/metadata`) can match.
$xml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $contentOpf));
$xml->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');

$artistName = StripInnerTags($xml->xpath('/package/metadata/dc:contributor[@id="artist"]'));
if($artistName === null){
	// Some ebooks have an artist-1 and artist-2. Take artist-1, which isn't ideal, but is usually correct.
	$artistName = StripInnerTags($xml->xpath('/package/metadata/dc:contributor[@id="artist-1"]'));
	if($artistName === null){
		print($repoDir . " Error: Could not find artist name in content.opf\n");
		// NOTE(review): `exit()` with a string prints the string and ends the script with status 0 — confirm that callers do not rely on a failing status here.
		exit($repoDir . " Error: missing artistName\n");
	}
}
|
||
|
||
// The colophon of the deployed ebook is read for the painting's completion year and its title.
$colophonPath = $ebookWwwFilesystemPath . '/text/colophon.xhtml';
if(!file_exists($colophonPath)){
	exit($repoDir . ' Error: no text/colophon.xhtml at ' . $ebookWwwFilesystemPath . "\n");
}

$rawColophon = file_get_contents($colophonPath);
if(!$rawColophon){
	exit($repoDir . ' Error: empty colophon at ' . $ebookWwwFilesystemPath . "\n");
}

// The year is the run of digits after the single word that follows "a painting completed".
// It stays null when the colophon does not contain that phrase.
preg_match('|a painting completed \w+ (\d+)|ius', $rawColophon, $matches);
$completedYear = (count($matches) === 2) ? (int)$matches[1] : null;

// The default namespace declaration is renamed before parsing, and the painting's title is taken from the <i> element typed as a painting name.
$colophonXml = new SimpleXMLElement(str_replace('xmlns=', 'ns=', $rawColophon));
$artworkName = StripInnerTags($colophonXml->xpath('/html/body/main/section/p/i[@epub:type="se:name.visual-art.painting"]'));
if($artworkName === null){
	print($repoDir . " Error: Could not find artwork name in colophon.xhtml\n");
	exit($repoDir . " Error: missing artworkName");
}
|
||
|
||
// URL-safe forms of the artist and artwork names; together they are the key used to look the artwork up.
$artistUrlName = Formatter::MakeUrlSafe($artistName);
$artworkUrlName = Formatter::MakeUrlSafe($artworkName);

if($verbose){
	print('artistName: ' . $artistName . "\n");
	print('artistUrlName: ' . $artistUrlName . "\n");
	print('completedYear: ' . $completedYear . "\n");
	print('artworkName: ' . $artworkName . "\n");
	print('artworkUrlName: ' . $artworkUrlName . "\n");
}

// Stays null when the lookup throws because no matching artwork exists.
$artwork = null;
try{
	$artwork = Artwork::GetByUrlAndIsApproved($artistUrlName, $artworkUrlName);
}
catch(Exceptions\ArtworkNotFoundException){
	// Nothing to do: a missing artwork is an expected outcome and is handled by the null value.
}
|
||
|
||
if($artwork === null){
	// No existing artwork was found for this artist/artwork pair: create a new record from the repository's cover source image.
	if($verbose){
		// `$repoDir` is passed as an argument, not concatenated into the format string, so a `%` in the path cannot break the format.
		printf("%s No existing artwork found at %s/%s, inserting new artwork.\n", $repoDir, $artistUrlName, $artworkUrlName);
	}

	// The ebook colophon provides the artist's name, but not their death year.
	// Prefer matching an existing artist to creating a new record with a null death year if possible.
	$artist = Artist::FindMatch($artistName);
	if($artist === null){
		$artist = new Artist();
		$artist->Name = $artistName;
	}

	$artwork = new Artwork();
	$artwork->Artist = $artist;
	$artwork->Name = $artworkName;
	$artwork->CompletedYear = $completedYear;
	$artwork->CompletedYearIsCirca = false;
	$artwork->Created = new DateTime();
	$artwork->Status = COVER_ARTWORK_STATUS_IN_USE;
	$artwork->EbookWwwFilesystemPath = $ebookWwwFilesystemPath;
	$artwork->MimeType = ImageMimeType::JPG;

	// `tempnam()` reserves a unique base name in the work directory; the image files are that name plus an extension.
	$coverSourceFile = tempnam($workDir, 'cover.source.');
	if($coverSourceFile === false){
		print($repoDir . " Error: Could not create a temporary file in workDir\n");
		exit(1);
	}
	$coverJpgFile = $coverSourceFile . '.jpg';

	// Search for JPEG, PNG, and TIFF source files, in that order, and stop at the first one that exists at HEAD.
	$foundExtension = null;
	foreach(['jpg', 'png', 'tif'] as $extension){
		// The destination path comes from a command-line option, so it is escaped before reaching the shell.
		exec('git show HEAD:images/cover.source.' . $extension . ' > ' . escapeshellarg($coverSourceFile . '.' . $extension), $shellOutput, $resultCode);
		if($resultCode === 0){
			$foundExtension = $extension;
			break;
		}
	}

	if($foundExtension === null){
		// NOTE(review): `exit()` with a string prints the string and ends the script with status 0 — confirm that callers do not rely on a failing status here.
		exit($repoDir . " Error: no images/cover.source.jpg or images/cover.source.png or images/cover.source.tif\n");
	}

	if($foundExtension !== 'jpg'){
		// Found a PNG or TIFF, convert it to JPEG. A JPEG source is used as it is.
		exec('convert ' . escapeshellarg($coverSourceFile . '.' . $foundExtension) . " -resize '3750x>' -sampling-factor 4:2:0 -strip -quality 80 -colorspace RGB -interlace JPEG " . escapeshellarg($coverJpgFile), $shellOutput, $resultCode);
		if($resultCode !== 0){
			exit($repoDir . ' Error: Failed to convert images/cover.source.' . $foundExtension . " to JPEG\n");
		}
	}

	// Hand the JPEG to `Artwork::Create()` in the shape of an uploaded-file array.
	$uploadedFile = ['tmp_name' => $coverJpgFile, 'error' => UPLOAD_ERR_OK];
	$artwork->Create($uploadedFile);
}
else{
	// An existing artwork was found: check it against the ebook, then mark it as used by this ebook.
	if($verbose){
		printf("%s Existing artwork found at %s/%s, updating its status.\n", $repoDir, $artistUrlName, $artworkUrlName);
	}

	// Refuse to touch the record if the colophon and the database disagree on the completion year.
	if($artwork->CompletedYear != $completedYear){
		printf("%s Error: Existing database artwork completed year, %d, does not match ebook colophon completed year, %d. Not updating database.\n", $repoDir, $artwork->CompletedYear, $completedYear);
		exit($repoDir . " Error: completed year\n");
	}

	// Refuse to touch the record if it is already marked as in use.
	if($artwork->Status === COVER_ARTWORK_STATUS_IN_USE){
		printf("%s Error: Existing database artwork already marked as 'in_use' by ebook '%s'. Not updating database.\n", $repoDir, $artwork->EbookWwwFilesystemPath);
		exit($repoDir . " Error: in_use\n");
	}

	$artwork->MarkInUse($ebookWwwFilesystemPath);
}
|