diff --git a/README.md b/README.md index ea69e2a0..cd6e5a5a 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ Before submitting design contributions, please discuss them with the Standard Eb - Allow submitter or admins to edit unapproved artwork submissions. Approved/in use submissions should not be editable by anyone. -- Write responsive CSS to make artwork list at `/artworks` mobile-friendly. +- Include in-use ebook slug as a search parameter when searching for artwork by keyword. ## PHP code style diff --git a/lib/Artwork.php b/lib/Artwork.php index 8b5ecee6..2ffae717 100644 --- a/lib/Artwork.php +++ b/lib/Artwork.php @@ -323,15 +323,11 @@ class Artwork extends PropertiesBase{ $error->Add(new Exceptions\StringTooLongException('Link to an approved museum page')); } - if($this->MuseumUrl == '' || filter_var($this->MuseumUrl, FILTER_VALIDATE_URL) === false){ - $error->Add(new Exceptions\InvalidMuseumUrlException()); - } - - // Don't allow unapproved museums try{ - Museum::GetByUrl($this->MuseumUrl); + $this->Museum = Museum::GetByUrl($this->MuseumUrl); + $this->MuseumUrl = Museum::NormalizeUrl($this->MuseumUrl); } - catch(Exceptions\MuseumNotFoundException $ex){ + catch(Exceptions\MuseumNotFoundException | Exceptions\InvalidUrlException $ex){ $error->Add($ex); } } @@ -466,53 +462,68 @@ class Artwork extends PropertiesBase{ } if(stripos($parsedUrl['host'], 'hathitrust.org') !== false){ - // https://babel.hathitrust.org/cgi/pt?id=hvd.32044034383265&seq=13 + $exampleUrl = 'https://babel.hathitrust.org/cgi/pt?id=hvd.32044034383265&seq=13'; + if($parsedUrl['host'] != 'babel.hathitrust.org'){ - throw new Exceptions\InvalidHathiTrustUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } if($parsedUrl['path'] != '/cgi/pt'){ - throw new Exceptions\InvalidHathiTrustUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } parse_str($parsedUrl['query'] ?? 
'', $vars); if(!isset($vars['id']) || !isset($vars['seq']) || is_array($vars['id']) || is_array($vars['seq'])){ - throw new Exceptions\InvalidHathiTrustUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path'] . '?id=' . $vars['id'] . '&seq=' . $vars['seq']; + + return $outputUrl; } if(stripos($parsedUrl['host'], 'archive.org') !== false){ - // https://archive.org/details/royalacademypict1902roya/page/n9/mode/1up + $exampleUrl = 'https://archive.org/details/royalacademypict1902roya/page/n9/mode/1up'; if($parsedUrl['host'] != 'archive.org'){ - throw new Exceptions\InvalidInternetArchiveUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } - if(!preg_match('|^/details/[^/]+?/page/[^/]+/mode/1up$|ius', $parsedUrl['path'])){ - throw new Exceptions\InvalidInternetArchiveUrlException(); + // If we're missing the view mode, append it + if(preg_match('|^/details/[^/]+?/page/[^/]+$|ius', $parsedUrl['path'])){ + $parsedUrl['path'] = $parsedUrl['path'] . '/mode/1up'; + } + + // archive.org URLs may have both a book ID and collection ID, like + // https://archive.org/details/TheStrandMagazineAnIllustratedMonthly/TheStrandMagazine1914bVol.XlviiiJul-dec/page/n254/mode/1up + // The `/page/` portion of the URL may also be missing if we're on page 1 (like the cover) + if(!preg_match('|^/details/[^/]+?(/[^/]+?)?(/page/[^/]+)?/mode/1up$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } $outputUrl = 'https://' . $parsedUrl['host'] . 
$parsedUrl['path']; + + return $outputUrl; } if(stripos($parsedUrl['host'], 'google.com') !== false){ // Old style: https://books.google.com/books?id=mZpAAAAAYAAJ&pg=PA70-IA2 // New style: https://www.google.com/books/edition/_/mZpAAAAAYAAJ?gbpv=1&pg=PA70-IA2 + $exampleUrl = 'https://www.google.com/books/edition/_/mZpAAAAAYAAJ?gbpv=1&pg=PA70-IA2'; + if($parsedUrl['host'] == 'books.google.com'){ // Old style, convert to new style if($parsedUrl['path'] != '/books'){ - throw new Exceptions\InvalidGoogleBooksUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } parse_str($parsedUrl['query'] ?? '', $vars); if(!isset($vars['id']) || !isset($vars['pg']) || is_array($vars['id']) || is_array($vars['pg'])){ - throw new Exceptions\InvalidGoogleBooksUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } $outputUrl = 'https://www.google.com/books/edition/_/' . $vars['id'] . '?gbpv=1&pg=' . $vars['pg']; @@ -521,7 +532,7 @@ class Artwork extends PropertiesBase{ // New style if(!preg_match('|^/books/edition/[^/]+/[^/]+$|ius', $parsedUrl['path'])){ - throw new Exceptions\InvalidGoogleBooksUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } preg_match('|^/books/edition/[^/]+/([^/]+)$|ius', $parsedUrl['path'], $matches); @@ -530,14 +541,16 @@ class Artwork extends PropertiesBase{ parse_str($parsedUrl['query'] ?? '', $vars); if(!isset($vars['gbpv']) || $vars['gbpv'] !== '1' || !isset($vars['pg']) || is_array($vars['pg'])){ - throw new Exceptions\InvalidGoogleBooksUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } $outputUrl = 'https://' . $parsedUrl['host'] . '/books/edition/_/' . $id . '?gbpv=' . $vars['gbpv'] . '&pg=' . 
$vars['pg']; } else{ - throw new Exceptions\InvalidGoogleBooksUrlException(); + throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl); } + + return $outputUrl; } return $outputUrl; diff --git a/lib/Exceptions/InvalidGoogleBooksUrlException.php b/lib/Exceptions/InvalidGoogleBooksUrlException.php deleted file mode 100644 index caf544ed..00000000 --- a/lib/Exceptions/InvalidGoogleBooksUrlException.php +++ /dev/null @@ -1,6 +0,0 @@ -message = 'Invalid museum URL: <' . $url . '>. Expected a URL like: <'. $exampleUrl . '>.'; + } } diff --git a/lib/Exceptions/InvalidPageScanUrlException.php b/lib/Exceptions/InvalidPageScanUrlException.php new file mode 100644 index 00000000..a7d37f83 --- /dev/null +++ b/lib/Exceptions/InvalidPageScanUrlException.php @@ -0,0 +1,8 @@ +message = 'Invalid page scan URL: <' . $url . '>. Expected a URL like: <'. $exampleUrl . '>.'; + } +} diff --git a/lib/Exceptions/InvalidUrlException.php b/lib/Exceptions/InvalidUrlException.php index 71d99141..87a27a0d 100644 --- a/lib/Exceptions/InvalidUrlException.php +++ b/lib/Exceptions/InvalidUrlException.php @@ -6,7 +6,7 @@ class InvalidUrlException extends AppException{ public function __construct(?string $url = null){ if($url !== null){ - parent::__construct('Invalid URL: “' . $url . '”.'); + parent::__construct('Invalid URL: <' . $url . '>.'); } } } diff --git a/lib/Museum.php b/lib/Museum.php index a93de547..a8ccde7c 100644 --- a/lib/Museum.php +++ b/lib/Museum.php @@ -1,11 +1,519 @@ in which the text before : is for SEO and can be cut + $exampleUrl = 'https://www.si.edu/object/saam_1983.95.90'; + + // The SEO prefix before the colon is optional, so an already-normalized URL such as the example is accepted + if(!preg_match('|/object/(?:[^/]+:)?[^/:]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + // `|` is the regex delimiter; drop the optional prefix and keep only the object ID + $path = preg_replace('|/object/(?:[^/]+:)?([^/:]+)$|ius', '/object/\1', $parsedUrl['path']); + + $outputUrl = 'https://' . $parsedUrl['host'] . 
$path; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'americanart.si.edu') !== false){ + $exampleUrl = 'https://americanart.si.edu/artwork/study-apotheosis-washington-rotunda-united-states-capitol-building-84517'; + + // Only the single-segment /artwork/ form is valid on this host; the path is kept unchanged + if(!preg_match('|^/artwork/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'collections.si.edu') !== false){ + // These URLs can actually be normalized to a www.si.edu URL by pulling out the object ID + $exampleUrl = 'https://collections.si.edu/search/detail/edanmdm:saam_1981.146.1'; + + $path = $parsedUrl['path']; + if(!preg_match('|/search/detail/[^/]+?:[^/:]+$|ius', $path)){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $path = preg_replace('|/search/detail/[^/]+:([^/:]+)$|ius', '/object/\1', $parsedUrl['path']); + + $outputUrl = 'https://www.si.edu' . $path; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'npg.si.edu') !== false){ + // These URLs can actually be normalized to a www.si.edu URL by pulling out the object ID + $exampleUrl = 'https://npg.si.edu/object/npg_NPG.2008.5'; + + $path = $parsedUrl['path']; + if(!preg_match('|/object/[^/]+$|ius', $path)){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://www.si.edu' . 
$path; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'birminghammuseums.org.uk') !== false){ + $exampleUrl = 'https://dams.birminghammuseums.org.uk/asset-bank/action/viewAsset?id=6726'; + + if($parsedUrl['host'] != 'dams.birminghammuseums.org.uk'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if($parsedUrl['path'] != '/asset-bank/action/viewAsset'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + parse_str($parsedUrl['query'] ?? '', $vars); + + // A missing or array-valued `id` is a museum URL error, reported like the other failures in this branch + if(!isset($vars['id']) || is_array($vars['id'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path'] . '?id=' . $vars['id']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'mnk.pl') !== false){ + $exampleUrl = 'https://zbiory.mnk.pl/en/search-result/catalog/333584'; + + if($parsedUrl['host'] != 'zbiory.mnk.pl'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/en/search-result/catalog/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'smk.dk') !== false){ + $exampleUrl = 'https://open.smk.dk/artwork/image/KMS1884'; + + if($parsedUrl['host'] != 'open.smk.dk'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/artwork/image/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . 
$parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'kansallisgalleria.fi') !== false){ + $exampleUrl = 'https://www.kansallisgalleria.fi/en/object/429609'; + + if($parsedUrl['host'] != 'www.kansallisgalleria.fi'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/en/object/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'nga.gov') !== false){ + $exampleUrl = 'https://www.nga.gov/collection/art-object-page.46522.html'; + + if($parsedUrl['host'] != 'www.nga.gov'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/collection/art-object-page\.[^/]+\.html$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'nivaagaard.dk') !== false){ + $exampleUrl = 'https://www.nivaagaard.dk/en/vare/lundstroem-vilhelm/'; + + if($parsedUrl['host'] != 'www.nivaagaard.dk'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/en/vare/[^/]+/$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . 
$parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'risdmuseum.org') !== false){ + $exampleUrl = 'https://risdmuseum.org/art-design/collection/portrait-christiana-carteaux-bannister-2016381'; + + if($parsedUrl['host'] != 'risdmuseum.org'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/art-design/collection/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'aberdeencity.gov.uk') !== false){ + // Only the first segment after /objects/ is kept; an optional trailing SEO slug is stripped + $exampleUrl = 'https://emuseum.aberdeencity.gov.uk/objects/3215/james-cromar-watt-lld'; + + if($parsedUrl['host'] != 'emuseum.aberdeencity.gov.uk'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/objects/[^/]+(/[^/]+)?$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $path = preg_replace('|^/objects/([^/]+)(/[^/]+)?$|ius', '/objects/\1', $parsedUrl['path']); + + $outputUrl = 'https://' . $parsedUrl['host'] . $path; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'brightonmuseums.org.uk') !== false){ + // The path must be /records/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://collections.brightonmuseums.org.uk/records/63caa90083d50a00184b8e90'; + + if($parsedUrl['host'] != 'collections.brightonmuseums.org.uk'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/records/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . 
$parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'grpmcollections.org') !== false){ + // The path must be /Detail/objects/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://www.grpmcollections.org/Detail/objects/130684'; + + if($parsedUrl['host'] != 'www.grpmcollections.org'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/Detail/objects/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'thorvaldsensmuseum.dk') !== false){ + // The path must be /en/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://kataloget.thorvaldsensmuseum.dk/en/B122'; + + if($parsedUrl['host'] != 'kataloget.thorvaldsensmuseum.dk'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/en/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'museabrugge.be') !== false){ + // The path must be /en/collection/work/id/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://collectie.museabrugge.be/en/collection/work/id/2013_GRO0013_I'; + + if($parsedUrl['host'] != 'collectie.museabrugge.be'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/en/collection/work/id/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . 
$parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'britishart.yale.edu') !== false){ + // The path must be /catalog/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://collections.britishart.yale.edu/catalog/tms:1010'; + + if($parsedUrl['host'] != 'collections.britishart.yale.edu'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/catalog/[^/]+$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'kunsthalle-karlsruhe.de') !== false){ + // The path must be /kunstwerke/ plus exactly three segments and a trailing slash; all of it is kept unchanged + $exampleUrl = 'https://www.kunsthalle-karlsruhe.de/kunstwerke/Ferdinand-Keller/K%C3%BCstenlandschaft-bei-Rio-de-Janeiro/C066F030484D7D09148891B0E70524B8/'; + + if($parsedUrl['host'] != 'www.kunsthalle-karlsruhe.de'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/kunstwerke/[^/]+?/[^/]+?/[^/]+?/$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'getty.edu') !== false){ + // The path must be /art/collection/object/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://www.getty.edu/art/collection/object/103RG0'; + + if($parsedUrl['host'] != 'www.getty.edu'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/art/collection/object/[^/]+?$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . 
$parsedUrl['path']; + + return $outputUrl; + } + + if(stripos($parsedUrl['host'], 'artgallery.yale.edu') !== false){ + // The path must be /collections/objects/ plus a single segment; it is kept unchanged + $exampleUrl = 'https://artgallery.yale.edu/collections/objects/44306'; + + if($parsedUrl['host'] != 'artgallery.yale.edu'){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + if(!preg_match('|^/collections/objects/[^/]+?$|ius', $parsedUrl['path'])){ + throw new Exceptions\InvalidMuseumUrlException($url, $exampleUrl); + } + + $outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path']; + + return $outputUrl; + } + + return $outputUrl; + } + public static function GetByUrl(?string $url): Museum{ if($url === null){ throw new Exceptions\MuseumNotFoundException();