From 6b27a79a9ddf74d1fd2df7402a25a10284aab430 Mon Sep 17 00:00:00 2001 From: wladif Date: Sat, 17 Jun 2023 21:57:03 +0300 Subject: [PATCH] Support files without extensions in importFromUrl (#1873) * Enhance URL import to work files without extension * Improve unknown file type when importing from URL * Update app/Image/Files/BaseMediaFile.php * minor fix, use quick returns instead of large if-else --- app/Actions/Import/FromUrl.php | 8 ++-- app/Image/Files/BaseMediaFile.php | 44 +++++++++++++++++ app/Image/Files/DownloadedFile.php | 56 ++++++++++++++++++++-- tests/Feature/Constants/TestConstants.php | 2 + tests/Feature/PhotosAddMethodsTest.php | 18 +++++++ tests/Feature/PhotosAddNegativeTest.php | 25 ++++++++++ tests/Samples/tiff | Bin 0 -> 5802 bytes 7 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 tests/Samples/tiff diff --git a/app/Actions/Import/FromUrl.php b/app/Actions/Import/FromUrl.php index 44876119940..54f2a1ed22f 100644 --- a/app/Actions/Import/FromUrl.php +++ b/app/Actions/Import/FromUrl.php @@ -55,9 +55,11 @@ public function do(array $urls, ?Album $album, int $intendedOwnerId): Collection $path = parse_url($url, PHP_URL_PATH); $extension = '.' . pathinfo($path, PATHINFO_EXTENSION); - // Validate photo extension even when `$create->add()` will do later. - // This prevents us from downloading unsupported files. - BaseMediaFile::assertIsSupportedOrAcceptedFileExtension($extension); + if ($extension !== '.') { + // Validate photo extension even when `$create->add()` will do later. + // This prevents us from downloading unsupported files. + BaseMediaFile::assertIsSupportedOrAcceptedFileExtension($extension); + } // Download file $downloadedFile = new DownloadedFile($url); diff --git a/app/Image/Files/BaseMediaFile.php b/app/Image/Files/BaseMediaFile.php index f19e6e96145..5b0e3cce0ee 100644 --- a/app/Image/Files/BaseMediaFile.php +++ b/app/Image/Files/BaseMediaFile.php @@ -76,6 +76,24 @@ abstract class BaseMediaFile extends AbstractBinaryBlob implements MediaFile 'application/octet-stream', // Some mp4 files; will be corrected by the metadata extractor ]; + public const MIME_TYPES_TO_FILE_EXTENSIONS = [ + 'image/gif' => '.gif', + 'image/jpeg' => '.jpg', + 'image/png' => '.png', + 'image/webp' => '.webp', + 'video/mp4' => '.mp4', + 'video/mpeg' => '.mpg', + 'image/x-tga' => '.mpg', + 'video/ogg' => '.ogv', + 'video/webm' => '.webm', + 'video/quicktime' => '.mov', + 'video/x-ms-asf' => '.wmv', + 'video/x-ms-wmv' => '.wmv', + 'video/x-msvideo' => '.avi', + 'video/x-m4v' => '.avi', + 'application/octet-stream' => '.mp4', + ]; + /** @var string[] the accepted raw file extensions minus supported extensions */ private static array $cachedAcceptedRawFileExtensions = []; @@ -326,4 +344,30 @@ public static function assertIsSupportedOrAcceptedFileExtension(string $extensio throw new MediaFileUnsupportedException(MediaFileUnsupportedException::DEFAULT_MESSAGE . ' (bad extension: ' . $extension . ')'); } } + + /** + * Check if the given mimetype is supported or accepted. + * + * @param ?string $mimeType the file mimetype + * + * @return bool + */ + public static function isSupportedMimeType(?string $mimeType): bool + { + return + self::isSupportedImageMimeType($mimeType) || + self::isSupportedVideoMimeType($mimeType); + } + + /** + * Returns the default file extension for the given MIME type or an empty string if there is no default extension. + * + * @param string $mimeType a MIME type + * + * @return string the default file extension for the given MIME type + */ + public static function getDefaultFileExtensionForMimeType(string $mimeType): string + { + return self::MIME_TYPES_TO_FILE_EXTENSIONS[strtolower($mimeType)] ?? ''; + } } diff --git a/app/Image/Files/DownloadedFile.php b/app/Image/Files/DownloadedFile.php index ae5b7aa05e4..6ce985e72c7 100644 --- a/app/Image/Files/DownloadedFile.php +++ b/app/Image/Files/DownloadedFile.php @@ -3,11 +3,16 @@ namespace App\Image\Files; use App\Exceptions\MediaFileOperationException; +use App\Exceptions\MediaFileUnsupportedException; use Safe\Exceptions\PcreException; use function Safe\fclose; use function Safe\fopen; +use function Safe\mime_content_type; use function Safe\parse_url; use function Safe\preg_match; +use function Safe\rewind; +use function Safe\stream_copy_to_stream; +use function Safe\tmpfile; /** * Represents a temporary local file which has been downloaded. @@ -31,10 +36,12 @@ public function __construct(string $url) $path = parse_url($url, PHP_URL_PATH); $basename = pathinfo($path, PATHINFO_FILENAME); $extension = '.' . pathinfo($path, PATHINFO_EXTENSION); - parent::__construct($extension, $basename); $downloadStream = fopen($url, 'rb'); $downloadStreamData = stream_get_meta_data($downloadStream); + + /** @var string|null $originalMimeType */ + $originalMimeType = null; // Find the server-side MIME type; the HTTP headers are part of // the protocol-specific meta-data of the stream handler foreach ($downloadStreamData['wrapper_data'] as $http_header) { @@ -46,12 +53,55 @@ public function __construct(string $url) PREG_UNMATCHED_AS_NULL ); if (count($matches) === 2 && $matches[1]) { - $this->originalMimeType = $matches[1]; + $originalMimeType = $matches[1]; break; } } - $this->write($downloadStream); + + // When the URL doesn't contain the file's extension, the web server may or may have not set the + // Content-Type correctly. If the Content-Type header has a value that we recognize, we consider it valid. + // In all other cases we try to guess the file type. + // File extension > Content-Type > Inferred MIME type + + if (self::isSupportedOrAcceptedFileExtension($extension)) { + parent::__construct($extension, $basename); + $this->originalMimeType = $originalMimeType; + $this->write($downloadStream); + fclose($downloadStream); + + return; + } + + if (self::isSupportedMimeType($originalMimeType)) { + $extension = self::getDefaultFileExtensionForMimeType($originalMimeType); + parent::__construct($extension, $basename); + $this->originalMimeType = $originalMimeType; + $this->write($downloadStream); + fclose($downloadStream); + + return; + } + + $temp = tmpfile(); + stream_copy_to_stream($downloadStream, $temp); fclose($downloadStream); + + rewind($temp); + $originalMimeType = mime_content_type($temp); + + if (self::isSupportedMimeType($originalMimeType)) { + $extension = self::getDefaultFileExtensionForMimeType($originalMimeType); + parent::__construct($extension, $basename); + $this->originalMimeType = $originalMimeType; + rewind($temp); + $this->write($temp); + fclose($temp); + + return; + } + + fclose($temp); + throw new MediaFileUnsupportedException(MediaFileUnsupportedException::DEFAULT_MESSAGE . ' (bad file type: ' . $originalMimeType . ')'); } catch (\ErrorException|PcreException $e) { throw new MediaFileOperationException($e->getMessage(), $e); } diff --git a/tests/Feature/Constants/TestConstants.php b/tests/Feature/Constants/TestConstants.php index c0dbb89825d..86e6dd25771 100644 --- a/tests/Feature/Constants/TestConstants.php +++ b/tests/Feature/Constants/TestConstants.php @@ -17,7 +17,9 @@ class TestConstants public const MIME_TYPE_VID_QUICKTIME = 'video/quicktime'; public const SAMPLE_DOWNLOAD_JPG = 'https://github.com/LycheeOrg/Lychee/raw/master/tests/Samples/mongolia.jpeg'; + public const SAMPLE_DOWNLOAD_JPG_WITHOUT_EXTENSION = 'https://github.com/LycheeOrg/Lychee/raw/master/tests/Samples/mongolia'; public const SAMPLE_DOWNLOAD_TIFF = 'https://github.com/LycheeOrg/Lychee/raw/master/tests/Samples/tiff.tif'; + public const SAMPLE_DOWNLOAD_TIFF_WITHOUT_EXTENSION = 'https://github.com/wladif/Lychee/raw/master/tests/Samples/tiff'; public const SAMPLE_FILE_AARHUS = 'tests/Samples/aarhus.jpg'; public const SAMPLE_FILE_ETTLINGEN = 'tests/Samples/ettlinger-alb.jpg'; diff --git a/tests/Feature/PhotosAddMethodsTest.php b/tests/Feature/PhotosAddMethodsTest.php index 23ce89d789b..3fb9d9b2d27 100644 --- a/tests/Feature/PhotosAddMethodsTest.php +++ b/tests/Feature/PhotosAddMethodsTest.php @@ -214,6 +214,24 @@ public function testImportFromUrl(): void ]]); } + public function testImportFromUrlWithoutExtension(): void + { + $response = $this->photos_tests->importFromUrl([TestConstants::SAMPLE_DOWNLOAD_JPG_WITHOUT_EXTENSION]); + + $response->assertJson([[ + 'album_id' => null, + 'title' => 'mongolia', + 'type' => TestConstants::MIME_TYPE_IMG_JPEG, + 'size_variants' => [ + 'original' => [ + 'width' => 1280, + 'height' => 850, + 'filesize' => 201316, + ], + ], + ]]); + } + /** * Test import from URL of a supported raw image. * diff --git a/tests/Feature/PhotosAddNegativeTest.php b/tests/Feature/PhotosAddNegativeTest.php index 9b0263539fc..f9b0781f04f 100644 --- a/tests/Feature/PhotosAddNegativeTest.php +++ b/tests/Feature/PhotosAddNegativeTest.php @@ -127,6 +127,31 @@ public function testRefusedRawImportFormUrl(): void } } + /** + * Test import from URL of an unsupported raw image without file extension. + * + * We need this test because in case the file doesn't have an extension, we'll download the file + * and try to guess the extension. + * + * @return void + */ + public function testRefusedRawImportFormUrlWithoutExtension(): void + { + $acceptedRawFormats = static::getAcceptedRawFormats(); + try { + static::setAcceptedRawFormats(''); + + $this->photos_tests->importFromUrl( + [TestConstants::SAMPLE_DOWNLOAD_TIFF_WITHOUT_EXTENSION], + null, + 422, + 'MediaFileUnsupportedException' + ); + } finally { + static::setAcceptedRawFormats($acceptedRawFormats); + } + } + /** * Recursively restricts the access to the given directory. * diff --git a/tests/Samples/tiff b/tests/Samples/tiff new file mode 100644 index 0000000000000000000000000000000000000000..5e617342d2210ad1704f99f256d1e50ab3ba157c GIT binary patch literal 5802 zcmeI0cTg1Dv&RPz34(xx1<3*;3X)k6ket)9wEWBy}G~uU)4KPJLj`~zI~>rXS%9uG&F7keOCbBk=(32DK`Tp z$x}r;SgoT3D-}xFUSBfXSO5~O>Y!h{=r7A2r|t-s;!f7#P1JOP*Ev>O3BJ^J)(@op z=>kpDbup-0JP4M{gt;2t;bDkT%Z59=GdGez>*X8XGrE&wTw+peeBWT*$)nNct*N`Q zG*2?trPACXY09V?=UHp%VIrL~wgs!U+A(>*bZ{E-!Pd*LsE>*=qRrmhw82?oFs#GT z$4n;IL?ESCX_qN&IY2gJz!ho4kAJR{H*(L%eNN>yKRu1|ptccxlee-Nq@Cv39J#7~F@m`v9_h{65x?QUsy0Ls z#`*5Wg-EWhD_T@s8gO?sZz3XSTDmQn{)Ei}+5Qnq;%hKER!XJz5dj*N!FX|c)giiQVYMN{ z5&_ztkw$TycCj2Oy%DNlS=dO@tq%Q>Q60neQF<#YliGcRo!M9n&dGe7zQ@91+-U2M zf$^l%FuRIs%1DPv3Y{3o$>e({PLpFX*|nb;&?Wbat4qoeQ&HF|k7)>bhUc`2Krd!G zMRwFL52`a6fG2ap1>#c*Rf6#2*uA}BDDE_*0MbtIcoub=_{kjPfa>X7N+~3K9^%Z7 znoqgS8@0f&EEv6zoDPj$WN?;?TTEtCd$B}6rkA*slx~u|OmA%SayjX;%j*@oG0(J> z#MprJRXWv>%+FxI#IL>gyfj1B>V08`P{ivY{&;@p6hd!nQ@oASYRh~s?GhqANCyC%=J1FcLV z$FBFxpgWIQGTgfJEt^HLhiU8k3xRjlB8I*+NrykWKC7zELHNHaQ}_%L)qY-?`XTI` z>JAnEQ<<5IRq=w9y-vR3Vk0%s=rUeqsl~ovIh0Vo(lHMqSeY8O8nv7+67(WrFAWt= z6~W2`_>iBm{*C0%nl}!>%8cHtyu77z%TfEtQ^8UmDlI;?#rwJ-cd4#-`(eQQk;3@I zSUUZP5B4tKs3*!;qcUyt%PnuJUB1!j?8MmXRVJPhYl^%o%VR#US9N(S=9XW}5^`wM zeLNMnA0)%MS$K4_*x0(0`AdN1T^{BjrkO9QKdiyZBpLKQwvv>{Fen4m} zom2CXO&*raWbcF?@Q}8|^@^e7gr;TQl?Z+_1H}6l?5LXFDt_M;#P-4>6g#7ZA?;C> z_9#{FV&th(`Sc$#awP^oQ6Fwky=SkFH$Ys}AQkh7=n!$wexb37%;M|0%<_=GC$?CL zQ`v`Gi>s%UC5b_15Se-tQ_;`^#U$7>L3L>hw*>lzl%H%W$IIq!vCswIQTMX+< zq?l2{80yfg6%K#mysGDOmGjyMWU8pb8!wA~Zhe#vtB$4-IgS2ElBSV{5p499(9ky) zJ>zOmIuk`5F_G}DCOwBWb1ixiDnnC-o8rc%0|M5YoulX%rNgKsS#d-9sfX6J^rso@ zrt0F6h-uOaVXqc{%M<4T$h8vJHxg)M!FXI)FqxHpPCa08SkmQaqkD=K)*I$I&c>m_ zaPZH4Dl3maK^nh>n?aX3R&4fTiCpa6au7VjUw^WOEeQ3=x#M3^ZbUlvvE7Uqru_-e zu+9IIN_3Hu0V|B|`;c(bOclNIB!4Ev?4sp6TK zdN$!AF7rFg*$a~;<>-y)s#z~KtBi{(-(GEDDzTEbzg5PWN?cd&_Wt_fn)?ZUG{X~n zaXM_91l_KLGx&YXDw#qv%=FlW`S=HbOKuZBX-dd`QH0iEA;*%b!*8uHYKwZ1JA zAd#z{l3~7s<7#7-!5MD+d${`NJs6pOWa$2?p2O7x+zI{s_rWvY#oXX|nwda6In5k? zs!cF1C^{m3v%SftHZiDE4l{GCE>{3K`SZAA?-X{}c#xezPWkoF<|gI0xu!~|%|9$&d$H_`tV%+< zG&H3A#i`Hsy~*c3iDg~HrjGEa4g}v&WORIt;f8X?p`W$9q#++!53{>0!YFE4D^OUBS@{LpwtB5_@%Ha*uqTHah=7a|BLU3fFihKe_&tl{$Yz@8gvWi`2#4EDA>&;#Dw)YNPb}`=D5}icDlvgSx;s*sj zXh_~%H+;t*M&-<>hALpnn09^9mM+&VDlaepA3B65ja*8#3!1VtchzBVA&O+--yMRu zj-WuV{j)C2WrRSQuKgz8?9X62N0{oK)ab7c!A+iQf&34!XUI>i|Eoi=jpIGN2X+X? zTzcxW+cHCO>e(;uQ&!{dq!i@x)TNDidNwGyRUSXP77lHg^)i*L+76-3oGA>{c80Qg zmOAX3-KmQ0ajDGUcICT4!k1Aq?Ga$KI1!OqG*`$Nl}w~xHle?7GWD|qQ8TUJBaL1@ zAWM$zn7_g@S|;1Hs99-nBW+kKV6jKyL9g-qtnnG-yeYcOp>d{g`v;p0gb>>j@l`Zv zhPRrwr{34&a|qI;ie}sXXLAEPp5sS-{~|E~#*>bWsDMgTXs$$ie)58Jf2jJfBI*4f z5g(|F#YXaqrK`tUy)R7>DeK?7MJ!9|JF=jR-Qzxz+Q~N0RVuN(yWZ~IZE^`S(Cf!u zJ;?o-vu1VbNu@$8Kg&XAjIwp4)Ihc`8)x}m>aCW+BG*%`D;q?dT7&NT+_FOVCxt5D z54djB)41n{YYr>7LNUJ4OVULK7rs(zyZ4Nf^t;oIpmgFBaRPT@w}@`_p$xZFhR9|1 z)j#-dCdhRqzQHMA_W1o&F=D~uN(BSozET_dXLCO~x(sB^%Z5>YvV4Q2@fucP zRa;`D6Tns&To8r!hnrv!A+{5KuLX_G`c7@=wMU5G6piI0Jefuq>M3-liFiKQ8;ip+m!3JO8&kzrF~T9?=97eGC{EKvttiWhWnuVC;I>r)RGrNgzo>4k zp}o8fw_VnxLd!)jn_MPKT75a@lDcrT!JdYZ!lM&bc49*rSA)VmbK0bw8cQ8Nd*rs? zBUQ~Q&yiB3O;R@BCDTVxZchUq%rQje9zA{Cp_?b&0#%&M&hN83s#Je$etA(w**f^J zgYoT-tHepi8yP~Xk4g{IH@d}G#vy&WmI3m`yEg=R2Qg_Cl)Xxpd)^}wvVQavS~!Sn zAu^v=Zz}T?erZgxF|iIWVogUhua2&X%R1yB30OMp{ zNkRrlYvgMg!)z(2t_{$9m3Qg1d`)=YRdcT7MoPqms2`V~l3pQg5#b@GYu$ZJjqs36 zoL!j{Hh&6hJ8dy_Rl+Fp2Y)FkR_GWi!2JOYht=I_C4CxfSENPb~=yT*8CTymJNbLbqr?H`oVXN}^D99nyDD z4uB4pimFZsFweFchr$l{l)jy@W(`U{7vY}n=&WPM7<$EdcV(+cK002slZy9o;%Kas zjKuF=GN#VuRfqIqvQmX(%P;D6=A*K*l}jXc@9=k#X}ZpbuDqzYYSb0!sOdP}EV?^% zOUq@F(PtBS%e5uasg(Mv{f)hHLGT#aRl8R21LE^OdO?nG|> zXm7YkA72JfwhSKK-sGdqhb351M5Ez30)3Oq%6SxLLIz{Z{egnFxuUzgd|RwPnO{&^ zrKA$1yK)ugMxe-zCifMU55?Z(R?LrK7pR0Y4!T52nD;iMRvgDucPnXWo^n4Od4-3g zf}t#iySW!`rwmuT)5frh`R{#J97fsb7JMbmtKu#)BvpW+bCl6O))9s+WbpNJt%3o~ zd};C=IW8WvyhN{1{q;7J!fC3a(l=$;IwQ^X65}Fzw1&pIg*lED@ybx7R&V{2k{v96^Y zcJJSwvQpde^==|1Kh~T&304o=e3-ISY4h$VWO14F1I5*oL4MMSFTy7u;i6=-)v}?k ze^T8GR6d?F-H&)%PVzy|g;H!&@?rS}w@;E!OXi}8Bh9vXWot`sJ7_DR@YO=j-#d35 zTuYw6wU!JDTzl=%+7Wl{u6k1)*`lMWPWtc?OFfZE=w(xf zI@cYiRkc?o&bIzXkulCn<*zxCy(*XrK0CLwrp7k;Jbtxa@1kaheyJLGN9ya@s^+H* zwCW?N)Tfz zoTajSc=^TGeFXO|E(P2_$Qfs>Slht<;iGo_BQrBa6k%iZMSqpH39baYn3!kf2fED- zjPKnLRwY&ArTJ6Lo*sxvmnw;axoL|S_N|hxxtJ7Xv2sQEhy0Z(e`V=rsUn8QPoIZMH?FS<;v-@~-$rq!an&57H-*Dx)>d!oIY zWIGn1tcxtqUnqL0bOovL1^>uM^1)kfNx29vTVk7i+1#*4tzyg5E9t(tcP&AO3ahXV z^xoZ=!QmN|HCDk~ZhX@4g9!actV9KV*2N>oAtoDE?qB@U>&8zA8a8kd75-)sqtZ~f zEsd=5gC^Y6+H==!*8Y0VrYYRRjv%App2h(D4}a%+;;lYp+le_T3)=af*Yus8DlCrrKQuJ||Hyn_C2#_6f~OPebLIJ=Gdt_0p29mpIm|6u!nu!E})0@R3s8g55~2e?0>>0j)4&VQUU!rc#y z?bkR@HieUikpVb!gFhBP4bT8|02sgl*aLom8{iHg0ReFK04=@%99&oY7yqlj{9&M% zJ?P~IIDj6?fG2{4xN&Hw