diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 79f44a9..2ef8d84 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -304,7 +304,7 @@ public function getIdfromPrefixedEntityNode($key) { * - If not remote and exists a Drupal file object * - If does not exist boolean FALSE */ - public function file_get($uri, File $zip_file = NULL) { + public function file_get($uri, File $zip_file = NULL, $force = FALSE) { $uri = trim($uri); $parsed_url = parse_url($uri); @@ -350,11 +350,10 @@ public function file_get($uri, File $zip_file = NULL) { // Means no local file but we can check inside a ZIP. // Try with the ZIP file in case there is a ZIP and local failed // Use the Zip file uuid to prefix the destination. - // @TODO file_build_uri is deprecated replace before Drupal 10.0.0 $localfile = $this->streamWrapperManager->normalizeUri( $destination . $zip_file->uuid() . '/' . urldecode($parsed_url['path']) ); - if (!file_exists($localfile)) { + if (!file_exists($localfile) || $force) { $destination_zip = $destination . $zip_file->uuid() . '/'; if (!$this->fileSystem->prepareDirectory( $destination_zip, @@ -406,7 +405,7 @@ public function file_get($uri, File $zip_file = NULL) { $localfile = $path; } // Actual remote heavy lifting only if not present. - if (!file_exists($localfile)) { + if (!file_exists($localfile) || $force) { if (!$this->fileSystem->prepareDirectory( $destination, FileSystemInterface::CREATE_DIRECTORY @@ -495,14 +494,30 @@ public function retrieve_remote_file( if ($max_time == 0) { $max_time = 720.00; } + // Do a HEAD request first. Be sure we don't have anything in the 4XX or 5xx range + $head = $this->httpClient->head($uri, ['timeout' => round($max_time,2)]); + // Note. This will never run per se. Because the client is setup to throw an exception, but... + // If Drupal changes the client base setup in the future we won't know and we won't catch it + // So keeping it. + if ($head->getStatusCode() >= 400) { + return FALSE; + } $response = $this->httpClient->get($uri, ['sink' => $path, 'timeout' => round($max_time,2)]); + // Edge case... in a fraction of time, someone closes the file from the remote source. We can still cancel + // Same as with the HEAD. In this current setup this won't run and that is ok, the catch deals with it. + if ($response->getStatusCode() >= 400) { + if (file_exists($path)) { + @unlink($path); + } + return FALSE; + } + $filename_from_remote = $basename; $filename_from_remote_without_extension = pathinfo($filename_from_remote, PATHINFO_FILENAME); $extensions_from_remote = pathinfo($filename_from_remote, PATHINFO_EXTENSION); $extension_from_mime = NULL; $extension = NULL; $content_disposition = $response->getHeader('Content-Disposition'); - if (!empty($content_disposition)) { $filename_from_remote = $this->getFilenameFromDisposition($content_disposition[0]); if ($filename_from_remote) { @@ -526,14 +541,20 @@ public function retrieve_remote_file( } } catch (\Exception $exception) { + // Deals with 4xx and 5xx too. $message_vars = [ '@uri' => $uri, '@path' => $path, '@error' => $exception->getMessage(), '@time' => $max_time, + '@code' => $exception->getCode() ]; - $message = 'Unable to download remote file from @uri to local @path with error: @error. Verify URL exists, file can be downloaded in @time seconds, its openly accessible and destination is writable.'; + $message = 'Unable to download remote file from @uri to local @path with HTTP code @code and error: @error. Verify URL exists, file can be downloaded in @time seconds, its openly accessible and destination is writable.'; $this->loggerFactory->get('ami')->error($message, $message_vars); + // in case the sink did download the file/we delete it here. + if (file_exists($path)) { + @unlink($path); + } return FALSE; } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 3e346c4..ef71bd1 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -979,7 +979,6 @@ protected function processFile($data) { ); if (!$file) { $data->info['force_file_process'] = TRUE; - // OK still there and alive. If not we have to force reprocessing! } elseif (file_exists($file->getFileUri()) == FALSE) { $data->info['force_file_process'] = TRUE; @@ -992,7 +991,7 @@ protected function processFile($data) { // First check if we already have the info here and not forced to recreate, if so do nothing. if (($data->info['force_file_process'] ?? FALSE)) { $file = $this->AmiUtilityService->file_get(trim($data->info['filename']), - $data->info['zip_file']); + $data->info['zip_file'], $data->info['force_file_process']); if ($file) { $force_destination = isset($data->info['ops_forcemanaged_destination_file']) ? (bool) $data->info['ops_forcemanaged_destination_file'] : TRUE; $reduced = $data->info['reduced'] ?? FALSE;