Skip to content

Commit ed923ef

Browse files
authored
Try fetching files from incompatible servers. (#5)
1 parent 3193983 commit ed923ef

File tree

3 files changed

+65
-11
lines changed

3 files changed

+65
-11
lines changed

composer.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
}
1818
},
1919
"require": {
20-
"getdkan/procrastinator": "~1.0.2",
20+
"getdkan/procrastinator": "~2.0.0",
2121
"ext-curl": "*"
2222
}
2323
}

src/FileFetcher.php

+45-6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ class FileFetcher extends Job
99
{
1010
private $temporaryDirectory;
1111
private $chunkSizeInBytes = (1024 * 100);
12+
private $compatibleServer = true;
1213

1314
public function __construct($filePath, $temporaryDirectory = "/tmp")
1415
{
@@ -27,22 +28,36 @@ public function __construct($filePath, $temporaryDirectory = "/tmp")
2728
$state['destination'] = $file->isFile() ? $filePath : $this->getTemporaryFilePath($filePath);
2829

2930
if (!$file->isFile() && $this->serverIsNotCompatible($filePath)) {
30-
throw new \Exception("The server hosting the file does not support ranged requests.");
31+
$this->compatibleServer = false;
32+
$state['total_bytes'] = PHP_INT_MAX;
33+
$this->deleteFile($state['destination']);
34+
} else {
35+
$state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath);
3136
}
3237

33-
$state['total_bytes'] = $file->isFile() ? $file->getSize() : $this->getRemoteFileSize($filePath);
34-
3538
if (file_exists($state['destination'])) {
3639
$state['total_bytes_copied'] = filesize($state['destination']);
3740
}
3841

3942
$this->setState($state);
4043
}
4144

45+
/**
 * Set the job's time limit, unless the remote server is flagged as incompatible.
 *
 * When the `compatibleServer` flag is false the request is rejected and no
 * limit is applied; otherwise the call is forwarded unchanged to the parent
 * implementation.
 * NOTE(review): presumably a time limit is refused here because the
 * single-pass copy used for incompatible servers cannot be resumed — confirm.
 *
 * @param int $seconds Requested time limit, in seconds.
 *
 * @return bool False when the server is incompatible; otherwise whatever the
 *              parent implementation returns.
 */
public function setTimeLimit(int $seconds): bool
{
    return $this->compatibleServer
        ? parent::setTimeLimit($seconds)
        : false;
}
52+
4253
protected function runIt()
4354
{
4455
try {
45-
$this->copy();
56+
if ($this->compatibleServer) {
57+
$this->copy();
58+
} else {
59+
$this->copyIncompatible();
60+
}
4661
$result = $this->getResult();
4762
$result->setStatus(Result::DONE);
4863
} catch (FileCopyInterruptedException $e) {
@@ -149,6 +164,23 @@ private function copy()
149164
}
150165
}
151166

167+
/**
 * Copy the whole source to the destination in a single pass.
 *
 * Reads the 'source' and 'destination' state properties, streams the source
 * into the destination in fixed-size buffers, and records the number of
 * bytes written in the 'total_bytes_copied' state property.
 *
 * @throws \RuntimeException If either stream cannot be opened, or a read or
 *                           write fails part-way through the copy.
 */
private function copyIncompatible()
{
    $from = $this->getStateProperty('source');
    $to = $this->getStateProperty('destination');

    $bufferSize = 1048576;
    $bytesCopied = 0;

    // fopen() returns false on failure; passing that to feof() would either
    // loop forever (PHP 7) or raise a TypeError (PHP 8), so fail explicitly.
    $fin = fopen($from, "rb");
    if ($fin === false) {
        throw new \RuntimeException("Unable to open the source for reading: {$from}");
    }

    $fout = fopen($to, "w");
    if ($fout === false) {
        fclose($fin);
        throw new \RuntimeException("Unable to open the destination for writing: {$to}");
    }

    try {
        while (!feof($fin)) {
            $chunk = fread($fin, $bufferSize);
            if ($chunk === false) {
                throw new \RuntimeException("Failed reading from the source: {$from}");
            }

            $written = fwrite($fout, $chunk);
            if ($written === false) {
                throw new \RuntimeException("Failed writing to the destination: {$to}");
            }

            $bytesCopied += $written;
        }
    } finally {
        // Release both handles even when the copy is aborted by an exception.
        fclose($fin);
        fclose($fout);
    }

    $this->setStateProperty('total_bytes_copied', $bytesCopied);
}
183+
152184
private function getChunk()
153185
{
154186
$url = $this->getStateProperty('source');
@@ -175,8 +207,8 @@ private function getChunk()
175207

176208
/**
 * Build the temporary file path used to store a remote file.
 *
 * The file name is derived from the URL's host (dots replaced with
 * underscores) followed by its path (slashes replaced with underscores), and
 * is then handed to getTemporaryFile() to produce the final path.
 *
 * @param string $sourceFileUrl URL of the remote file.
 *
 * @return mixed Whatever getTemporaryFile() returns for the derived name.
 */
private function getTemporaryFilePath($sourceFileUrl)
{
    // parse_url() returns false for seriously malformed URLs and omits the
    // 'host' / 'path' keys when the URL lacks them; treat those as empty
    // strings instead of triggering undefined-index warnings.
    $info = parse_url($sourceFileUrl);
    if (!is_array($info)) {
        $info = [];
    }

    $host = $info['host'] ?? '';
    $path = $info['path'] ?? '';

    $file_name = str_replace(".", "_", $host) . str_replace("/", "_", $path);
    return $this->getTemporaryFile($file_name);
}
182214

@@ -218,6 +250,13 @@ private function setState($state)
218250
$this->getResult()->setData(json_encode($state));
219251
}
220252

253+
/**
 * Remove a file from disk if it is present.
 *
 * Does nothing when nothing exists at the given path.
 *
 * @param string $file Path of the file to remove.
 */
private function deleteFile($file)
{
    if (!file_exists($file)) {
        return;
    }

    unlink($file);
}
259+
221260
public function setStateProperty($property, $value)
222261
{
223262
$state = $this->getState();

test/FileFetcherTest.php

+19-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
namespace FileFetcherTest;
44

5+
use FileFetcher\FileFetcher;
6+
use Procrastinator\Result;
7+
58
class FileFetcherTest extends \PHPUnit\Framework\TestCase
69
{
710

@@ -15,7 +18,8 @@ public function testRemote()
1518
);
1619
$result = $fetcher->run();
1720
$data = json_decode($result->getData());
18-
$this->assertEquals("/tmp/sacramentorealestatetransactions.csv", $data->destination);
21+
$filepath = "/tmp/samplecsvs_s3_amazonaws_com_sacramentorealestatetransactions.csv";
22+
$this->assertEquals($filepath, $data->destination);
1923
$this->assertTrue($data->temporary);
2024
}
2125

@@ -50,18 +54,29 @@ public function testTimeOut()
5054
$fetcher2->run();
5155
$this->assertEquals($file_size, $fetcher2->getStateProperty('total_bytes_copied'));
5256
$this->assertEquals(
53-
filesize("/tmp/{$this->sampleCsvSize}_mb_sample.csv"),
57+
filesize("/tmp/dkan_default_content_files_s3_amazonaws_com_{$this->sampleCsvSize}_mb_sample.csv"),
5458
$fetcher2->getStateProperty('total_bytes_copied')
5559
);
5660
$this->assertEquals($fetcher2->getResult()->getStatus(), \Procrastinator\Result::DONE);
5761
}
5862

63+
/**
 * Fetch a file from a live remote URL with a one-second time limit requested,
 * then check that the job finishes with status DONE and reports 2853 bytes
 * copied.
 * NOTE(review): the test name implies this server does not support ranged
 * requests, and the test depends on network access — confirm both.
 */
public function testIncompatibleServer()
{
    $job = new FileFetcher(
        "https://data.medicare.gov/api/views/42wc-33ci/rows.csv?accessType=DOWNLOAD&sorting=true"
    );
    $job->setTimeLimit(1);

    $outcome = $job->run();
    $this->assertEquals(Result::DONE, $outcome->getStatus());

    $state = json_decode($outcome->getData());
    $this->assertEquals(2853, $state->total_bytes_copied);
}
72+
5973
public function tearDown(): void
6074
{
6175
parent::tearDown();
6276
$files = [
63-
"/tmp/{$this->sampleCsvSize}_mb_sample.csv",
64-
"/tmp/sacramentorealestatetransactions.csv"
77+
"/tmp/samplecsvs_s3_amazonaws_com_sacramentorealestatetransactions.csv",
78+
"/tmp/dkan_default_content_files_s3_amazonaws_com_{$this->sampleCsvSize}_mb_sample.csv",
79+
"/tmp/data_medicare_gov_api_views_42wc_33ci_rows.csv",
6580
];
6681

6782
foreach ($files as $file) {

0 commit comments

Comments
 (0)