Skip to content

Commit cbba9b4

Browse files
committed
Merge branch '1.x'
2 parents 18bd11a + 304f855 commit cbba9b4

File tree

7 files changed

+74
-6
lines changed

7 files changed

+74
-6
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ jobs:
4646
- '2.5.0'
4747
- '2.6.0'
4848
- '2.7.0'
49+
- '2.8.0'
4950

5051
name: PHP ${{ matrix.php }} - TIKA ${{ matrix.tika }}
5152

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## v1.3.0
4+
5+
* Added `WebClient::setFetcherName()` to easily set the [fetcher name](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes) (thanks to @relthyg)
6+
* Tested up to Apache Tika 1.28.5 and 2.8.0
7+
38
## v1.2.5
49

510
* Tested up to Apache Tika 1.28.5 and 2.7.0

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ to work with the new versions of the tool.
3131
* Support for local and remote resources
3232
* No heavyweight library dependencies
3333
* Compatible with Apache Tika 1.15 or greater
34-
* Tested up to 1.28.5 and 2.7.0
34+
* Tested up to 1.28.5 and 2.8.0
3535
* Works on Linux, macOS, Windows and probably on FreeBSD
3636

3737
## Requirements
@@ -124,8 +124,8 @@ You can use an URL instead of a file path and the library will download the file
124124

125125
If you use Apache Tika >= 2.0.0, you *can* [define an HttpFetcher](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes)
126126
and use the option `-enableUnsecureFeatures -enableFileUrl` when starting the server to make the server download remote
127-
files when passing a URL instead of a filname to `$client->getText()`. In order to do so, you must set the name of
128-
the HttpFetcher using `$client->setFetcherName('yourFetcherName')`.
127+
files when passing a URL instead of a filename. In order to do so, you must set the name of the HttpFetcher using
128+
`$client->setFetcherName('yourFetcherName')`.
129129

130130
### Methods
131131

@@ -190,6 +190,14 @@ $client->setDownloadRemote(true);
190190
$client->getDownloadRemote();
191191
```
192192

193+
194+
Set the [fetcher name](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes):
195+
196+
```php
197+
$client->setFetcherName($fetcher); // one of FileSystemFetcher, HttpFetcher, S3Fetcher, GCSFetcher, or SolrFetcher
198+
$client->getFetcherName();
199+
```
200+
193201
#### Command line client
194202

195203
Set/get JAR/Java paths (only CLI mode):

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"supported-versions": [
4949
"1.15", "1.16", "1.17", "1.18", "1.19", "1.19.1", "1.20", "1.21", "1.22", "1.23", "1.24", "1.24.1",
5050
"1.25", "1.26", "1.27", "1.28", "1.28.1", "1.28.2", "1.28.3", "1.28.4", "1.28.5",
51-
"2.0.0", "2.1.0", "2.2.0", "2.2.1", "2.3.0", "2.4.0", "2.5.0", "2.6.0", "2.7.0"
51+
"2.0.0", "2.1.0", "2.2.0", "2.2.1", "2.3.0", "2.4.0", "2.5.0", "2.6.0", "2.7.0", "2.8.0"
5252
]
5353
},
5454
"scripts": {

src/Clients/WebClient.php

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,28 @@ public function setRetries(int $retries): self
215215
return $this;
216216
}
217217

218+
/**
 * Return the fetcher name currently stored on this client.
 *
 * Fetchers only apply to Apache Tika >= 2.0.0; the value may be null.
 *
 * @return string|null
 */
public function getFetcherName(): ?string
{
    return $this->fetcherName;
}
227+
218228
/**
219229
* Set the name of the fetcher to be used (for Tika >= 2.0.0 only)
230+
*
231+
* @link https://cwiki.apache.org/confluence/display/TIKA/tika-pipes
220232
*/
221233
public function setFetcherName(string $fetcherName): self
222234
{
235+
if(!in_array($fetcherName, ['FileSystemFetcher', 'HttpFetcher', 'S3Fetcher', 'GCSFetcher', 'SolrFetcher']))
236+
{
237+
throw new Exception("Fetcher name $fetcherName is invalid, see https://cwiki.apache.org/confluence/display/TIKA/tika-pipes");
238+
}
239+
223240
$this->fetcherName = $fetcherName;
224241

225242
return $this;
@@ -643,10 +660,13 @@ protected function getParameters(string $type, string $file = null): array
643660

644661
if(!empty($file) && preg_match('/^http/', $file))
645662
{
646-
if($this->fetcherName) {
663+
if($this->fetcherName)
664+
{
647665
$headers[] = "fetcherName:$this->fetcherName";
648666
$headers[] = "fetchKey:$file";
649-
} else {
667+
}
668+
else
669+
{
650670
$headers[] = "fileUrl:$file";
651671
}
652672
}

tests/ErrorTest.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,22 @@ public function testUnsupportedChunkSize(): void
230230
}
231231
}
232232

233+
/**
 * Test invalid fetcher name
 *
 * The expected exception is declared before the call, so the test fails when
 * setFetcherName() accepts the unknown name instead of passing with no assertion executed.
 */
public function testUnsupportedFetcherName(): void
{
    $this->expectException(Exception::class);
    // PHPUnit matches this as a substring of the thrown exception's message
    $this->expectExceptionMessage('Fetcher name UnknownFetcher is invalid');

    $client = Client::make('localhost', 9998);
    $client->setFetcherName('UnknownFetcher');
}
248+
233249
/**
234250
* Test wrong request type for all clients
235251
*

tests/WebTest.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,24 @@ public function testSetRetries(): void
156156
$this->assertEquals(5, $client->getRetries());
157157
}
158158

159+
/**
 * Fetcher name round trip: the value passed to the setter is returned by the getter.
 *
 * Skipped when the server under test is older than Apache Tika 2.0.0.
 */
public function testFetcherName(): void
{
    // Guard clause: markTestSkipped() aborts the test, so nothing below runs on Tika 1.x
    if(version_compare(self::$version, '2.0.0', '<'))
    {
        $this->markTestSkipped('Apache Tika 1.x doesn\'t have tika-pipes module');
    }

    $expected = 'FileSystemFetcher';

    $client = Client::make('localhost', 9998);
    $client->setFetcherName($expected);

    $this->assertEquals($expected, $client->getFetcherName());
}
176+
159177
/**
160178
* Test delayed check
161179
*/

0 commit comments

Comments
 (0)