Skip to content

Commit c13cfbf

Browse files
committed
Added recursive metadata support (fixes #16)
1 parent f4e725f commit c13cfbf

File tree

8 files changed

+108
-5
lines changed

8 files changed

+108
-5
lines changed

src/Client.php

+15-2
Original file line numberDiff line numberDiff line change
@@ -231,13 +231,26 @@ public function setDownloadRemote($download)
231231
/**
232232
* Gets file metadata
233233
*
234+
* @link https://wiki.apache.org/tika/TikaJAXRS#Recursive_Metadata_and_Content
234235
* @param string $file
236+
* @param string $recursive
235237
* @return \Vaites\ApacheTika\Metadata\Metadata
236238
* @throws \Exception
237239
*/
238-
public function getMetadata($file)
240+
public function getMetadata($file, $recursive = null)
239241
{
240-
return $this->request('meta', $file);
242+
if(is_null($recursive))
243+
{
244+
return $this->request('meta', $file);
245+
}
246+
elseif(in_array($recursive, ['text', 'html', 'ignore']))
247+
{
248+
return $this->request("rmeta/$recursive", $file);
249+
}
250+
else
251+
{
252+
throw new Exception("Unknown recursive type (must be text, html, ignore or null)");
253+
}
241254
}
242255

243256
/**

src/Clients/CLIClient.php

+6
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,12 @@ protected function getArguments($type, $file = null)
271271
$arguments[] = '--metadata --json';
272272
break;
273273

274+
case 'rmeta/ignore':
275+
case 'rmeta/html':
276+
case 'rmeta/text':
277+
throw new Exception('Recursive metadata is not supported in command line mode');
278+
break;
279+
274280
case 'text':
275281
$arguments[] = '--text';
276282
break;

src/Clients/WebClient.php

+5-2
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ public function request($type, $file = null)
353353
// request completed successfully
354354
if($status == 200)
355355
{
356-
if($type == 'meta')
356+
if(preg_match('/^(meta|rmeta)/', $type))
357357
{
358358
$response = Metadata::make($response, $file);
359359
}
@@ -497,7 +497,10 @@ protected function getParameters($type, $file = null)
497497
break;
498498

499499
case 'meta':
500-
$resource = 'meta';
500+
case 'rmeta/html':
501+
case 'rmeta/ignore':
502+
case 'rmeta/text':
503+
$resource = $type;
501504
$headers[] = 'Accept: application/json';
502505
break;
503506

src/Metadata/DocumentMetadata.php

+4
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ protected function setAttribute($key, $value)
164164
$this->encoding = $value;
165165
break;
166166

167+
case 'x-tika:content':
168+
$this->content = $value;
169+
break;
170+
167171
default:
168172
return false;
169173
}

src/Metadata/ImageMetadata.php

+4
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ protected function setAttribute($key, $value)
6060
$this->width = (int) $value;
6161
break;
6262

63+
case 'x-tika:content':
64+
$this->content = $value;
65+
break;
66+
6367
default:
6468
return false;
6569
}

src/Metadata/Metadata.php

+11-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
*/
1212
abstract class Metadata
1313
{
14+
/**
15+
* Content
16+
*
17+
* @var string
18+
*/
19+
public $content = null;
20+
1421
/**
1522
* MIME type
1623
*
@@ -86,7 +93,10 @@ public static function make($response, $file)
8693
}
8794

8895
// decode the JSON response
89-
$meta = json_decode($response);
96+
$json = json_decode($response);
97+
98+
// get the meta info
99+
$meta = is_array($json) ? current($json) : $json;
90100

91101
// exceptions if metadata is not valid
92102
if(json_last_error())

tests/ErrorTest.php

+18
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,24 @@ public function testRequestRestrictedOptions()
150150
}
151151
}
152152

153+
/**
154+
* Test wrong recursive metadata type
155+
*/
156+
public function testRequestMetadataType()
157+
{
158+
try
159+
{
160+
$client = Client::prepare('localhost', 9998, [CURLOPT_PROXY => 'localhost']);
161+
$client->getMetadata(dirname(__DIR__) . '/samples/sample3.png', 'bad');
162+
163+
$this->fail();
164+
}
165+
catch(Exception $exception)
166+
{
167+
$this->assertContains('Unknown recursive type', $exception->getMessage());
168+
}
169+
}
170+
153171
/**
154172
* Test unsupported media type
155173
*/

tests/WebTest.php

+45
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,49 @@ public function testSetRetries()
115115

116116
$this->assertEquals(5, $client->getRetries());
117117
}
118+
119+
/**
120+
* Recursive text metadata test
121+
*
122+
* @dataProvider ocrProvider
123+
*
124+
* @param string $file
125+
* @throws \Exception
126+
*/
127+
public function testTextRecursiveMetadata($file)
128+
{
129+
$metadata = self::$client->getMetadata($file, 'text');
130+
131+
$this->assertContains('Ut enim ad minim veniam', $metadata->content);
132+
}
133+
134+
/**
135+
* Recursive HTML metadata test
136+
*
137+
* @dataProvider ocrProvider
138+
*
139+
* @param string $file
140+
* @throws \Exception
141+
*/
142+
public function testHtmlRecursiveMetadata($file)
143+
{
144+
$metadata = self::$client->getMetadata($file, 'html');
145+
146+
$this->assertContains('Ut enim ad minim veniam', $metadata->content);
147+
}
148+
149+
/**
150+
* Recursive ignore metadata test
151+
*
152+
* @dataProvider ocrProvider
153+
*
154+
* @param string $file
155+
* @throws \Exception
156+
*/
157+
public function testIgnoreRecursiveMetadata($file)
158+
{
159+
$metadata = self::$client->getMetadata($file, 'ignore');
160+
161+
$this->assertNull($metadata->content);
162+
}
118163
}

0 commit comments

Comments
 (0)