@@ -22,10 +22,17 @@ trait HtmlTableFromNode {
22
22
/** @use TableExtractor<TColumnReturn> */
23
23
use TableExtractor;
24
24
25
- public function inferTableFrom ( string $ source , bool $ normalize = true ): void {
26
- $ this ->inferTableFromDOMNodeList (
27
- DOMDocumentFactory::bodyFromHtml ( $ source , normalize: $ normalize )->childNodes
28
- );
25
+ /** @throws InvalidSource When "table" cannot be resolved in given source. */
26
+ public function inferTableFrom ( string |DOMElement $ source , bool $ normalize = true ): void {
27
+ $ source = $ this ->getValidatedTableSource ( $ source , $ normalize );
28
+
29
+ if ( $ source instanceof DOMNodeList ) {
30
+ $ this ->inferTableFromDOMNodeList ( $ source );
31
+
32
+ return ;
33
+ }
34
+
35
+ $ this ->inferChildNodesFromTable ( $ source );
29
36
}
30
37
31
38
/** @param DOMNodeList<DOMNode> $elementList */
@@ -87,37 +94,43 @@ public function inferTableDataFrom( iterable $elementList ): array {
87
94
return $ data ;
88
95
}
89
96
90
- /** @param DOMNodeList<DOMNode> $elementList */
91
- protected function inferTableFromDOMNodeList ( DOMNodeList $ elementList ): void {
92
- foreach ( $ elementList as $ node ) {
93
- if ( ! $ tableStructure = $ this ->traceStructureFrom ( $ node ) ) {
94
- continue ;
95
- }
97
+ protected function inferChildNodesFromTable ( DOMElement $ element ): bool {
98
+ $ iterator = $ this ->childNodesIteratorOfTable ( $ element );
96
99
97
- assert ( $ node instanceof DOMElement );
100
+ if ( ! $ iterator || ! $ tableStructure = $ this ->traceTableStructureIn ( $ iterator ) ) {
101
+ return false ;
102
+ }
98
103
99
- [$ bodyNode , $ captionNode , $ headNode ] = $ tableStructure ;
104
+ [$ bodyNode , $ captionNode , $ headNode ] = $ tableStructure ;
100
105
101
- $ splId = spl_object_id ( $ node );
102
- $ id = $ splId * spl_object_id ( $ bodyNode );
106
+ $ splId = spl_object_id ( $ element );
107
+ $ id = $ splId * spl_object_id ( $ bodyNode );
103
108
104
- $ this ->dispatchEventForTable ( $ id , $ bodyNode );
109
+ $ this ->dispatchEventForTable ( $ id , $ bodyNode );
105
110
106
- $ captionNode && $ this ->captionStructureContentFrom ( $ captionNode );
107
- $ headNode && $ this ->headStructureContentFrom ( $ headNode );
111
+ $ captionNode && $ this ->captionStructureContentFrom ( $ captionNode );
112
+ $ headNode && $ this ->headStructureContentFrom ( $ headNode );
108
113
109
- $ iterator = $ this ->bodyStructureIteratorFrom ( $ bodyNode );
114
+ $ iterator = $ this ->bodyStructureIteratorFrom ( $ bodyNode );
110
115
111
- $ iterator ->valid () && ( $ this ->discoveredTable__rows [ $ id ] = $ iterator );
116
+ $ iterator ->valid () && ( $ this ->discoveredTable__rows [ $ id ] = $ iterator );
112
117
113
- if ( $ this ->discoveredTargetedTable ( $ node ) ) {
114
- $ this ->dispatchEvent ( new TableTraced ( Table::TBody, EventAt::End, $ node , $ this ) );
118
+ $ this ->dispatchEvent ( new TableTraced ( Table::TBody, EventAt::End, $ element , $ this ) );
115
119
116
- break ;
120
+ return true ;
121
+ }
122
+
123
+ /** @param DOMNodeList<DOMNode> $elementList */
124
+ protected function inferTableFromDOMNodeList ( DOMNodeList $ elementList ): void {
125
+ foreach ( $ elementList as $ node ) {
126
+ if ( ! AssertDOMElement::isValid ( $ node ) || ! $ this ->inferChildNodesFromTable ( $ node ) ) {
127
+ continue ;
117
128
}
118
129
119
- $ this ->dispatchEvent ( new TableTraced ( Table::TBody, EventAt::End, $ node , $ this ) );
120
- }//end foreach
130
+ if ( $ this ->discoveredTargetedTable ( $ node ) ) {
131
+ break ;
132
+ }
133
+ }
121
134
}
122
135
123
136
final protected function findTableStructureIn ( DOMNode $ node , int $ minChildNodesCount = 0 ): void {
@@ -132,8 +145,8 @@ final protected function isTableRowStructure( DOMNode $node ): bool {
132
145
}
133
146
134
147
/** @return Iterator<int,DOMNode> */
135
- private function getChildNodesIteratorFrom ( DOMNode $ node ): Iterator {
136
- return $ node ->childNodes ->getIterator ();
148
+ private function getChildNodesIteratorFrom ( DOMElement $ element ): Iterator {
149
+ return $ element ->childNodes ->getIterator ();
137
150
}
138
151
139
152
/**
@@ -151,15 +164,15 @@ private function assertCurrentColumnIsDOMElement( mixed $node ): void {
151
164
}
152
165
153
166
/** @return ?Iterator<int,DOMNode> */
154
- private function fromCurrentStructure ( DOMNode $ node ): ?Iterator {
155
- if ( ! AssertDOMElement:: isValid ( $ node , 'table ' ) ) {
156
- $ this ->findTableStructureIn ( $ node );
167
+ private function childNodesIteratorOfTable ( DOMElement $ element ): ?Iterator {
168
+ if ( 'table ' !== $ element -> tagName ) {
169
+ $ this ->findTableStructureIn ( $ element );
157
170
158
171
return null ;
159
172
}
160
173
161
- return $ this ->isTargetedTable ( $ node ) && $ node ->childNodes ->length
162
- ? $ this ->getChildNodesIteratorFrom ( $ node )
174
+ return $ this ->isTargetedTable ( $ element ) && $ element ->childNodes ->length
175
+ ? $ this ->getChildNodesIteratorFrom ( $ element )
163
176
: null ;
164
177
}
165
178
@@ -209,12 +222,11 @@ private function headStructureContentFrom( DOMElement $node ): void {
209
222
$ this ->dispatchEvent ( new TableTraced ( Table::THead, EventAt::End, $ node , $ this ) );
210
223
}
211
224
212
- /** @return ?array{0:DOMElement,1:?DOMElement,2:?DOMElement} */
213
- private function traceStructureFrom ( DOMNode $ node ): ?array {
214
- if ( ! $ tableIterator = $ this ->fromCurrentStructure ( $ node ) ) {
215
- return null ;
216
- }
217
-
225
+ /**
226
+ * @param Iterator<int,DOMNode> $tableIterator
227
+ * @return ?array{0:DOMElement,1:?DOMElement,2:?DOMElement}
228
+ */
229
+ private function traceTableStructureIn ( Iterator $ tableIterator ): ?array {
218
230
$ bodyNode = $ captionNode = $ headNode = null ;
219
231
220
232
while ( ! $ bodyNode && $ tableIterator ->valid () ) {
@@ -303,4 +315,20 @@ private function discoveredTargetedTable( mixed $node ): bool {
303
315
&& AssertDOMElement::isValid ( $ node )
304
316
&& $ this ->isTargetedTable ( $ node );
305
317
}
318
+
319
+ /**
320
+ * @return DOMElement|DOMNodeList<DOMNode>
321
+ * @throws InvalidSource When source invalid.
322
+ */
323
+ private function getValidatedTableSource ( string |DOMElement $ source , bool $ normalize ): DOMElement |DOMNodeList {
324
+ if ( ! $ source instanceof DOMElement ) {
325
+ return DOMDocumentFactory::bodyFromHtml ( $ source , normalize: $ normalize )->childNodes ;
326
+ }
327
+
328
+ 'table ' !== $ source ->tagName && throw new InvalidSource (
329
+ sprintf ( '%s trait only supports table "DOMElement" ' , HtmlTableFromNode::class )
330
+ );
331
+
332
+ return $ source ;
333
+ }
306
334
}
0 commit comments