@@ -22,10 +22,17 @@ trait HtmlTableFromNode {
2222 /** @use TableExtractor<TColumnReturn> */
2323 use TableExtractor;
2424
25- public function inferTableFrom ( string $ source , bool $ normalize = true ): void {
26- $ this ->inferTableFromDOMNodeList (
27- DOMDocumentFactory::bodyFromHtml ( $ source , normalize: $ normalize )->childNodes
28- );
25+ /** @throws InvalidSource When "table" cannot be resolved in given source. */
26+ public function inferTableFrom ( string |DOMElement $ source , bool $ normalize = true ): void {
27+ $ source = $ this ->getValidatedTableSource ( $ source , $ normalize );
28+
29+ if ( $ source instanceof DOMNodeList ) {
30+ $ this ->inferTableFromDOMNodeList ( $ source );
31+
32+ return ;
33+ }
34+
35+ $ this ->inferChildNodesFromTable ( $ source );
2936 }
3037
3138 /** @param DOMNodeList<DOMNode> $elementList */
@@ -87,37 +94,43 @@ public function inferTableDataFrom( iterable $elementList ): array {
8794 return $ data ;
8895 }
8996
90- /** @param DOMNodeList<DOMNode> $elementList */
91- protected function inferTableFromDOMNodeList ( DOMNodeList $ elementList ): void {
92- foreach ( $ elementList as $ node ) {
93- if ( ! $ tableStructure = $ this ->traceStructureFrom ( $ node ) ) {
94- continue ;
95- }
97+ protected function inferChildNodesFromTable ( DOMElement $ element ): bool {
98+ $ iterator = $ this ->childNodesIteratorOfTable ( $ element );
9699
97- assert ( $ node instanceof DOMElement );
100+ if ( ! $ iterator || ! $ tableStructure = $ this ->traceTableStructureIn ( $ iterator ) ) {
101+ return false ;
102+ }
98103
99- [$ bodyNode , $ captionNode , $ headNode ] = $ tableStructure ;
104+ [$ bodyNode , $ captionNode , $ headNode ] = $ tableStructure ;
100105
101- $ splId = spl_object_id ( $ node );
102- $ id = $ splId * spl_object_id ( $ bodyNode );
106+ $ splId = spl_object_id ( $ element );
107+ $ id = $ splId * spl_object_id ( $ bodyNode );
103108
104- $ this ->dispatchEventForTable ( $ id , $ bodyNode );
109+ $ this ->dispatchEventForTable ( $ id , $ bodyNode );
105110
106- $ captionNode && $ this ->captionStructureContentFrom ( $ captionNode );
107- $ headNode && $ this ->headStructureContentFrom ( $ headNode );
111+ $ captionNode && $ this ->captionStructureContentFrom ( $ captionNode );
112+ $ headNode && $ this ->headStructureContentFrom ( $ headNode );
108113
109- $ iterator = $ this ->bodyStructureIteratorFrom ( $ bodyNode );
114+ $ iterator = $ this ->bodyStructureIteratorFrom ( $ bodyNode );
110115
111- $ iterator ->valid () && ( $ this ->discoveredTable__rows [ $ id ] = $ iterator );
116+ $ iterator ->valid () && ( $ this ->discoveredTable__rows [ $ id ] = $ iterator );
112117
113- if ( $ this ->discoveredTargetedTable ( $ node ) ) {
114- $ this ->dispatchEvent ( new TableTraced ( Table::TBody, EventAt::End, $ node , $ this ) );
118+ $ this ->dispatchEvent ( new TableTraced ( Table::TBody, EventAt::End, $ element , $ this ) );
115119
116- break ;
120+ return true ;
121+ }
122+
123+ /** @param DOMNodeList<DOMNode> $elementList */
124+ protected function inferTableFromDOMNodeList ( DOMNodeList $ elementList ): void {
125+ foreach ( $ elementList as $ node ) {
126+ if ( ! AssertDOMElement::isValid ( $ node ) || ! $ this ->inferChildNodesFromTable ( $ node ) ) {
127+ continue ;
117128 }
118129
119- $ this ->dispatchEvent ( new TableTraced ( Table::TBody, EventAt::End, $ node , $ this ) );
120- }//end foreach
130+ if ( $ this ->discoveredTargetedTable ( $ node ) ) {
131+ break ;
132+ }
133+ }
121134 }
122135
123136 final protected function findTableStructureIn ( DOMNode $ node , int $ minChildNodesCount = 0 ): void {
@@ -132,8 +145,8 @@ final protected function isTableRowStructure( DOMNode $node ): bool {
132145 }
133146
134147 /** @return Iterator<int,DOMNode> */
135- private function getChildNodesIteratorFrom ( DOMNode $ node ): Iterator {
136- return $ node ->childNodes ->getIterator ();
148+ private function getChildNodesIteratorFrom ( DOMElement $ element ): Iterator {
149+ return $ element ->childNodes ->getIterator ();
137150 }
138151
139152 /**
@@ -151,15 +164,15 @@ private function assertCurrentColumnIsDOMElement( mixed $node ): void {
151164 }
152165
153166 /** @return ?Iterator<int,DOMNode> */
154- private function fromCurrentStructure ( DOMNode $ node ): ?Iterator {
155- if ( ! AssertDOMElement:: isValid ( $ node , 'table ' ) ) {
156- $ this ->findTableStructureIn ( $ node );
167+ private function childNodesIteratorOfTable ( DOMElement $ element ): ?Iterator {
168+ if ( 'table ' !== $ element -> tagName ) {
169+ $ this ->findTableStructureIn ( $ element );
157170
158171 return null ;
159172 }
160173
161- return $ this ->isTargetedTable ( $ node ) && $ node ->childNodes ->length
162- ? $ this ->getChildNodesIteratorFrom ( $ node )
174+ return $ this ->isTargetedTable ( $ element ) && $ element ->childNodes ->length
175+ ? $ this ->getChildNodesIteratorFrom ( $ element )
163176 : null ;
164177 }
165178
@@ -209,12 +222,11 @@ private function headStructureContentFrom( DOMElement $node ): void {
209222 $ this ->dispatchEvent ( new TableTraced ( Table::THead, EventAt::End, $ node , $ this ) );
210223 }
211224
212- /** @return ?array{0:DOMElement,1:?DOMElement,2:?DOMElement} */
213- private function traceStructureFrom ( DOMNode $ node ): ?array {
214- if ( ! $ tableIterator = $ this ->fromCurrentStructure ( $ node ) ) {
215- return null ;
216- }
217-
225+ /**
226+ * @param Iterator<int,DOMNode> $tableIterator
227+ * @return ?array{0:DOMElement,1:?DOMElement,2:?DOMElement}
228+ */
229+ private function traceTableStructureIn ( Iterator $ tableIterator ): ?array {
218230 $ bodyNode = $ captionNode = $ headNode = null ;
219231
220232 while ( ! $ bodyNode && $ tableIterator ->valid () ) {
@@ -303,4 +315,20 @@ private function discoveredTargetedTable( mixed $node ): bool {
303315 && AssertDOMElement::isValid ( $ node )
304316 && $ this ->isTargetedTable ( $ node );
305317 }
318+
319+ /**
320+ * @return DOMElement|DOMNodeList<DOMNode>
321+ * @throws InvalidSource When source invalid.
322+ */
323+ private function getValidatedTableSource ( string |DOMElement $ source , bool $ normalize ): DOMElement |DOMNodeList {
324+ if ( ! $ source instanceof DOMElement ) {
325+ return DOMDocumentFactory::bodyFromHtml ( $ source , normalize: $ normalize )->childNodes ;
326+ }
327+
328+ 'table ' !== $ source ->tagName && throw new InvalidSource (
329+ sprintf ( '%s trait only supports table "DOMElement" ' , HtmlTableFromNode::class )
330+ );
331+
332+ return $ source ;
333+ }
306334}
0 commit comments