|
| 1 | +import { Adapter/*, Predicate*/ } from 'css-select/lib/types'; |
1 | 2 | import HTMLElement from './nodes/html';
|
| 3 | +import Node from './nodes/node'; |
| 4 | +import NodeType from './nodes/type'; |
2 | 5 |
|
3 |
| -interface MatherFunction { |
4 |
| - func(el: HTMLElement, tagName: string, classes: string[] | string, attr_key: string, value: string): boolean; |
5 |
| - tagName: string; |
6 |
| - classes: string | string[]; |
7 |
| - attr_key: string; |
8 |
| - value: string; |
| 6 | +export declare type Predicate = (node: Node) => node is HTMLElement; |
| 7 | + |
| 8 | +function isTag(node: Node): node is HTMLElement { |
| 9 | + return node.nodeType === NodeType.ELEMENT_NODE; |
9 | 10 | }
|
10 | 11 |
|
11 |
| -/** |
12 |
| - * Cache to store generated match functions |
13 |
| - * @type {Object} |
14 |
| - */ |
15 |
| -let pMatchFunctionCache = {} as { [name: string]: MatherFunction }; |
| 12 | +function getAttributeValue(elem: HTMLElement, name: string) { |
| 13 | + return elem.getAttribute(name); |
| 14 | +} |
16 | 15 |
|
17 |
| -function compare_tagname(tag1: string, tag2: string) { |
18 |
| - if (!tag1) { |
19 |
| - return !tag2; |
20 |
| - } |
21 |
| - if (!tag2) { |
22 |
| - return !tag1; |
23 |
| - } |
24 |
| - return tag1.toLowerCase() === tag2.toLowerCase(); |
| 16 | +function getName(elem: HTMLElement) { |
| 17 | + return (elem.rawTagName || '').toLowerCase(); |
25 | 18 | }
|
26 | 19 |
|
27 |
| -/** |
28 |
| - * Function cache |
29 |
| - */ |
30 |
| -const functionCache = { |
31 |
| - f145(el: HTMLElement, tagName: string, classes: string[]) { |
32 |
| - 'use strict'; |
33 |
| - tagName = tagName || ''; |
34 |
| - classes = classes || []; |
35 |
| - if (el.id !== tagName.substr(1)) { |
36 |
| - return false; |
37 |
| - } |
38 |
| - for (let cls = classes, i = 0; i < cls.length; i++) { |
39 |
| - if (el.classNames.indexOf(cls[i]) === -1) { |
40 |
| - return false; |
41 |
| - } |
42 |
| - } |
43 |
| - return true; |
44 |
| - }, |
45 |
| - f45(el: HTMLElement, tagName: string, classes: string[]) { |
46 |
| - 'use strict'; |
47 |
| - tagName = tagName || ''; |
48 |
| - classes = classes || []; |
49 |
| - for (let cls = classes, i = 0; i < cls.length; i++) { |
50 |
| - if (el.classNames.indexOf(cls[i]) === -1) { |
51 |
| - return false; |
| 20 | +function getChildren(node: Node) { |
| 21 | + return node.childNodes; |
| 22 | +} |
| 23 | + |
| 24 | +function getParent(node: Node) { |
| 25 | + return node.parentNode; |
| 26 | +} |
| 27 | + |
| 28 | +function getText(node: Node) { |
| 29 | + return node.text; |
| 30 | +} |
| 31 | + |
| 32 | +function removeSubsets(nodes: Node[]) { |
| 33 | + let idx = nodes.length; |
| 34 | + let node; |
| 35 | + let ancestor; |
| 36 | + let replace; |
| 37 | + |
| 38 | + // Check if each node (or one of its ancestors) is already contained in the |
| 39 | + // array. |
| 40 | + while (--idx > -1) { |
| 41 | + node = ancestor = nodes[idx]; |
| 42 | + |
| 43 | + // Temporarily remove the node under consideration |
| 44 | + nodes[idx] = null; |
| 45 | + replace = true; |
| 46 | + |
| 47 | + while (ancestor) { |
| 48 | + if (nodes.indexOf(ancestor) > -1) { |
| 49 | + replace = false; |
| 50 | + nodes.splice(idx, 1); |
| 51 | + break; |
52 | 52 | }
|
| 53 | + ancestor = getParent(ancestor); |
53 | 54 | }
|
54 |
| - return true; |
55 |
| - }, |
56 |
| - f15(el: HTMLElement, tagName: string) { |
57 |
| - 'use strict'; |
58 |
| - tagName = tagName || ''; |
59 |
| - if (el.id !== tagName.substr(1)) { |
60 |
| - return false; |
61 |
| - } |
62 |
| - return true; |
63 |
| - }, |
64 |
| - f1(el: HTMLElement, tagName: string) { |
65 |
| - 'use strict'; |
66 |
| - tagName = tagName || ''; |
67 |
| - if (el.id !== tagName.substr(1)) { |
68 |
| - return false; |
69 |
| - } |
70 |
| - }, |
71 |
| - f5() { |
72 |
| - 'use strict'; |
73 |
| - return true; |
74 |
| - }, |
75 |
| - f55(el: HTMLElement, tagName: string, classes: string[], attr_key: string) { |
76 |
| - 'use strict'; |
77 |
| - tagName = tagName || ''; |
78 |
| - classes = classes || []; |
79 |
| - attr_key = attr_key || ''; |
80 |
| - const attrs = el.attributes; |
81 |
| - return attrs.hasOwnProperty(attr_key); |
82 |
| - }, |
83 |
| - f245(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { |
84 |
| - 'use strict'; |
85 |
| - tagName = tagName || ''; |
86 |
| - classes = classes || []; |
87 |
| - attr_key = (attr_key || '').toLowerCase(); |
88 |
| - value = value || ''; |
89 |
| - const attrs = el.attributes; |
90 |
| - return Object.keys(attrs).some((key) => { |
91 |
| - const val = attrs[key]; |
92 |
| - return key.toLowerCase() === attr_key && val === value; |
93 |
| - }); |
94 |
| - // for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}} |
95 |
| - // return true; |
96 |
| - }, |
97 |
| - f25(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { |
98 |
| - 'use strict'; |
99 |
| - tagName = tagName || ''; |
100 |
| - classes = classes || []; |
101 |
| - attr_key = (attr_key || '').toLowerCase(); |
102 |
| - value = value || ''; |
103 |
| - const attrs = el.attributes; |
104 |
| - return Object.keys(attrs).some((key) => { |
105 |
| - const val = attrs[key]; |
106 |
| - return key.toLowerCase() === attr_key && val === value; |
107 |
| - }); |
108 |
| - // return true; |
109 |
| - }, |
110 |
| - f2(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { |
111 |
| - 'use strict'; |
112 |
| - tagName = tagName || ''; |
113 |
| - classes = classes || []; |
114 |
| - attr_key = (attr_key || '').toLowerCase(); |
115 |
| - value = value || ''; |
116 |
| - const attrs = el.attributes; |
117 |
| - return Object.keys(attrs).some((key) => { |
118 |
| - const val = attrs[key]; |
119 |
| - return key.toLowerCase() === attr_key && val === value; |
120 |
| - }); |
121 |
| - }, |
122 |
| - f345(el: HTMLElement, tagName: string, classes: string[]) { |
123 |
| - 'use strict'; |
124 |
| - tagName = tagName || ''; |
125 |
| - classes = classes || []; |
126 |
| - if (!compare_tagname(el.tagName, tagName)) { |
127 |
| - return false; |
128 |
| - } |
129 |
| - for (let cls = classes, i = 0; i < cls.length; i++) { |
130 |
| - if (el.classNames.indexOf(cls[i]) === -1) { |
131 |
| - return false; |
132 |
| - } |
| 55 | + |
| 56 | + // If the node has been found to be unique, re-insert it. |
| 57 | + if (replace) { |
| 58 | + nodes[idx] = node; |
133 | 59 | }
|
134 |
| - return true; |
135 |
| - }, |
136 |
| - f35(el: HTMLElement, tagName: string) { |
137 |
| - 'use strict'; |
138 |
| - tagName = tagName || ''; |
139 |
| - return compare_tagname(el.tagName, tagName); |
140 |
| - }, |
141 |
| - f3(el: HTMLElement, tagName: string) { |
142 |
| - 'use strict'; |
143 |
| - tagName = tagName || ''; |
144 |
| - // if (el.tagName !== tagName) { |
145 |
| - // return false; |
146 |
| - // } |
147 |
| - return compare_tagname(el.tagName, tagName); |
148 | 60 | }
|
149 |
| -}; |
150 |
| - |
151 |
| -/** |
152 |
| - * Matcher class to make CSS match |
153 |
| - * |
154 |
| - * @class Matcher |
155 |
| - */ |
156 |
| -export default class Matcher { |
157 |
| - private matchers: MatherFunction[]; |
158 |
| - private nextMatch = 0; |
159 |
| - /** |
160 |
| - * Creates an instance of Matcher. |
161 |
| - * @param {string} selector |
162 |
| - * |
163 |
| - * @memberof Matcher |
164 |
| - */ |
165 |
| - public constructor(selector: string) { |
166 |
| - this.matchers = selector.split(' ').map((matcher) => { |
167 |
| - if (pMatchFunctionCache[matcher]) { |
168 |
| - return pMatchFunctionCache[matcher]; |
169 |
| - } |
170 |
| - const parts = matcher.split('.'); |
171 |
| - const tagName = parts[0]; |
172 |
| - const classes = parts.slice(1).sort(); |
173 |
| - // let source = '"use strict";'; |
174 |
| - let function_name = 'f'; |
175 |
| - let attr_key = ''; |
176 |
| - let value = ''; |
177 |
| - if (tagName && tagName !== '*') { |
178 |
| - if (tagName.startsWith('#')) { |
179 |
| - // source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1 |
180 |
| - function_name += '1'; |
181 |
| - } else { |
182 |
| - // https://github.com/taoqf/node-html-parser/issues/86 |
183 |
| - // const reg = /\[\s*([\w-]+)(\s*=\s*(((?<quote>'|")\s*(.*)(\k<quote>))|(\S*)))?\s*\]/.exec(tagName); |
184 |
| - // `[a-b]`,`[ a-b ]`,`[a-b=c]`, `[a-b=c'd]`,`[a-b='c\' d"e ']`,`[ a-b = 'c\' d"e ' ]`,`[a-b="c' d\"e " ]`,`[ a-b = "c' d\"e " ]` |
185 |
| - const reg = /\[\s*([\w-]+)(\s*=\s*(('\s*(.*)'|"\s*(.*)")|(\S*)))?\s*\]/.exec(tagName); |
186 |
| - if (reg) { |
187 |
| - attr_key = reg[1]; |
188 |
| - value = reg[5] || reg[6] || reg[7]; |
189 |
| - |
190 |
| - // source += `let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == "${attr_key}" && val == "${value}"){return true;}} return false;`;// 2 |
191 |
| - function_name += '2'; |
192 |
| - } else { |
193 |
| - // source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;';// 3 |
194 |
| - function_name += '3'; |
195 |
| - } |
196 |
| - } |
197 |
| - } |
198 |
| - if (classes.length > 0) { |
199 |
| - // source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;';// 4 |
200 |
| - function_name += '4'; |
| 61 | + |
| 62 | + return nodes; |
| 63 | +} |
| 64 | + |
| 65 | +function existsOne(test: Predicate, elems: Node[]): boolean { |
| 66 | + return elems.some((elem) => { |
| 67 | + return isTag(elem) ? test(elem) || existsOne(test, getChildren(elem)) : false; |
| 68 | + }); |
| 69 | +} |
| 70 | + |
| 71 | +function getSiblings(node: Node) { |
| 72 | + const parent = getParent(node); |
| 73 | + return parent && getChildren(parent); |
| 74 | +} |
| 75 | + |
| 76 | +function hasAttrib(elem: HTMLElement, name: string) { |
| 77 | + return getAttributeValue(elem, name) !== undefined; |
| 78 | +} |
| 79 | + |
| 80 | +function findOne(test: Predicate, elems: Node[]) { |
| 81 | + let elem = null as HTMLElement | null; |
| 82 | + |
| 83 | + for (let i = 0, l = elems.length; i < l && !elem; i++) { |
| 84 | + const el = elems[i]; |
| 85 | + if (test(el)) { |
| 86 | + elem = el; |
| 87 | + } else { |
| 88 | + const childs = getChildren(el); |
| 89 | + if (childs && childs.length > 0) { |
| 90 | + elem = findOne(test, childs); |
201 | 91 | }
|
202 |
| - // source += 'return true;';// 5 |
203 |
| - function_name += '5'; |
204 |
| - const obj = { |
205 |
| - func: functionCache[function_name] as (el: HTMLElement, tagName: string, classes: string | string[], attr_key: string, value: string) => boolean, |
206 |
| - tagName: tagName || '', |
207 |
| - classes: classes || '', |
208 |
| - attr_key: attr_key || '', |
209 |
| - value: value || '' |
210 |
| - } as MatherFunction; |
211 |
| - // source = source || ''; |
212 |
| - return (pMatchFunctionCache[matcher] = obj); |
213 |
| - }); |
214 |
| - } |
215 |
| - /** |
216 |
| - * Trying to advance match pointer |
217 |
| - * @param {HTMLElement} el element to make the match |
218 |
| - * @return {bool} true when pointer advanced. |
219 |
| - */ |
220 |
| - public advance(el: HTMLElement) { |
221 |
| - if (this.nextMatch < this.matchers.length && |
222 |
| - this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) { |
223 |
| - this.nextMatch++; |
224 |
| - return true; |
225 | 92 | }
|
226 |
| - return false; |
227 |
| - } |
228 |
| - /** |
229 |
| - * Rewind the match pointer |
230 |
| - */ |
231 |
| - public rewind() { |
232 |
| - this.nextMatch--; |
233 |
| - } |
234 |
| - /** |
235 |
| - * Trying to determine if match made. |
236 |
| - * @return {bool} true when the match is made |
237 |
| - */ |
238 |
| - public get matched() { |
239 |
| - return this.nextMatch === this.matchers.length; |
240 | 93 | }
|
241 |
| - /** |
242 |
| - * Rest match pointer. |
243 |
| - * @return {[type]} [description] |
244 |
| - */ |
245 |
| - public reset() { |
246 |
| - this.nextMatch = 0; |
247 |
| - } |
248 |
| - /** |
249 |
| - * flush cache to free memory |
250 |
| - */ |
251 |
| - public flushCache() { |
252 |
| - pMatchFunctionCache = {}; |
| 94 | + |
| 95 | + return elem; |
| 96 | +} |
| 97 | + |
| 98 | +function findAll(test: Predicate, nodes: Node[]): Node[] { |
| 99 | + let result = [] as Node[]; |
| 100 | + |
| 101 | + for (let i = 0, j = nodes.length; i < j; i++) { |
| 102 | + if (!isTag(nodes[i])) continue; |
| 103 | + if (test(nodes[i])) result.push(nodes[i]); |
| 104 | + const childs = getChildren(nodes[i]); |
| 105 | + if (childs) result = result.concat(findAll(test, childs)); |
253 | 106 | }
|
| 107 | + |
| 108 | + return result; |
254 | 109 | }
|
| 110 | + |
| 111 | +export default { |
| 112 | + isTag, |
| 113 | + getAttributeValue, |
| 114 | + getName, |
| 115 | + getChildren, |
| 116 | + getParent, |
| 117 | + getText, |
| 118 | + removeSubsets, |
| 119 | + existsOne, |
| 120 | + getSiblings, |
| 121 | + hasAttrib, |
| 122 | + findOne, |
| 123 | + findAll |
| 124 | +} as Adapter<Node, HTMLElement>; |
0 commit comments