Skip to content

Commit 2077d42

Browse files
committed
issue #28,#59, #74 css selector
1 parent 7ee1d58 commit 2077d42

File tree

8 files changed

+353
-400
lines changed

8 files changed

+353
-400
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
],
2929
"license": "MIT",
3030
"dependencies": {
31+
"css-select": "^3.1.2",
3132
"he": "1.2.0"
3233
},
3334
"devDependencies": {

src/matcher.ts

Lines changed: 108 additions & 238 deletions
Original file line numberDiff line numberDiff line change
@@ -1,254 +1,124 @@
1+
import { Adapter/*, Predicate*/ } from 'css-select/lib/types';
12
import HTMLElement from './nodes/html';
3+
import Node from './nodes/node';
4+
import NodeType from './nodes/type';
25

3-
interface MatherFunction {
4-
func(el: HTMLElement, tagName: string, classes: string[] | string, attr_key: string, value: string): boolean;
5-
tagName: string;
6-
classes: string | string[];
7-
attr_key: string;
8-
value: string;
6+
/**
 * Shape of the test callbacks accepted by the lookup helpers in this module:
 * a type guard that takes any `Node` and, when it returns `true`, narrows the
 * node to `HTMLElement`.
 */
export type Predicate = (node: Node) => node is HTMLElement;
7+
8+
/**
 * Type guard reporting whether `node` is an element.
 *
 * @param node - Node to inspect.
 * @returns `true` when `node.nodeType` equals `NodeType.ELEMENT_NODE`.
 */
function isTag(node: Node): node is HTMLElement {
  const { nodeType } = node;
  return NodeType.ELEMENT_NODE === nodeType;
}
1011

11-
/**
12-
* Cache to store generated match functions
13-
* @type {Object}
14-
*/
15-
let pMatchFunctionCache = {} as { [name: string]: MatherFunction };
12+
/**
 * Reads the attribute `name` from `elem`.
 *
 * @param elem - Element to read from.
 * @param name - Attribute name, passed to `elem.getAttribute` unchanged.
 * @returns Whatever `elem.getAttribute(name)` returns.
 */
function getAttributeValue(elem: HTMLElement, name: string) {
  const value = elem.getAttribute(name);
  return value;
}
1615

17-
function compare_tagname(tag1: string, tag2: string) {
18-
if (!tag1) {
19-
return !tag2;
20-
}
21-
if (!tag2) {
22-
return !tag1;
23-
}
24-
return tag1.toLowerCase() === tag2.toLowerCase();
16+
/**
 * Returns the lower-cased tag name of `elem`.
 *
 * @param elem - Element whose `rawTagName` is read.
 * @returns `rawTagName` in lower case, or an empty string when `rawTagName`
 * is falsy.
 */
function getName(elem: HTMLElement) {
  const rawName = elem.rawTagName || '';
  return rawName.toLowerCase();
}
2619

27-
/**
28-
* Function cache
29-
*/
30-
const functionCache = {
31-
f145(el: HTMLElement, tagName: string, classes: string[]) {
32-
'use strict';
33-
tagName = tagName || '';
34-
classes = classes || [];
35-
if (el.id !== tagName.substr(1)) {
36-
return false;
37-
}
38-
for (let cls = classes, i = 0; i < cls.length; i++) {
39-
if (el.classNames.indexOf(cls[i]) === -1) {
40-
return false;
41-
}
42-
}
43-
return true;
44-
},
45-
f45(el: HTMLElement, tagName: string, classes: string[]) {
46-
'use strict';
47-
tagName = tagName || '';
48-
classes = classes || [];
49-
for (let cls = classes, i = 0; i < cls.length; i++) {
50-
if (el.classNames.indexOf(cls[i]) === -1) {
51-
return false;
20+
/**
 * Returns the child list of `node`.
 *
 * @param node - Node whose `childNodes` property is read.
 * @returns The value of `node.childNodes`, unchanged (not copied).
 */
function getChildren(node: Node) {
  const { childNodes } = node;
  return childNodes;
}
23+
24+
/**
 * Returns the parent of `node`.
 *
 * @param node - Node whose `parentNode` property is read.
 * @returns The value of `node.parentNode`, unchanged.
 */
function getParent(node: Node) {
  const { parentNode } = node;
  return parentNode;
}
27+
28+
/**
 * Returns the text of `node`.
 *
 * @param node - Node whose `text` property is read.
 * @returns The value of `node.text`, unchanged.
 */
function getText(node: Node) {
  const { text } = node;
  return text;
}
31+
32+
/**
 * Prunes `nodes` in place so that no entry is a duplicate of, or a descendant
 * of, another entry, then returns the same array.
 *
 * The array is walked from its last index to its first. Each entry is
 * temporarily overwritten with `null`; the entry and then its ancestors
 * (obtained through `getParent`) are looked up in the array. On a hit the
 * slot is spliced out, otherwise the entry is written back.
 *
 * NOTE(review): in this diff rendering the function body is interleaved with
 * lines from the removed side of the change (gutter markers ending in "-",
 * apparently the previous Matcher implementation); those lines are not part
 * of removeSubsets.
 */
function removeSubsets(nodes: Node[]) {
33+
let idx = nodes.length;
34+
let node;
35+
let ancestor;
36+
let replace;
37+
38+
// Check if each node (or one of its ancestors) is already contained in the
39+
// array.
40+
while (--idx > -1) {
41+
node = ancestor = nodes[idx];
42+
43+
// Temporarily remove the node under consideration
44+
nodes[idx] = null;
45+
replace = true;
46+
47+
while (ancestor) {
48+
if (nodes.indexOf(ancestor) > -1) {
49+
replace = false;
50+
nodes.splice(idx, 1);
51+
break;
5252
}
53+
ancestor = getParent(ancestor);
5354
}
54-
return true;
55-
},
56-
f15(el: HTMLElement, tagName: string) {
57-
'use strict';
58-
tagName = tagName || '';
59-
if (el.id !== tagName.substr(1)) {
60-
return false;
61-
}
62-
return true;
63-
},
64-
f1(el: HTMLElement, tagName: string) {
65-
'use strict';
66-
tagName = tagName || '';
67-
if (el.id !== tagName.substr(1)) {
68-
return false;
69-
}
70-
},
71-
f5() {
72-
'use strict';
73-
return true;
74-
},
75-
f55(el: HTMLElement, tagName: string, classes: string[], attr_key: string) {
76-
'use strict';
77-
tagName = tagName || '';
78-
classes = classes || [];
79-
attr_key = attr_key || '';
80-
const attrs = el.attributes;
81-
return attrs.hasOwnProperty(attr_key);
82-
},
83-
f245(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) {
84-
'use strict';
85-
tagName = tagName || '';
86-
classes = classes || [];
87-
attr_key = (attr_key || '').toLowerCase();
88-
value = value || '';
89-
const attrs = el.attributes;
90-
return Object.keys(attrs).some((key) => {
91-
const val = attrs[key];
92-
return key.toLowerCase() === attr_key && val === value;
93-
});
94-
// for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}}
95-
// return true;
96-
},
97-
f25(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) {
98-
'use strict';
99-
tagName = tagName || '';
100-
classes = classes || [];
101-
attr_key = (attr_key || '').toLowerCase();
102-
value = value || '';
103-
const attrs = el.attributes;
104-
return Object.keys(attrs).some((key) => {
105-
const val = attrs[key];
106-
return key.toLowerCase() === attr_key && val === value;
107-
});
108-
// return true;
109-
},
110-
f2(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) {
111-
'use strict';
112-
tagName = tagName || '';
113-
classes = classes || [];
114-
attr_key = (attr_key || '').toLowerCase();
115-
value = value || '';
116-
const attrs = el.attributes;
117-
return Object.keys(attrs).some((key) => {
118-
const val = attrs[key];
119-
return key.toLowerCase() === attr_key && val === value;
120-
});
121-
},
122-
f345(el: HTMLElement, tagName: string, classes: string[]) {
123-
'use strict';
124-
tagName = tagName || '';
125-
classes = classes || [];
126-
if (!compare_tagname(el.tagName, tagName)) {
127-
return false;
128-
}
129-
for (let cls = classes, i = 0; i < cls.length; i++) {
130-
if (el.classNames.indexOf(cls[i]) === -1) {
131-
return false;
132-
}
55+
56+
// If the node has been found to be unique, re-insert it.
57+
if (replace) {
58+
nodes[idx] = node;
13359
}
134-
return true;
135-
},
136-
f35(el: HTMLElement, tagName: string) {
137-
'use strict';
138-
tagName = tagName || '';
139-
return compare_tagname(el.tagName, tagName);
140-
},
141-
f3(el: HTMLElement, tagName: string) {
142-
'use strict';
143-
tagName = tagName || '';
144-
// if (el.tagName !== tagName) {
145-
// return false;
146-
// }
147-
return compare_tagname(el.tagName, tagName);
14860
}
149-
};
150-
151-
/**
152-
* Matcher class to make CSS match
153-
*
154-
* @class Matcher
155-
*/
156-
export default class Matcher {
157-
private matchers: MatherFunction[];
158-
private nextMatch = 0;
159-
/**
160-
* Creates an instance of Matcher.
161-
* @param {string} selector
162-
*
163-
* @memberof Matcher
164-
*/
165-
public constructor(selector: string) {
166-
this.matchers = selector.split(' ').map((matcher) => {
167-
if (pMatchFunctionCache[matcher]) {
168-
return pMatchFunctionCache[matcher];
169-
}
170-
const parts = matcher.split('.');
171-
const tagName = parts[0];
172-
const classes = parts.slice(1).sort();
173-
// let source = '"use strict";';
174-
let function_name = 'f';
175-
let attr_key = '';
176-
let value = '';
177-
if (tagName && tagName !== '*') {
178-
if (tagName.startsWith('#')) {
179-
// source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1
180-
function_name += '1';
181-
} else {
182-
// https://github.com/taoqf/node-html-parser/issues/86
183-
// const reg = /\[\s*([\w-]+)(\s*=\s*(((?<quote>'|")\s*(.*)(\k<quote>))|(\S*)))?\s*\]/.exec(tagName);
184-
// `[a-b]`,`[ a-b ]`,`[a-b=c]`, `[a-b=c'd]`,`[a-b='c\' d"e ']`,`[ a-b = 'c\' d"e ' ]`,`[a-b="c' d\"e " ]`,`[ a-b = "c' d\"e " ]`
185-
const reg = /\[\s*([\w-]+)(\s*=\s*(('\s*(.*)'|"\s*(.*)")|(\S*)))?\s*\]/.exec(tagName);
186-
if (reg) {
187-
attr_key = reg[1];
188-
value = reg[5] || reg[6] || reg[7];
189-
190-
// source += `let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == "${attr_key}" && val == "${value}"){return true;}} return false;`;// 2
191-
function_name += '2';
192-
} else {
193-
// source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;';// 3
194-
function_name += '3';
195-
}
196-
}
197-
}
198-
if (classes.length > 0) {
199-
// source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;';// 4
200-
function_name += '4';
61+
62+
return nodes;
63+
}
64+
65+
/**
 * Reports whether `test` accepts at least one element in `elems` or in any of
 * their descendants.
 *
 * Entries that are not elements (per `isTag`) are skipped together with their
 * subtree. For elements, the element itself is tested first and its children
 * are searched only if that test fails.
 *
 * @param test - Predicate applied to each visited element.
 * @param elems - Nodes at which the search starts.
 * @returns `true` as soon as one match is found, otherwise `false`.
 */
function existsOne(test: Predicate, elems: Node[]): boolean {
  return elems.some((candidate) => {
    if (!isTag(candidate)) {
      return false;
    }
    if (test(candidate)) {
      return true;
    }
    return existsOne(test, getChildren(candidate));
  });
}
70+
71+
/**
 * Returns the sibling list of `node`, i.e. the child list of its parent
 * (which normally contains `node` itself).
 *
 * When `node` has no parent there is no child list to read. Instead of
 * handing the falsy parent value back to callers that iterate the result,
 * the node is reported as its own only sibling, so the return value is
 * always an array.
 *
 * @param node - Node whose siblings are requested.
 * @returns The parent's child nodes, or `[node]` when there is no parent.
 */
function getSiblings(node: Node): Node[] {
  const parent = getParent(node);
  return parent ? getChildren(parent) : [node];
}
75+
76+
/**
 * Reports whether `elem` carries the attribute `name`.
 *
 * @param elem - Element to inspect.
 * @param name - Attribute name, forwarded to `getAttributeValue`.
 * @returns `true` unless `getAttributeValue(elem, name)` is exactly
 * `undefined`.
 */
function hasAttrib(elem: HTMLElement, name: string) {
  const value = getAttributeValue(elem, name);
  return value !== undefined;
}
79+
80+
/**
 * Depth-first search for the first node accepted by `test`.
 *
 * Entries of `elems` are visited in order; an entry is tested before its
 * children (read through `getChildren`) are searched recursively. The loop
 * stops as soon as a match has been stored, and `null` is returned when
 * nothing matches.
 *
 * NOTE(review): in this diff rendering the function body is interleaved with
 * lines from the removed side of the change (gutter markers ending in "-");
 * those lines are not part of findOne.
 */
function findOne(test: Predicate, elems: Node[]) {
81+
let elem = null as HTMLElement | null;
82+
83+
for (let i = 0, l = elems.length; i < l && !elem; i++) {
84+
const el = elems[i];
85+
// NOTE(review): `test` is invoked on every entry without an `isTag` check,
// whereas existsOne and findAll in this file skip non-element nodes before
// calling it — confirm whether non-element nodes should be skipped here too.
if (test(el)) {
86+
elem = el;
87+
} else {
88+
const childs = getChildren(el);
89+
if (childs && childs.length > 0) {
90+
elem = findOne(test, childs);
20191
}
202-
// source += 'return true;';// 5
203-
function_name += '5';
204-
const obj = {
205-
func: functionCache[function_name] as (el: HTMLElement, tagName: string, classes: string | string[], attr_key: string, value: string) => boolean,
206-
tagName: tagName || '',
207-
classes: classes || '',
208-
attr_key: attr_key || '',
209-
value: value || ''
210-
} as MatherFunction;
211-
// source = source || '';
212-
return (pMatchFunctionCache[matcher] = obj);
213-
});
214-
}
215-
/**
216-
* Trying to advance match pointer
217-
* @param {HTMLElement} el element to make the match
218-
* @return {bool} true when pointer advanced.
219-
*/
220-
public advance(el: HTMLElement) {
221-
if (this.nextMatch < this.matchers.length &&
222-
this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) {
223-
this.nextMatch++;
224-
return true;
22592
}
226-
return false;
227-
}
228-
/**
229-
* Rewind the match pointer
230-
*/
231-
public rewind() {
232-
this.nextMatch--;
233-
}
234-
/**
235-
* Trying to determine if match made.
236-
* @return {bool} true when the match is made
237-
*/
238-
public get matched() {
239-
return this.nextMatch === this.matchers.length;
24093
}
241-
/**
242-
* Rest match pointer.
243-
* @return {[type]} [description]
244-
*/
245-
public reset() {
246-
this.nextMatch = 0;
247-
}
248-
/**
249-
* flush cache to free memory
250-
*/
251-
public flushCache() {
252-
pMatchFunctionCache = {};
94+
95+
return elem;
96+
}
97+
98+
/**
 * Collects every element accepted by `test` among `nodes` and all of their
 * descendants.
 *
 * Non-element entries (per `isTag`) are skipped together with their subtree.
 * Matches are reported in the order they are visited: an element first, then
 * the matches found beneath it, then its following siblings.
 *
 * @param test - Predicate applied to each visited element.
 * @param nodes - Nodes at which the search starts.
 * @returns A new array holding all matching elements.
 */
function findAll(test: Predicate, nodes: Node[]): Node[] {
  const matches: Node[] = [];

  // Walks one level of the tree, appending hits to the shared accumulator.
  const visit = (level: Node[]): void => {
    for (let index = 0, count = level.length; index < count; index++) {
      const current = level[index];
      if (!isTag(current)) {
        continue;
      }
      if (test(current)) {
        matches.push(current);
      }
      const children = getChildren(current);
      if (children) {
        visit(children);
      }
    }
  };

  visit(nodes);
  return matches;
}
110+
111+
/**
 * Default export: the helper functions of this module gathered into one
 * object and asserted to be a css-select `Adapter` over `Node` /
 * `HTMLElement`.
 */
export default {
  isTag: isTag,
  getAttributeValue: getAttributeValue,
  getName: getName,
  getChildren: getChildren,
  getParent: getParent,
  getText: getText,
  removeSubsets: removeSubsets,
  existsOne: existsOne,
  getSiblings: getSiblings,
  hasAttrib: hasAttrib,
  findOne: findOne,
  findAll: findAll
} as Adapter<Node, HTMLElement>;

src/nodes/comment.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ import Node from './node';
22
import NodeType from './type';
33

44
export default class CommentNode extends Node {
5-
public constructor(public rawText: string) {
6-
super();
5+
public constructor(public rawText: string, parentNode: Node) {
6+
super(parentNode);
77
}
88

99
/**

0 commit comments

Comments
 (0)