Skip to content

Commit a4b12ae

Browse files
committed
Add parser support for functional pseudo-elements. See #29.
1 parent 039a844 commit a4b12ae

File tree

3 files changed

+141
-32
lines changed

3 files changed

+141
-32
lines changed

cssselect/parser.py

+46-17
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ class Selector(object):
5757
"""
5858
def __init__(self, tree, pseudo_element=None):
5959
self.parsed_tree = tree
60-
if pseudo_element is not None:
60+
if pseudo_element is not None and not isinstance(
61+
pseudo_element, FunctionalPseudoElement):
6162
pseudo_element = ascii_lower(pseudo_element)
6263
#: The identifier for the pseudo-element as a string, or ``None``.
6364
#:
@@ -78,6 +79,8 @@ def __init__(self, tree, pseudo_element=None):
7879
self.pseudo_element = pseudo_element
7980

8081
def __repr__(self):
82+
if isinstance(self.pseudo_element, FunctionalPseudoElement):
83+
pseudo_element = repr(self.pseudo_element)
8184
if self.pseudo_element:
8285
pseudo_element = '::%s' % self.pseudo_element
8386
else:
@@ -115,6 +118,28 @@ def specificity(self):
115118
return a, b, c
116119

117120

121+
class FunctionalPseudoElement(object):
    """
    Represents selector::name(expr)

    ``name`` is the pseudo-element identifier, normalised with
    ``ascii_lower``.  ``arguments`` is the sequence of tokens found between
    the parentheses; each token is read through its ``type`` and ``value``
    attributes.
    """
    def __init__(self, name, arguments):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self):
        # Only the token values are shown, e.g.
        # FunctionalPseudoElement[::attr(['name'])]
        return '%s[::%s(%r)]' % (
            self.__class__.__name__, self.name,
            [token.value for token in self.arguments])

    def argument_types(self):
        """Return the ``type`` of every argument token, in order."""
        return [token.type for token in self.arguments]

    def specificity(self):
        """Return the specificity of this pseudo-element as ``(a, b, c)``.

        ``__init__`` never sets a ``selector`` attribute, so reading
        ``self.selector`` unconditionally raised ``AttributeError``.
        If a ``selector`` has been attached to the instance its specificity
        is used as the base; otherwise the base is ``(0, 0, 0)``.
        """
        selector = getattr(self, 'selector', None)
        if selector is None:
            a, b, c = 0, 0, 0
        else:
            a, b, c = selector.specificity()
        # NOTE(review): the original code bumps ``b``; Selectors Level 3
        # counts pseudo-elements in ``c`` -- confirm which one is intended.
        b += 1
        return a, b, c
142+
118143
class Function(object):
119144
"""
120145
Represents selector:name(expr)
@@ -398,6 +423,10 @@ def parse_simple_selector(stream, inside_negation=False):
398423
if stream.peek() == ('DELIM', ':'):
399424
stream.next()
400425
pseudo_element = stream.next_ident()
426+
if stream.peek() == ('DELIM', '('):
427+
stream.next()
428+
pseudo_element = FunctionalPseudoElement(
429+
pseudo_element, parse_arguments(stream))
401430
continue
402431
ident = stream.next_ident()
403432
if ident.lower() in ('first-line', 'first-letter',
@@ -425,22 +454,7 @@ def parse_simple_selector(stream, inside_negation=False):
425454
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
426455
result = Negation(result, argument)
427456
else:
428-
arguments = []
429-
while 1:
430-
stream.skip_whitespace()
431-
next = stream.next()
432-
if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
433-
('DELIM', '+'), ('DELIM', '-')]:
434-
arguments.append(next)
435-
elif next == ('DELIM', ')'):
436-
break
437-
else:
438-
raise SelectorSyntaxError(
439-
"Expected an argument, got %s" % (next,))
440-
if not arguments:
441-
raise SelectorSyntaxError(
442-
"Expected at least one argument, got %s" % (next,))
443-
result = Function(result, ident, arguments)
457+
result = Function(result, ident, parse_arguments(stream))
444458
else:
445459
raise SelectorSyntaxError(
446460
"Expected selector, got %s" % (peek,))
@@ -450,6 +464,21 @@ def parse_simple_selector(stream, inside_negation=False):
450464
return result, pseudo_element
451465

452466

467+
def parse_arguments(stream):
    """Collect the argument tokens of a functional pseudo-class/element.

    The opening parenthesis is expected to have been consumed by the
    caller.  Whitespace is skipped before each token.  IDENT, STRING and
    NUMBER tokens, as well as the ``+`` and ``-`` delimiters, are
    accumulated until the closing ``)`` is read, at which point the
    (possibly empty) list is returned.  Any other token raises
    ``SelectorSyntaxError``.
    """
    value_types = ('IDENT', 'STRING', 'NUMBER')
    sign_delims = [('DELIM', '+'), ('DELIM', '-')]
    closing_paren = ('DELIM', ')')

    collected = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token.type in value_types or token in sign_delims:
            collected.append(token)
            continue
        if token == closing_paren:
            return collected
        raise SelectorSyntaxError(
            "Expected an argument, got %s" % (token,))
480+
481+
453482
def parse_attrib(selector, stream):
454483
stream.skip_whitespace()
455484
attrib = stream.next_ident_or_star()

cssselect/tests.py

+73-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
from lxml import etree, html
2424
from cssselect import (parse, GenericTranslator, HTMLTranslator,
2525
SelectorSyntaxError, ExpressionError)
26-
from cssselect.parser import tokenize, parse_series, _unicode
26+
from cssselect.parser import (tokenize, parse_series, _unicode,
27+
FunctionalPseudoElement)
28+
from cssselect.xpath import _unicode_safe_getattr, XPathExpr
2729

2830

2931
if sys.version_info[0] < 3:
@@ -150,6 +152,7 @@ def parse_pseudo(css):
150152
result = []
151153
for selector in parse(css):
152154
pseudo = selector.pseudo_element
155+
pseudo = _unicode(pseudo) if pseudo else pseudo
153156
# No Symbol here
154157
assert pseudo is None or type(pseudo) is _unicode
155158
selector = repr(selector.parsed_tree).replace("(u'", "('")
@@ -176,6 +179,10 @@ def parse_one(css):
176179
assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
177180
assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')
178181

182+
assert parse_one('::text-content') == ('Element[*]', 'text-content')
183+
assert parse_one('::attr(name)') == (
184+
"Element[*]", "FunctionalPseudoElement[::attr(['name'])]")
185+
179186
assert parse_one('::Selection') == ('Element[*]', 'selection')
180187
assert parse_one('foo:after') == ('Element[foo]', 'after')
181188
assert parse_one('foo::selection') == ('Element[foo]', 'selection')
@@ -264,8 +271,6 @@ def get_error(css):
264271
"Expected ident or '*', got <DELIM '#' at 1>")
265272
assert get_error('[foo=#]') == (
266273
"Expected string or ident, got <DELIM '#' at 5>")
267-
assert get_error(':nth-child()') == (
268-
"Expected at least one argument, got <DELIM ')' at 11>")
269274
assert get_error('[href]a') == (
270275
"Expected selector, got <IDENT 'a' at 6>")
271276
assert get_error('[rel=stylesheet]') == None
@@ -436,6 +441,71 @@ def test_unicode_escapes(self):
436441
assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == (
437442
'''descendant-or-self::*[@aval = "' '"]''')
438443

444+
def test_xpath_pseudo_elements(self):
    """Translator sub-classes can give pseudo-elements an XPath meaning.

    ``CustomTranslator`` overrides ``xpath_pseudo_element`` to dispatch to
    ``xpath_<name>_functional_pseudo_element`` or
    ``xpath_<name>_simple_pseudo_element`` methods, and the assertions
    below check the XPath produced for each kind of selector.
    """
    class CustomTranslator(GenericTranslator):
        def xpath_pseudo_element(self, xpath, pseudo_element):
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = 'xpath_%s_functional_pseudo_element' % (
                    pseudo_element.name.replace('-', '_'))
                method = _unicode_safe_getattr(self, method, None)
                if not method:
                    # The name comes from ``pseudo_element``; the previous
                    # ``functional.name`` was an undefined name and raised
                    # NameError instead of ExpressionError.
                    raise ExpressionError(
                        "The functional pseudo-element ::%s() is unknown"
                        % pseudo_element.name)
                xpath = method(xpath, pseudo_element.arguments)
            else:
                method = 'xpath_%s_simple_pseudo_element' % (
                    pseudo_element.replace('-', '_'))
                method = _unicode_safe_getattr(self, method, None)
                if not method:
                    raise ExpressionError(
                        "The pseudo-element ::%s is unknown"
                        % pseudo_element)
                xpath = method(xpath)
            return xpath

        # functional pseudo-class:
        # elements that have a certain number of attributes
        def xpath_nb_attr_function(self, xpath, function):
            nb_attributes = int(function.arguments[0].value)
            return xpath.add_condition(
                "count(@*)=%d" % nb_attributes)

        # pseudo-class:
        # elements that have 5 attributes
        def xpath_five_attributes_pseudo(self, xpath):
            return xpath.add_condition("count(@*)=5")

        # functional pseudo-element:
        # element's attribute by name
        def xpath_attr_functional_pseudo_element(self, xpath, arguments):
            attribute_name = arguments[0].value
            other = XPathExpr('@%s' % attribute_name, '', )
            return xpath.join('/', other)

        # pseudo-element:
        # element's text() nodes
        def xpath_text_node_simple_pseudo_element(self, xpath):
            other = XPathExpr('text()', '', )
            return xpath.join('/', other)

        # pseudo-element:
        # element's href attribute
        def xpath_attr_href_simple_pseudo_element(self, xpath):
            other = XPathExpr('@href', '', )
            return xpath.join('/', other)

    def xpath(css):
        return _unicode(CustomTranslator().css_to_xpath(css))

    assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]"
    assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]"
    assert xpath('::attr(href)') == "descendant-or-self::*/@href"
    assert xpath('::text-node') == "descendant-or-self::*/text()"
    assert xpath('::attr-href') == "descendant-or-self::*/@href"
    assert xpath('p img::attr(src)') == (
        "descendant-or-self::p/descendant-or-self::*/img/@src")

    # Regression check for the error branch: an unknown functional
    # pseudo-element must be reported as ExpressionError.
    try:
        xpath('::unknown(x)')
    except ExpressionError:
        pass
    else:
        raise AssertionError(
            "Expected ExpressionError for an unknown functional "
            "pseudo-element")
508+
439509
def test_series(self):
440510
def series(css):
441511
selector, = parse(':nth-child(%s)' % css)

cssselect/xpath.py

+22-12
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@
2626
_unicode = str
2727

2828

29+
def _unicode_safe_getattr(obj, name, default=None):
    """Look up ``name`` on ``obj`` after forcing the name to ASCII.

    getattr() with a non-ASCII name fails on Python 2.x, so characters
    that cannot be encoded as ASCII are substituted (``'replace'`` error
    handler) before the lookup.  ``default`` is returned when the
    attribute does not exist.
    """
    ascii_bytes = name.encode('ascii', 'replace')
    ascii_name = ascii_bytes.decode('ascii')
    return getattr(obj, ascii_name, default)
33+
34+
2935
class ExpressionError(SelectorError, RuntimeError):
3036
"""Unknown or unsupported selector (eg. pseudo-class)."""
3137

@@ -178,14 +184,9 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
178184
The equivalent XPath 1.0 expression as an Unicode string.
179185
180186
"""
181-
selectors = parse(css)
182-
for selector in selectors:
183-
if selector.pseudo_element:
184-
raise ExpressionError('Pseudo-elements are not supported.')
185-
186187
return ' | '.join(
187188
self.selector_to_xpath(selector, prefix)
188-
for selector in selectors)
189+
for selector in parse(css))
189190

190191
def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
191192
"""Translate a parsed selector to XPath.
@@ -207,8 +208,21 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
207208
raise TypeError('Expected a parsed selector, got %r' % (selector,))
208209
xpath = self.xpath(tree)
209210
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
211+
if selector.pseudo_element:
212+
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
210213
return (prefix or '') + _unicode(xpath)
211214

215+
def xpath_pseudo_element(self, xpath, pseudo_element):
    """Translate a pseudo-element.

    The base implementation has no pseudo-element support: a falsy
    ``pseudo_element`` leaves ``xpath`` untouched, anything else raises
    ``ExpressionError``.  Sub-classes can override this method to give
    pseudo-elements a meaning.

    """
    if not pseudo_element:
        return xpath
    raise ExpressionError('Pseudo-elements are not supported.')
225+
212226
@staticmethod
213227
def xpath_literal(s):
214228
s = _unicode(s)
@@ -253,9 +267,7 @@ def xpath_negation(self, negation):
253267
def xpath_function(self, function):
254268
"""Translate a functional pseudo-class."""
255269
method = 'xpath_%s_function' % function.name.replace('-', '_')
256-
# getattr() with a non-ASCII name fails on Python 2.x
257-
method = method.encode('ascii', 'replace').decode('ascii')
258-
method = getattr(self, method, None)
270+
method = _unicode_safe_getattr(self, method, None)
259271
if not method:
260272
raise ExpressionError(
261273
"The pseudo-class :%s() is unknown" % function.name)
@@ -264,9 +276,7 @@ def xpath_function(self, function):
264276
def xpath_pseudo(self, pseudo):
265277
"""Translate a pseudo-class."""
266278
method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
267-
# getattr() with a non-ASCII name fails on Python 2.x
268-
method = method.encode('ascii', 'replace').decode('ascii')
269-
method = getattr(self, method, None)
279+
method = _unicode_safe_getattr(self, method, None)
270280
if not method:
271281
# TODO: better error message for pseudo-elements?
272282
raise ExpressionError(

0 commit comments

Comments
 (0)