Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add suppport for [of S]? part in nth-child's arguments #120

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Next Next commit
add suppport for [of S]? part in nth-child's arguments
annbgn committed Jul 26, 2021
commit 2722ae66fda4925ace25e640b2073be451a89f5d
59 changes: 48 additions & 11 deletions cssselect/parser.py
Original file line number Diff line number Diff line change
@@ -161,16 +161,18 @@ def __init__(self, name, arguments):
self.arguments = arguments

def __repr__(self):
return '%s[::%s(%r)]' % (
self.__class__.__name__, self.name,
[token.value for token in self.arguments])
return "%s[::%s(%r)]" % (
self.__class__.__name__,
self.name,
[token.value for token in self.arguments[0]],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line rang a bell to me. With no other changes to the class, it means that the arguments argument is now expected to be formatted differently, which breaks backward compatibility. Consider the following diff:

diff --git cssselect/parser.py cssselect/parser.py
index 4351f07..48e7604 100644
--- cssselect/parser.py
+++ cssselect/parser.py
@@ -160,6 +160,7 @@ class FunctionalPseudoElement(object):
     def __init__(self, name, arguments):
         self.name = ascii_lower(name)
         self.arguments = arguments
+        print("arguments:", arguments)
 
     def __repr__(self):
         return "%s[::%s(%r)]" % (

Then at the current master branch (9edc6c3):

In [1]: from cssselect import parse

In [2]: p = parse("a::asdf(arg1)")
arguments: [<IDENT 'arg1' at 8>]

While at the current add_support_for_of_s_part_in_nth_child branch (058b6b5):

In [1]: from cssselect import parse

In [2]: p = parse("a::asdf(arg1)")
arguments: ([<IDENT 'arg1' at 8>], None)

Could you explain the rationale of this change? Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, the backward compatibility breaks, because I changed signature of method parse_arguments which is used both for functions and functional pseudo elements.
Now I splitted parsing arguments into separate methods, so nothing will break
Nice catch!

)

def argument_types(self):
return [token.type for token in self.arguments]

def canonical(self):
args = ''.join(token.css() for token in self.arguments)
return '%s(%s)' % (self.name, args)
args = "".join(token.css() for token in self.arguments[0])
return "%s(%s)" % (self.name, args)

def specificity(self):
a, b, c = self.selector.specificity()
@@ -182,12 +184,27 @@ class Function(object):
"""
Represents selector:name(expr)
"""
def __init__(self, selector, name, arguments):

def __init__(self, selector, name, arguments, of_type=None):
self.selector = selector
self.name = ascii_lower(name)
self.arguments = arguments

# for css4 :nth-child(An+B of Subselector)
try:
self.of_type = of_type[0]
except (IndexError, TypeError):
self.of_type = None

def __repr__(self):
if self.of_type:
return "%s[%r:%s(%r of %s)]" % (
self.__class__.__name__,
self.selector,
self.name,
[token.value for token in self.arguments],
self.of_type.__repr__(),
)
return '%s[%r:%s(%r)]' % (
self.__class__.__name__, self.selector, self.name,
[token.value for token in self.arguments])
@@ -539,7 +556,8 @@ def parse_simple_selector(stream, inside_negation=False):
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
else:
result = Function(result, ident, parse_arguments(stream))
arguments, of_type = parse_arguments(stream)
result = Function(result, ident, arguments, of_type)
else:
raise SelectorSyntaxError(
"Expected selector, got %s" % (peek,))
@@ -554,16 +572,33 @@ def parse_arguments(stream):
while 1:
stream.skip_whitespace()
next = stream.next()
if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
('DELIM', '+'), ('DELIM', '-')]:
if next == ("IDENT", "of"):
stream.skip_whitespace()
of_type = parse_of_type(stream)
return arguments, of_type
elif next.type in ("IDENT", "STRING", "NUMBER") or next in [
("DELIM", "+"),
("DELIM", "-"),
]:
arguments.append(next)
elif next == ('DELIM', ')'):
return arguments
return arguments, None
else:
raise SelectorSyntaxError(
"Expected an argument, got %s" % (next,))


def parse_of_type(stream):
subselector = ""
while 1:
next = stream.next()
if next == ("DELIM", ")"):
break
subselector += next.value
result = parse(subselector)
return result


def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
@@ -620,6 +655,7 @@ def parse_series(tokens):
for token in tokens:
if token.type == 'STRING':
raise ValueError('String tokens not allowed in series.')

s = ''.join(token.value for token in tokens).strip()
if s == 'odd':
return 2, 1
@@ -630,7 +666,7 @@ def parse_series(tokens):
if 'n' not in s:
# Just b
return 0, int(s)
a, b = s.split('n', 1)
a, b = s.split("n", 1)
if not a:
a = 1
elif a == '-' or a == '+':
@@ -641,6 +677,7 @@ def parse_series(tokens):
b = 0
else:
b = int(b)

return a, b


4 changes: 3 additions & 1 deletion cssselect/xpath.py
Original file line number Diff line number Diff line change
@@ -439,7 +439,9 @@ def xpath_nth_child_function(self, xpath, function, last=False,
# `add_name_test` boolean is inverted and somewhat counter-intuitive:
#
# nth_of_type() calls nth_child(add_name_test=False)
if add_name_test:
if function.of_type:
nodetest = self.xpath(function.of_type.parsed_tree)
elif add_name_test:
nodetest = '*'
else:
nodetest = '%s' % xpath.element
15 changes: 14 additions & 1 deletion tests/test_cssselect.py
Original file line number Diff line number Diff line change
@@ -406,6 +406,19 @@ def xpath(css):
"@hreflang = 'en' or starts-with(@hreflang, 'en-'))]")

# --- nth-* and nth-last-* -------------------------------------
assert (
xpath("e:nth-child(2n+1 of S)")
== "e[count(preceding-sibling::S) mod 2 = 0]"
)
assert (
xpath("e:nth-of-type(2n+1 of S)")
== "e[count(preceding-sibling::S) mod 2 = 0]"
)
assert (
xpath('e:nth-child(2n+1 of li.important)')
== "e[count(preceding-sibling::li[@class and contains(concat(' ', normalize-space(@class), ' '), ' important ')]) mod 2 = 0]"
)

assert xpath('e:nth-child(1)') == (
"e[count(preceding-sibling::*) = 0]")

@@ -606,7 +619,7 @@ def xpath_five_attributes_pseudo(self, xpath):
# functional pseudo-element:
# element's attribute by name
def xpath_attr_functional_pseudo_element(self, xpath, arguments):
attribute_name = arguments[0].value
attribute_name = arguments[0][0].value
other = XPathExpr('@%s' % attribute_name, '', )
return xpath.join('/', other)