Skip to content

Commit 3d8fd09

Browse files
committed
Implement :link per the HTML5 spec.
1 parent c6137ce commit 3d8fd09

File tree

3 files changed

+36
-14
lines changed

3 files changed

+36
-14
lines changed

CHANGES

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ Version 0.5
77
Not released yet.
88

99
* Fix case sensitivity issues.
10-
* Add the ``xhtml`` parameter for :class:`HTMLTranslator`.
10+
* Implement :class:`HTMLTranslator` based on the `HTML5 specification`_
11+
rather than guessing; add the ``xhtml`` parameter.
12+
13+
.. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors
1114

1215

1316
Version 0.4

cssselect/tests.py

+25-10
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import operator
2121
import unittest
2222

23-
from lxml import html
23+
from lxml import etree, html
2424
from cssselect import (parse, GenericTranslator, HTMLTranslator,
2525
SelectorSyntaxError, ExpressionError)
2626
from cssselect.parser import tokenize, parse_series
@@ -401,7 +401,7 @@ def test_series(self):
401401
assert parse_series('5') == (0, 5)
402402

403403
def test_select(self):
404-
document = html.document_fromstring(HTML_IDS)
404+
document = etree.fromstring(HTML_IDS)
405405
sort_key = dict(
406406
(el, count) for count, el in enumerate(document.getiterator())
407407
).__getitem__
@@ -426,8 +426,9 @@ def pcss(main, *selectors, **kwargs):
426426
return result
427427

428428
all_ids = pcss('*')
429-
assert len(all_ids) == 27
430-
assert all_ids[:4] == ['html', 'nil', 'nil', 'outer-div']
429+
assert len(all_ids) == 32
430+
assert all_ids[:6] == [
431+
'html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div']
431432
assert all_ids[-1:] == ['foobar-span']
432433
assert pcss('div') == ['outer-div', 'li-div', 'foobar-div']
433434
assert pcss('DIV', html_only=True) == [
@@ -503,9 +504,16 @@ def pcss(main, *selectors, **kwargs):
503504
assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li']
504505
assert pcss('#outer-div:first-child') == ['outer-div']
505506
assert pcss('#outer-div :first-child') == [
506-
'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-disabled']
507+
'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-disabled',
508+
'area-href']
507509
assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor']
508-
assert pcss(':link', html_only=True) == pcss('a[href]')
510+
511+
512+
assert pcss(':link', html_only=True) == [
513+
'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href']
514+
assert pcss(':visited', html_only=True) == []
515+
516+
509517
assert pcss(':checked', html_only=True) == ['checkbox-checked']
510518
assert pcss(':disabled', html_only=True) == [
511519
'fieldset', 'checkbox-disabled']
@@ -590,7 +598,10 @@ def count(selector):
590598
assert count('div[class~=dialog]') == 51 # ? Seems right
591599

592600
HTML_IDS = '''
593-
<html id="html"><head></head><body>
601+
<html id="html"><head>
602+
<link id="link-href" href="foo" />
603+
<link id="link-nohref" />
604+
</head><body>
594605
<div id="outer-div">
595606
<a id="name-anchor" name="foo"></a>
596607
<a id="tag-anchor" rel="tag" href="http://localhost/foo">link</a>
@@ -612,14 +623,18 @@ def count(selector):
612623
<p id="paragraph">
613624
<b id="p-b">hi</b> <em id="p-em">there</em>
614625
<b id="p-b2">guy</b>
615-
<input type="checkbox" id="checkbox-unchecked">
616-
<input type="checkbox" id="checkbox-checked" checked="checked">
626+
<input type="checkbox" id="checkbox-unchecked" />
627+
<input type="checkbox" id="checkbox-checked" checked="checked" />
617628
<fieldset id="fieldset" disabled="disabled">
618-
<input type="checkbox" id="checkbox-disabled">
629+
<input type="checkbox" id="checkbox-disabled" />
619630
</fieldset>
620631
</p>
621632
<ol id="second-ol">
622633
</ol>
634+
<map name="dummymap">
635+
<area shape="circle" coords="200,250,25" href="foo.html" id="area-href" />
636+
<area shape="default" id="area-nohref" />
637+
</map>
623638
</div>
624639
<div id="foobar-div" foobar="ab bc
625640
cde"><span id="foobar-span"></span></div>

cssselect/xpath.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -491,10 +491,13 @@ class HTMLTranslator(GenericTranslator):
491491
"""
492492
Translator for (X)HTML documents.
493493
494-
Has a more useful implementation of some pseudo-classes, based on
495-
HTML-specific element names and attribute names.
494+
Has a more useful implementation of some pseudo-classes based on
495+
HTML-specific element names and attribute names, as described in
496+
the `HTML5 specification`_. It assumes no-quirks mode.
496497
The API is the same as :class:`GenericTranslator`.
497498
499+
.. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors
500+
498501
:param xhtml:
499502
If false (the default), element names and attribute names
500503
are case-insensitive.
@@ -514,7 +517,8 @@ def xpath_checked_pseudo(self, xpath):
514517
"(@checked and name(.) = 'input')")
515518

516519
def xpath_link_pseudo(self, xpath):
517-
return xpath.add_condition("@href and name(.) = 'a'")
520+
return xpath.add_condition("@href and "
521+
"(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
518522

519523
# Links are never visited, so the implementation for :visited is the same
520524
# as in GenericTranslator

0 commit comments

Comments
 (0)