Skip to content

Implement GH-18550: Implement getElementsByClassName() #19108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ PHP NEWS

- DOM:
. Make cloning DOM node lists, maps, and collections fail. (nielsdos)
. Added Dom\Element::getElementsByClassName(). (nielsdos)

- PDO_ODBC
. Fetch larger block sizes and better handle SQL_NO_TOTAL when calling
Expand Down
1 change: 1 addition & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ PHP 8.5 UPGRADE NOTES
RFC: https://wiki.php.net/rfc/curl_share_persistence_improvement

- DOM:
. Added Dom\Element::getElementsByClassName().
. Added Dom\Element::insertAdjacentHTML().

- Enchant:
Expand Down
38 changes: 38 additions & 0 deletions ext/dom/element.c
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,44 @@ PHP_METHOD(Dom_Element, getElementsByTagName)
}
/* }}} end dom_element_get_elements_by_tag_name */

/* {{{ Dom\Element::getElementsByClassName(string $classNames): HTMLCollection
 *
 * Builds an HTMLCollection whose contents are selected by a set of class-name tokens.
 * The argument is parsed into a token set; when that set is empty the collection is
 * backed by the no-op handler, otherwise by the class-name handler, which takes
 * ownership of the token set. */
PHP_METHOD(Dom_Element, getElementsByClassName)
{
	zend_string *class_names;
	dom_object *intern;
	dom_object *namednode;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "P", &class_names) == FAILURE) {
		RETURN_THROWS();
	}

	/* Reject inputs whose length does not fit in an int. */
	if (ZSTR_LEN(class_names) > INT_MAX) {
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	DOM_GET_THIS_INTERN(intern);

	object_init_ex(return_value, dom_html_collection_class_entry);
	namednode = Z_DOMOBJ_P(return_value);

	/* Tokenize the requested class names; the parser is told whether the
	 * owning document is in quirks mode. */
	const bool is_quirks = intern->document->quirks_mode == PHP_LIBXML_QUIRKS;
	HashTable *token_set;
	ALLOC_HASHTABLE(token_set);
	zend_hash_init(token_set, 0, NULL, NULL, false);
	dom_ordered_set_parser(token_set, ZSTR_VAL(class_names), is_quirks);

	if (zend_hash_num_elements(token_set) != 0) {
		php_dom_create_obj_map(intern, namednode, NULL, NULL, NULL, &php_dom_obj_map_by_class_name);

		/* Hand the token set over to the map, flagged so the map releases it. */
		dom_nnodemap_object *map = namednode->ptr;
		map->array = token_set;
		map->release_array = true;
		return;
	}

	/* No tokens at all: use the no-op map and drop the unused set right away. */
	php_dom_create_obj_map(intern, namednode, NULL, NULL, NULL, &php_dom_obj_map_noop);
	zend_hash_destroy(token_set);
	FREE_HASHTABLE(token_set);
}
/* }}} */

/* should_free_result must be initialized to false */
static const xmlChar *dom_get_attribute_ns(dom_object *intern, xmlNodePtr elemp, const char *uri, size_t uri_len, const char *name, bool *should_free_result)
{
Expand Down
101 changes: 100 additions & 1 deletion ext/dom/obj_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "obj_map.h"
#include "token_list.h"

static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap)
{
Expand All @@ -40,6 +41,30 @@ static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap)
objmap->cached_length = -1;
}

/* Tells whether `nodep` is an element whose "class" attribute satisfies
 * dom_ordered_set_all_contained() against the token set stored in map->array.
 * Non-element nodes and elements without a "class" attribute never match.
 * The quirks-mode flag of the base object's document is forwarded to the check. */
static bool dom_matches_class_name(const dom_nnodemap_object *map, const xmlNode *nodep)
{
	if (nodep->type != XML_ELEMENT_NODE) {
		return false;
	}

	xmlAttrPtr class_attr = xmlHasNsProp(nodep, BAD_CAST "class", NULL);
	if (class_attr == NULL) {
		return false;
	}

	bool owns_value;
	xmlChar *class_value = php_libxml_attr_value(class_attr, &owns_value);

	const bool is_quirks = map->baseobj->document->quirks_mode == PHP_LIBXML_QUIRKS;
	const bool matched = dom_ordered_set_all_contained(map->array, (const char *) class_value, is_quirks) ? true : false;

	/* The attribute value is only ours to release when the helper says so. */
	if (owns_value) {
		xmlFree(class_value);
	}

	return matched;
}

/**************************
* === Length methods === *
**************************/
Expand Down Expand Up @@ -106,6 +131,24 @@ static zend_long dom_map_get_by_tag_name_length(dom_nnodemap_object *map)
return count;
}

/* Length handler for class-name collections: walks everything below the map's
 * base node in tree order and counts the nodes accepted by dom_matches_class_name().
 * Returns 0 when the base object has no underlying node. */
static zend_long dom_map_get_by_class_name_length(dom_nnodemap_object *map)
{
	xmlNodePtr basep = dom_object_get_node(map->baseobj);
	if (basep == NULL) {
		return 0;
	}

	zend_long matches = 0;
	for (xmlNodePtr cur = php_dom_first_child_of_container_node(basep);
			cur != NULL;
			cur = php_dom_next_in_tree_order(cur, basep)) {
		if (dom_matches_class_name(map, cur)) {
			matches++;
		}
	}
	return matches;
}

static zend_long dom_map_get_zero_length(dom_nnodemap_object *map)
{
return 0;
Expand Down Expand Up @@ -276,6 +319,10 @@ static void dom_map_collection_named_item_elements_iter(dom_nnodemap_object *map
}
}

/* Named-item iteration step that intentionally does nothing: neither `map` nor `iter`
 * is read or modified. It is installed as the collection_named_item_iter of the no-op
 * map handler in place of a NULL pointer.
 * NOTE(review): presumably this lets callers invoke the iterator unconditionally on an
 * empty collection - confirm against the caller. */
static void dom_map_collection_named_item_null(dom_nnodemap_object *map, php_dom_obj_map_collection_iter *iter)
{
}

static void dom_map_get_by_tag_name_item(dom_nnodemap_object *map, zend_long index, zval *return_value)
{
xmlNodePtr nodep = dom_object_get_node(map->baseobj);
Expand All @@ -292,12 +339,54 @@ static void dom_map_get_by_tag_name_item(dom_nnodemap_object *map, zend_long ind
}
}

/* Item handler for class-name collections.
 *
 * Looks up the element at position `index` (0-based, tree order) among the nodes below
 * the map's base node that are accepted by dom_matches_class_name(), and stores the
 * result in `return_value` through dom_ret_node_to_zobj(). `itemnode` stays NULL, and is
 * passed on as such, when the base node is gone, when `index` is negative, or when fewer
 * than index + 1 elements match. A found node is recorded with dom_map_cache_obj().
 *
 * The seek honours the start point handed back by dom_obj_map_get_start_point():
 * if it names a node, the search resumes from there and start_point.index is the number
 * of positions still to advance; otherwise the search starts at the first child and has
 * to pass `index` matches before reaching the wanted one. */
static void dom_map_get_by_class_name_item(dom_nnodemap_object *map, zend_long index, zval *return_value)
{
	xmlNodePtr nodep = dom_object_get_node(map->baseobj);
	xmlNodePtr itemnode = NULL;
	if (nodep && index >= 0) {
		dom_node_idx_pair start_point = dom_obj_map_get_start_point(map, nodep, index);
		/* Number of matching elements, counted from itemnode onwards, that must be
		 * passed over before the wanted one is reached. */
		zend_long to_skip;

		if (start_point.node) {
			if (start_point.index > 0) {
				/* Only start iteration at next point if we actually have an index to seek to:
				 * we don't want to immediately skip the start node if it already is the
				 * one we need. Stepping off the start node consumes one position. */
				itemnode = php_dom_next_in_tree_order(start_point.node, nodep);
				to_skip = start_point.index - 1;
			} else {
				itemnode = start_point.node;
				to_skip = 0;
			}
		} else {
			/* No usable start point: scan from the beginning using the absolute index. */
			itemnode = php_dom_first_child_of_container_node(nodep);
			to_skip = index;
		}

		for (;;) {
			/* Advance to the next matching element (itemnode itself included). */
			while (itemnode != NULL && !dom_matches_class_name(map, itemnode)) {
				itemnode = php_dom_next_in_tree_order(itemnode, nodep);
			}
			if (itemnode == NULL || to_skip == 0) {
				break;
			}
			/* This match is not the wanted one yet: step past it, otherwise the
			 * next search would stop on the very same node again. */
			--to_skip;
			itemnode = php_dom_next_in_tree_order(itemnode, nodep);
		}
	}
	dom_ret_node_to_zobj(map, itemnode, return_value);
	if (itemnode) {
		dom_map_cache_obj(map, itemnode, index, return_value);
	}
}

/* Named-item iteration step for tag-name collections: stores in iter->candidate whatever
 * dom_get_elements_by_tag_name_ns_raw() returns for iter->basep, the current candidate
 * and the map's ns / local / local_lower names. */
static void dom_map_collection_named_item_by_tag_name_iter(dom_nnodemap_object *map, php_dom_obj_map_collection_iter *iter)
{
/* The helper receives &iter->cur, so it may update the running position, and iter->next
 * as the position to look for. NOTE(review): exact semantics live in the helper - confirm there. */
iter->candidate = dom_get_elements_by_tag_name_ns_raw(iter->basep, iter->candidate, map->ns, map->local, map->local_lower, &iter->cur, iter->next);
/* Must run after the call above: the next target is one past the position just written to iter->cur. */
iter->next = iter->cur + 1;
}

/* Named-item iteration step for class-name collections: moves iter->candidate to the
 * next node, in tree order below iter->basep, accepted by dom_matches_class_name().
 * With no current candidate the scan starts at the first child of the base node;
 * iter->candidate ends up NULL once no further match exists. */
static void dom_map_collection_named_item_by_class_name_iter(dom_nnodemap_object *map, php_dom_obj_map_collection_iter *iter)
{
	xmlNodePtr root = iter->basep;
	xmlNodePtr cur;

	if (iter->candidate != NULL) {
		/* Resume right after the previously reported node. */
		cur = php_dom_next_in_tree_order(iter->candidate, root);
	} else {
		cur = php_dom_first_child_of_container_node(root);
	}

	for (; cur != NULL; cur = php_dom_next_in_tree_order(cur, root)) {
		if (dom_matches_class_name(map, cur)) {
			break;
		}
	}

	iter->candidate = cur;
}

static void dom_map_get_null_item(dom_nnodemap_object *map, zend_long index, zval *return_value)
{
RETURN_NULL();
Expand Down Expand Up @@ -478,6 +567,16 @@ const php_dom_obj_map_handler php_dom_obj_map_by_tag_name = {
.nameless = true,
};

/* Handler table for the collections created by getElementsByClassName() when at least
 * one class-name token was given: length, item lookup and named-item iteration all use
 * the class-name specific functions, while the namespaced named-item slots use the
 * "_null" handlers. */
const php_dom_obj_map_handler php_dom_obj_map_by_class_name = {
.length = dom_map_get_by_class_name_length,
.get_item = dom_map_get_by_class_name_item,
.get_ns_named_item = dom_map_get_ns_named_item_null,
.has_ns_named_item = dom_map_has_ns_named_item_null,
.collection_named_item_iter = dom_map_collection_named_item_by_class_name_iter,
/* The item handler consults a start point and records found nodes via dom_map_cache_obj(). */
.use_cache = true,
/* NOTE(review): same setting as the tag-name handler; meaning of the flag is defined elsewhere - confirm. */
.nameless = true,
};

const php_dom_obj_map_handler php_dom_obj_map_child_nodes = {
.length = dom_map_get_nodes_length,
.get_item = dom_map_get_nodes_item,
Expand Down Expand Up @@ -533,7 +632,7 @@ const php_dom_obj_map_handler php_dom_obj_map_noop = {
.get_item = dom_map_get_null_item,
.get_ns_named_item = dom_map_get_ns_named_item_null,
.has_ns_named_item = dom_map_has_ns_named_item_null,
.collection_named_item_iter = NULL,
.collection_named_item_iter = dom_map_collection_named_item_null,
.use_cache = false,
.nameless = true,
};
Expand Down
1 change: 1 addition & 0 deletions ext/dom/obj_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ zend_long php_dom_get_nodelist_length(dom_object *obj);

extern const php_dom_obj_map_handler php_dom_obj_map_attributes;
extern const php_dom_obj_map_handler php_dom_obj_map_by_tag_name;
extern const php_dom_obj_map_handler php_dom_obj_map_by_class_name;
extern const php_dom_obj_map_handler php_dom_obj_map_child_elements;
extern const php_dom_obj_map_handler php_dom_obj_map_child_nodes;
extern const php_dom_obj_map_handler php_dom_obj_map_nodeset;
Expand Down
3 changes: 3 additions & 0 deletions ext/dom/php_dom.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,7 @@ public function removeAttributeNode(Attr $attr) : Attr {}

public function getElementsByTagName(string $qualifiedName): HTMLCollection {}
public function getElementsByTagNameNS(?string $namespace, string $localName): HTMLCollection {}
public function getElementsByClassName(string $classNames): HTMLCollection {}

public function insertAdjacentElement(AdjacentPosition $where, Element $element): ?Element {}
public function insertAdjacentText(AdjacentPosition $where, string $data): void {}
Expand Down Expand Up @@ -1986,6 +1987,8 @@ abstract class Document extends Node implements ParentNode
public function getElementsByTagName(string $qualifiedName): HTMLCollection {}
/** @implementation-alias Dom\Element::getElementsByTagNameNS */
public function getElementsByTagNameNS(?string $namespace, string $localName): HTMLCollection {}
/** @implementation-alias Dom\Element::getElementsByClassName */
public function getElementsByClassName(string $classNames): HTMLCollection {}

public function createElement(string $localName): Element {}
public function createElementNS(?string $namespace, string $qualifiedName): Element {}
Expand Down
11 changes: 10 additions & 1 deletion ext/dom/php_dom_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
--TEST--
Dom\Element::getElementsByClassName() empty class names
--EXTENSIONS--
dom
--FILE--
<?php

// Fixture: one element carrying classes (with surrounding whitespace) and a child with an id.
$dom = Dom\HTMLDocument::createFromString(<<<HTML
<div class=" foo bar ">
<p id="child"></p>
</div>
HTML, LIBXML_NOERROR);

// An empty class-name string must yield an empty collection.
$collection = $dom->documentElement->getElementsByClassName("");
var_dump($collection->count());

// Iterating the empty collection must not run the loop body.
foreach ($collection as $node) {
throw new Error("unreachable");
}

// Arguments made only of whitespace characters are expected to behave like the empty string.
var_dump($dom->getElementsByClassName(" ")->count());
var_dump($dom->getElementsByClassName("\t")->count());
var_dump($dom->getElementsByClassName("\t\n\f\v")->count());
// Named lookup on such a collection is expected to return NULL even though #child exists in the document.
var_dump($dom->getElementsByClassName("\t\n\f\v")->namedItem("child"));

?>
--EXPECT--
int(0)
int(0)
int(0)
int(0)
NULL
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
--TEST--
Dom\Element::getElementsByClassName() non quirks mode
--EXTENSIONS--
dom
--FILE--
<?php

// Fixture: a document with a doctype and two groups of <p> elements whose class names
// differ only by letter case and order.
$dom = Dom\HTMLDocument::createFromString(<<<HTML
<!DOCTYPE html>
<div id="container">
<p class="Bar">1</p>
<p class="bar">2</p>
<p class="Bar Foo">3</p>
<p class="Bar foo">4</p>
<p class="foo bar">5</p>
<p class="foo bar" name="here">6</p>
</div>
<div>
<p class="Bar">7</p>
<p class="bar">8</p>
<p class="Bar Foo">9</p>
<p class="Bar foo">10</p>
<p class="foo bar">11</p>
</div>
HTML);

// Document-wide lookup; the requested tokens are separated by mixed whitespace.
// Only elements carrying both lowercase "foo" and "bar" are expected (5, 6 and 11).
$collection = $dom->getElementsByClassName("foo \n bar");

echo "There are {$collection->length} items in the document in total that have both \"foo\" and \"bar\"\n";

// Same lookup, but scoped to the descendants of #container.
$collection = $dom->getElementById('container')->getElementsByClassName("foo \n bar");

echo "There are {$collection->length} items in #container in total that have both \"foo\" and \"bar\"\n";

// Iteration and item() must agree on which node sits at each key.
foreach ($collection as $key => $node) {
echo "--- Key $key ---\n";
var_dump($node->tagName, $node->textContent);
var_dump($node === $collection->item($key));
}

// Named lookup resolves the element whose name attribute is "here".
var_dump($collection->namedItem("here")->textContent);

?>
--EXPECT--
There are 3 items in the document in total that have both "foo" and "bar"
There are 2 items in #container in total that have both "foo" and "bar"
--- Key 0 ---
string(1) "P"
string(1) "5"
bool(true)
--- Key 1 ---
string(1) "P"
string(1) "6"
bool(true)
string(1) "6"
Loading