Skip to content

Commit aaa3eb1

Browse files
committed
scripts: kconfig: improve search results ordering using scoring
This implements the same improved sorting algorithm as was recently added to the documentation (see zephyrproject-rtos#98016), namely: - A match in a Kconfig symbol's name is given more weight than a match in its prompt. - Field-length normalization is applied so that the shorter the field, the higher its relevance (e.g. searching for "sensor" will now yield CONFIG_SENSOR as the top result, as the query matches essentially 100% of the symbol's name). Signed-off-by: Benjamin Cabé <[email protected]>
1 parent 51b424e commit aaa3eb1

File tree

3 files changed

+87
-84
lines changed

3 files changed

+87
-84
lines changed

scripts/kconfig/config_utils.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env python3
2+
3+
# SPDX-FileCopyrightText: Copyright The Zephyr Project Contributors
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
"""
7+
Common utilities for Kconfig configuration interfaces.
8+
9+
This module provides shared functionality for menuconfig.py and guiconfig.py.
10+
"""
11+
12+
import re
13+
14+
from kconfiglib import Symbol, Choice
15+
16+
17+
def score_search_matches(search_str, nodes):
    """
    Score and sort Kconfig menu nodes by relevance to a search string.

    The search string is split on whitespace and each piece is compiled as a
    regular expression. A node is kept when every regex matches its
    symbol/choice name, or when every regex matches its prompt. Matching is
    case-insensitive: both the patterns and the searched fields are
    lowercased. Note that lowercasing the pattern also lowercases escape
    sequences (e.g. ``\\S`` becomes ``\\s``).

    Scoring:
    - A match in the name weighs more than a match in the prompt.
    - Field-length normalization is applied (weight / sqrt(field length)), so
      the shorter the matched field, the higher its relevance.
    - A node matching on both name and prompt gets the sum of both scores.

    Args:
        search_str: The search string (space-separated regexes)
        nodes: Iterable of MenuNode-like objects exposing 'item' and 'prompt'

    Returns:
        List of tuples (node, score) sorted by score (highest first). Nodes
        with equal scores keep their input order. If the search string holds
        no regexes, every node is returned in input order with a score of 1.

    Raises:
        re.error: If any of the regexes is invalid. The error is deliberately
            not swallowed here, so that callers can report the bad regular
            expression to the user instead of showing an empty result list.
    """
    # Compile first, before touching any node, so that an invalid regex is
    # reported via re.error regardless of how many nodes there are.
    regexes = [re.compile(regex.lower()) for regex in search_str.split()]

    # No regexes: all nodes match, order is unchanged
    if not regexes:
        return [(node, 1) for node in nodes]

    NAME_WEIGHT = 2.0
    PROMPT_WEIGHT = 1.0

    def matches_all(text):
        # An empty/missing field never matches, even for regexes that would
        # match the empty string
        return bool(text) and all(regex.search(text) for regex in regexes)

    scored_results = []

    for node in nodes:
        # Only symbols and choices have a name (and a choice's name may be
        # missing); menus and comments are matched on their prompt only
        name = ""
        if isinstance(node.item, (Symbol, Choice)) and node.item.name:
            name = node.item.name.lower()

        prompt = node.prompt[0].lower() if node.prompt else ""

        name_matches = matches_all(name)
        prompt_matches = matches_all(prompt)

        if not (name_matches or prompt_matches):
            continue

        # Field-length normalization (shorter fields = higher relevance).
        # The lengths are non-zero here since empty fields never match.
        score = 0
        if name_matches:
            score += NAME_WEIGHT / (len(name) ** 0.5)
        if prompt_matches:
            score += PROMPT_WEIGHT / (len(prompt) ** 0.5)

        scored_results.append((node, score))

    # Highest score first. list.sort() is stable (also with reverse=True), so
    # ties keep the order in which the nodes were passed in.
    scored_results.sort(key=lambda x: x[1], reverse=True)

    return scored_results

scripts/kconfig/guiconfig.py

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
TRI_TO_STR, TYPE_TO_STR, \
8888
standard_kconfig, standard_config_filename
8989

90+
from config_utils import score_search_matches
91+
9092

9193
# If True, use GIF image data embedded in this file instead of separate GIF
9294
# files. See _load_images().
@@ -1859,12 +1861,11 @@ def _update_jump_to_matches(msglabel, search_string):
18591861
_jump_to_tree.selection_set(())
18601862

18611863
try:
1862-
# We could use re.IGNORECASE here instead of lower(), but this is
1863-
# faster for regexes like '.*debug$' (though the '.*' is redundant
1864-
# there). Those probably have bad interactions with re.search(), which
1865-
# matches anywhere in the string.
1866-
regex_searches = [re.compile(regex).search
1867-
for regex in search_string.lower().split()]
1864+
scored_sc_nodes = score_search_matches(search_string, _sorted_sc_nodes())
1865+
scored_menu_comment_nodes = score_search_matches(search_string, _sorted_menu_comment_nodes())
1866+
1867+
_jump_to_matches = [node for node, _ in scored_sc_nodes + scored_menu_comment_nodes]
1868+
18681869
except re.error as e:
18691870
msg = "Bad regular expression"
18701871
# re.error.msg was added in Python 3.5
@@ -1875,39 +1876,6 @@ def _update_jump_to_matches(msglabel, search_string):
18751876
_jump_to_tree.set_children("")
18761877
return
18771878

1878-
_jump_to_matches = []
1879-
add_match = _jump_to_matches.append
1880-
1881-
for node in _sorted_sc_nodes():
1882-
# Symbol/choice
1883-
sc = node.item
1884-
1885-
for search in regex_searches:
1886-
# Both the name and the prompt might be missing, since
1887-
# we're searching both symbols and choices
1888-
1889-
# Does the regex match either the symbol name or the
1890-
# prompt (if any)?
1891-
if not (sc.name and search(sc.name.lower()) or
1892-
node.prompt and search(node.prompt[0].lower())):
1893-
1894-
# Give up on the first regex that doesn't match, to
1895-
# speed things up a bit when multiple regexes are
1896-
# entered
1897-
break
1898-
1899-
else:
1900-
add_match(node)
1901-
1902-
# Search menus and comments
1903-
1904-
for node in _sorted_menu_comment_nodes():
1905-
for search in regex_searches:
1906-
if not search(node.prompt[0].lower()):
1907-
break
1908-
else:
1909-
add_match(node)
1910-
19111879
msglabel["text"] = "" if _jump_to_matches else "No matches"
19121880

19131881
_update_jump_to_display()

scripts/kconfig/menuconfig.py

Lines changed: 6 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@
223223
TRI_TO_STR, TYPE_TO_STR, \
224224
standard_kconfig, standard_config_filename
225225

226+
from config_utils import score_search_matches
227+
226228

227229
#
228230
# Configuration variables
@@ -2078,54 +2080,13 @@ def select_prev_match():
20782080
prev_s = s
20792081

20802082
try:
2081-
# We could use re.IGNORECASE here instead of lower(), but this
2082-
# is noticeably less jerky while inputting regexes like
2083-
# '.*debug$' (though the '.*' is redundant there). Those
2084-
# probably have bad interactions with re.search(), which
2085-
# matches anywhere in the string.
2086-
#
2087-
# It's not horrible either way. Just a bit smoother.
2088-
regex_searches = [re.compile(regex).search
2089-
for regex in s.lower().split()]
2090-
2091-
# No exception thrown, so the regexes are okay
2083+
# Use the scoring function for symbols and choices
20922084
bad_re = None
20932085

2094-
# List of matching nodes
2095-
matches = []
2096-
add_match = matches.append
2097-
2098-
# Search symbols and choices
2099-
2100-
for node in _sorted_sc_nodes():
2101-
# Symbol/choice
2102-
sc = node.item
2103-
2104-
for search in regex_searches:
2105-
# Both the name and the prompt might be missing, since
2106-
# we're searching both symbols and choices
2107-
2108-
# Does the regex match either the symbol name or the
2109-
# prompt (if any)?
2110-
if not (sc.name and search(sc.name.lower()) or
2111-
node.prompt and search(node.prompt[0].lower())):
2112-
2113-
# Give up on the first regex that doesn't match, to
2114-
# speed things up a bit when multiple regexes are
2115-
# entered
2116-
break
2117-
2118-
else:
2119-
add_match(node)
2120-
2121-
# Search menus and comments
2086+
scored_sc_nodes = score_search_matches(s, _sorted_sc_nodes())
2087+
scored_menu_comment_nodes = score_search_matches(s, _sorted_menu_comment_nodes())
21222088

2123-
for node in _sorted_menu_comment_nodes():
2124-
for search in regex_searches:
2125-
if not search(node.prompt[0].lower()):
2126-
break
2127-
else:
2128-
add_match(node)
2089+
matches = [node for node, _ in scored_sc_nodes + scored_menu_comment_nodes]
21292090

21302091
except re.error as e:
21312092
# Bad regex. Remember the error message so we can show it.

0 commit comments

Comments
 (0)