Skip to content

Commit 1a0e781

Browse files
committed
scripts: kconfig: improve search results ordering using scoring
This implements the same improved sorting algorithm as was recently added to the documentation (see #98016), namely: - A match in a Kconfig symbol's name is given more weight than a match in its prompt. - Field-length normalization is applied so that the shorter the field, the higher its relevance (e.g. searching for "sensor" will now yield CONFIG_SENSOR as the top result, as the query matches essentially 100% of the symbol's name). Signed-off-by: Benjamin Cabé <[email protected]>
1 parent 8056743 commit 1a0e781

File tree

3 files changed

+87
-84
lines changed

3 files changed

+87
-84
lines changed

scripts/kconfig/config_utils.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env python3
2+
3+
# SPDX-FileCopyrightText: Copyright The Zephyr Project Contributors
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
"""
7+
Common utilities for Kconfig configuration interfaces.
8+
9+
This module provides shared functionality for menuconfig.py and guiconfig.py.
10+
"""
11+
12+
import re
13+
14+
from kconfiglib import Symbol, Choice
15+
16+
17+
def score_search_matches(search_str, nodes):
    """
    Scores and sorts search results for Kconfig nodes based on relevance.

    This implements a basic scoring system where:
    - A match in a symbol's name is given more weight than a match in its prompt
    - Field-length normalization is applied so that the shorter the field, the
      higher its relevance

    A node is a match when every regex matches its name, or when every regex
    matches its prompt (all regexes must match within the same field).
    Matching is case-insensitive: both the regexes and the searched text are
    lowercased before matching.

    Args:
        search_str: The search string (space-separated regexes)
        nodes: List of MenuNode objects to search through

    Returns:
        List of tuples (node, score) sorted by score (highest first). Nodes
        with equal scores keep their relative order from 'nodes'. If
        search_str contains no regexes, every node is returned with a score
        of 1, in the original order.

    Raises:
        re.error: If any of the regexes in search_str is invalid.
    """
    # Parse the search string into regexes.
    #
    # re.error is deliberately not caught here: the callers wrap this call in
    # 'except re.error' so that they can show a "bad regular expression"
    # message. Returning an empty list instead would make an invalid regex
    # indistinguishable from a valid search that has no matches.
    #
    # NOTE: lower() is applied to the pattern itself, so escapes are
    # lowercased as well (e.g. '\S' is compiled as '\s').
    regexes = [re.compile(regex.lower()) for regex in search_str.split()]

    # If no regexes, all nodes match, order is unchanged
    if not regexes:
        return [(node, 1) for node in nodes]

    NAME_WEIGHT = 2.0
    PROMPT_WEIGHT = 1.0

    scored_results = []

    for node in nodes:
        # Get lowercase versions for matching. Only symbols and choices have
        # a name; for other node items only the prompt is searched.
        name = ""
        if isinstance(node.item, (Symbol, Choice)):
            name = node.item.name.lower() if node.item.name else ""

        prompt = node.prompt[0].lower() if node.prompt else ""

        # A field matches only if it is non-empty and all regexes match it
        name_matches = name and all(regex.search(name) for regex in regexes)
        prompt_matches = prompt and all(regex.search(prompt) for regex in regexes)

        if not (name_matches or prompt_matches):
            continue

        # Apply field-length normalization (shorter fields = higher
        # relevance). The fields are non-empty here, so there is no division
        # by zero.
        score = 0
        if name_matches:
            score += NAME_WEIGHT / (len(name) ** 0.5)
        if prompt_matches:
            score += PROMPT_WEIGHT / (len(prompt) ** 0.5)

        scored_results.append((node, score))

    # Sort by score (highest first). list.sort() is stable, also with
    # reverse=True, so equally scored nodes keep their input order.
    scored_results.sort(key=lambda x: x[1], reverse=True)

    return scored_results

scripts/kconfig/guiconfig.py

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@
7676
TRI_TO_STR, TYPE_TO_STR, \
7777
standard_kconfig, standard_config_filename
7878

79+
from config_utils import score_search_matches
80+
7981

8082
# If True, use GIF image data embedded in this file instead of separate GIF
8183
# files. See _load_images().
@@ -1843,12 +1845,11 @@ def _update_jump_to_matches(msglabel, search_string):
18431845
_jump_to_tree.selection_set(())
18441846

18451847
try:
1846-
# We could use re.IGNORECASE here instead of lower(), but this is
1847-
# faster for regexes like '.*debug$' (though the '.*' is redundant
1848-
# there). Those probably have bad interactions with re.search(), which
1849-
# matches anywhere in the string.
1850-
regex_searches = [re.compile(regex).search
1851-
for regex in search_string.lower().split()]
1848+
scored_sc_nodes = score_search_matches(search_string, _sorted_sc_nodes())
1849+
scored_menu_comment_nodes = score_search_matches(search_string, _sorted_menu_comment_nodes())
1850+
1851+
_jump_to_matches = [node for node, _ in scored_sc_nodes + scored_menu_comment_nodes]
1852+
18521853
except re.error as e:
18531854
msg = "Bad regular expression"
18541855
# re.error.msg was added in Python 3.5
@@ -1859,39 +1860,6 @@ def _update_jump_to_matches(msglabel, search_string):
18591860
_jump_to_tree.set_children("")
18601861
return
18611862

1862-
_jump_to_matches = []
1863-
add_match = _jump_to_matches.append
1864-
1865-
for node in _sorted_sc_nodes():
1866-
# Symbol/choice
1867-
sc = node.item
1868-
1869-
for search in regex_searches:
1870-
# Both the name and the prompt might be missing, since
1871-
# we're searching both symbols and choices
1872-
1873-
# Does the regex match either the symbol name or the
1874-
# prompt (if any)?
1875-
if not (sc.name and search(sc.name.lower()) or
1876-
node.prompt and search(node.prompt[0].lower())):
1877-
1878-
# Give up on the first regex that doesn't match, to
1879-
# speed things up a bit when multiple regexes are
1880-
# entered
1881-
break
1882-
1883-
else:
1884-
add_match(node)
1885-
1886-
# Search menus and comments
1887-
1888-
for node in _sorted_menu_comment_nodes():
1889-
for search in regex_searches:
1890-
if not search(node.prompt[0].lower()):
1891-
break
1892-
else:
1893-
add_match(node)
1894-
18951863
msglabel["text"] = "" if _jump_to_matches else "No matches"
18961864

18971865
_update_jump_to_display()

scripts/kconfig/menuconfig.py

Lines changed: 6 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@
223223
TRI_TO_STR, TYPE_TO_STR, \
224224
standard_kconfig, standard_config_filename
225225

226+
from config_utils import score_search_matches
227+
226228

227229
#
228230
# Configuration variables
@@ -2077,54 +2079,13 @@ def select_prev_match():
20772079
prev_s = s
20782080

20792081
try:
2080-
# We could use re.IGNORECASE here instead of lower(), but this
2081-
# is noticeably less jerky while inputting regexes like
2082-
# '.*debug$' (though the '.*' is redundant there). Those
2083-
# probably have bad interactions with re.search(), which
2084-
# matches anywhere in the string.
2085-
#
2086-
# It's not horrible either way. Just a bit smoother.
2087-
regex_searches = [re.compile(regex).search
2088-
for regex in s.lower().split()]
2089-
2090-
# No exception thrown, so the regexes are okay
2082+
# Use the scoring function for symbols and choices
20912083
bad_re = None
20922084

2093-
# List of matching nodes
2094-
matches = []
2095-
add_match = matches.append
2096-
2097-
# Search symbols and choices
2098-
2099-
for node in _sorted_sc_nodes():
2100-
# Symbol/choice
2101-
sc = node.item
2102-
2103-
for search in regex_searches:
2104-
# Both the name and the prompt might be missing, since
2105-
# we're searching both symbols and choices
2106-
2107-
# Does the regex match either the symbol name or the
2108-
# prompt (if any)?
2109-
if not (sc.name and search(sc.name.lower()) or
2110-
node.prompt and search(node.prompt[0].lower())):
2111-
2112-
# Give up on the first regex that doesn't match, to
2113-
# speed things up a bit when multiple regexes are
2114-
# entered
2115-
break
2116-
2117-
else:
2118-
add_match(node)
2119-
2120-
# Search menus and comments
2085+
scored_sc_nodes = score_search_matches(s, _sorted_sc_nodes())
2086+
scored_menu_comment_nodes = score_search_matches(s, _sorted_menu_comment_nodes())
21212087

2122-
for node in _sorted_menu_comment_nodes():
2123-
for search in regex_searches:
2124-
if not search(node.prompt[0].lower()):
2125-
break
2126-
else:
2127-
add_match(node)
2088+
matches = [node for node, _ in scored_sc_nodes + scored_menu_comment_nodes]
21282089

21292090
except re.error as e:
21302091
# Bad regex. Remember the error message so we can show it.

0 commit comments

Comments
 (0)