Skip to content

Commit 103e123

Browse files
committed
scripts: kconfig: improve search results ordering using scoring
This implements the same improved sorting algorithm as was recently added to the documentation (see zephyrproject-rtos#98016), namely: - A match in a Kconfig symbol's name is given more weight than a match in its prompt. - Field-length normalization is applied so that the shorter the field, the higher its relevance (e.g. searching for "sensor" will now yield CONFIG_SENSOR as the top result, as the query matches essentially 100% of the symbol's name). Signed-off-by: Benjamin Cabé <[email protected]>
1 parent 51b424e commit 103e123

File tree

3 files changed

+103
-84
lines changed

3 files changed

+103
-84
lines changed

scripts/kconfig/config_utils.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
3+
# SPDX-FileCopyrightText: Copyright The Zephyr Project Contributors
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
"""
7+
Common utilities for Kconfig configuration interfaces.
8+
9+
This module provides shared functionality for menuconfig.py and guiconfig.py.
10+
"""
11+
12+
import re
13+
14+
from kconfiglib import Symbol, Choice
15+
16+
17+
def score_search_matches(search_str, nodes):
    """
    Scores and sorts search results for Kconfig nodes based on relevance.

    This implements a scoring system similar to modern search engines:
    - A match in a symbol's name is given more weight than a match in its prompt
    - Field-length normalization is applied so that the shorter the field,
      the higher its relevance

    A node is a result only if *every* regex matches its name or its prompt.
    Different regexes may match different fields (e.g. one in the name and
    another in the prompt).

    Args:
        search_str: The search string (space-separated regexes). Matching is
            case-insensitive: both the regexes and the searched text are
            lowercased.
        nodes: List of MenuNode objects to search through

    Returns:
        List of tuples (node, score) sorted by score (highest first). Nodes
        with equal scores keep their relative order from 'nodes'. An empty
        list is returned if any of the regexes is invalid (re.error is not
        propagated to the caller).
    """
    # Parse the search string into regexes. Lowercasing both the pattern and
    # the searched text is used for case-insensitivity instead of
    # re.IGNORECASE.
    try:
        regexes = [re.compile(regex.lower()) for regex in search_str.split()]
    except re.error:
        # Invalid regex - return empty results
        return []

    NAME_WEIGHT = 2.0
    PROMPT_WEIGHT = 1.0

    scored_results = []

    for node in nodes:
        # Get lowercase versions for matching. Only symbols and choices are
        # looked up by name, and even those might not have one.
        name = ""
        if isinstance(node.item, (Symbol, Choice)):
            name = (node.item.name or "").lower()

        prompt = node.prompt[0].lower() if node.prompt else ""

        name_matches = 0
        prompt_matches = 0

        for regex in regexes:
            in_name = bool(name and regex.search(name))
            in_prompt = bool(prompt and regex.search(prompt))

            if not (in_name or in_prompt):
                # This regex matches neither field, so the node is not a
                # result. Give up early to skip the remaining regexes.
                break

            # Per-field hit counts are only used for scoring below
            name_matches += in_name
            prompt_matches += in_prompt
        else:
            # Every regex matched the name or the prompt

            # Apply field-length normalization (the shorter the field, the
            # higher its relevance)
            name_field_norm = 1.0 / (len(name) ** 0.5) if name else 0
            prompt_field_norm = 1.0 / (len(prompt) ** 0.5) if prompt else 0

            # Calculate final score
            score = (name_matches * NAME_WEIGHT * name_field_norm) + (
                prompt_matches * PROMPT_WEIGHT * prompt_field_norm
            )

            scored_results.append((node, score))

    # Sort by score (highest first). list.sort() is stable, so ties keep the
    # order in which the nodes were passed in.
    scored_results.sort(key=lambda x: x[1], reverse=True)

    return scored_results

scripts/kconfig/guiconfig.py

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
TRI_TO_STR, TYPE_TO_STR, \
8888
standard_kconfig, standard_config_filename
8989

90+
from config_utils import score_search_matches
91+
9092

9193
# If True, use GIF image data embedded in this file instead of separate GIF
9294
# files. See _load_images().
@@ -1859,12 +1861,14 @@ def _update_jump_to_matches(msglabel, search_string):
18591861
_jump_to_tree.selection_set(())
18601862

18611863
try:
1862-
# We could use re.IGNORECASE here instead of lower(), but this is
1863-
# faster for regexes like '.*debug$' (though the '.*' is redundant
1864-
# there). Those probably have bad interactions with re.search(), which
1865-
# matches anywhere in the string.
1866-
regex_searches = [re.compile(regex).search
1867-
for regex in search_string.lower().split()]
1864+
# Use the scoring function for symbols and choices
1865+
# Collect all nodes to search
1866+
all_nodes = list(_sorted_sc_nodes()) + list(_sorted_menu_comment_nodes())
1867+
1868+
# Score and sort the results
1869+
scored_results = score_search_matches(search_string, all_nodes)
1870+
_jump_to_matches = [node for node, score in scored_results]
1871+
18681872
except re.error as e:
18691873
msg = "Bad regular expression"
18701874
# re.error.msg was added in Python 3.5
@@ -1875,39 +1879,6 @@ def _update_jump_to_matches(msglabel, search_string):
18751879
_jump_to_tree.set_children("")
18761880
return
18771881

1878-
_jump_to_matches = []
1879-
add_match = _jump_to_matches.append
1880-
1881-
for node in _sorted_sc_nodes():
1882-
# Symbol/choice
1883-
sc = node.item
1884-
1885-
for search in regex_searches:
1886-
# Both the name and the prompt might be missing, since
1887-
# we're searching both symbols and choices
1888-
1889-
# Does the regex match either the symbol name or the
1890-
# prompt (if any)?
1891-
if not (sc.name and search(sc.name.lower()) or
1892-
node.prompt and search(node.prompt[0].lower())):
1893-
1894-
# Give up on the first regex that doesn't match, to
1895-
# speed things up a bit when multiple regexes are
1896-
# entered
1897-
break
1898-
1899-
else:
1900-
add_match(node)
1901-
1902-
# Search menus and comments
1903-
1904-
for node in _sorted_menu_comment_nodes():
1905-
for search in regex_searches:
1906-
if not search(node.prompt[0].lower()):
1907-
break
1908-
else:
1909-
add_match(node)
1910-
19111882
msglabel["text"] = "" if _jump_to_matches else "No matches"
19121883

19131884
_update_jump_to_display()

scripts/kconfig/menuconfig.py

Lines changed: 8 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@
223223
TRI_TO_STR, TYPE_TO_STR, \
224224
standard_kconfig, standard_config_filename
225225

226+
from config_utils import score_search_matches
227+
226228

227229
#
228230
# Configuration variables
@@ -2078,54 +2080,15 @@ def select_prev_match():
20782080
prev_s = s
20792081

20802082
try:
2081-
# We could use re.IGNORECASE here instead of lower(), but this
2082-
# is noticeably less jerky while inputting regexes like
2083-
# '.*debug$' (though the '.*' is redundant there). Those
2084-
# probably have bad interactions with re.search(), which
2085-
# matches anywhere in the string.
2086-
#
2087-
# It's not horrible either way. Just a bit smoother.
2088-
regex_searches = [re.compile(regex).search
2089-
for regex in s.lower().split()]
2090-
2091-
# No exception thrown, so the regexes are okay
2083+
# Use the scoring function for symbols and choices
20922084
bad_re = None
20932085

2094-
# List of matching nodes
2095-
matches = []
2096-
add_match = matches.append
2097-
2098-
# Search symbols and choices
2099-
2100-
for node in _sorted_sc_nodes():
2101-
# Symbol/choice
2102-
sc = node.item
2103-
2104-
for search in regex_searches:
2105-
# Both the name and the prompt might be missing, since
2106-
# we're searching both symbols and choices
2107-
2108-
# Does the regex match either the symbol name or the
2109-
# prompt (if any)?
2110-
if not (sc.name and search(sc.name.lower()) or
2111-
node.prompt and search(node.prompt[0].lower())):
2112-
2113-
# Give up on the first regex that doesn't match, to
2114-
# speed things up a bit when multiple regexes are
2115-
# entered
2116-
break
2117-
2118-
else:
2119-
add_match(node)
2120-
2121-
# Search menus and comments
2086+
# Collect all nodes to search
2087+
all_nodes = list(_sorted_sc_nodes()) + list(_sorted_menu_comment_nodes())
21222088

2123-
for node in _sorted_menu_comment_nodes():
2124-
for search in regex_searches:
2125-
if not search(node.prompt[0].lower()):
2126-
break
2127-
else:
2128-
add_match(node)
2089+
# Score and sort the results
2090+
scored_results = score_search_matches(s, all_nodes)
2091+
matches = [node for node, score in scored_results]
21292092

21302093
except re.error as e:
21312094
# Bad regex. Remember the error message so we can show it.

0 commit comments

Comments
 (0)