Skip to content

Commit 1426dc0

Browse files
Merge pull request #5461 from hannes-steffenhagen-diffblue/cmdline-spell-correction
Add spell correction for CLI options
2 parents cc4d1ac + ec9c37f commit 1426dc0

File tree

11 files changed

+527
-1
lines changed

11 files changed

+527
-1
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
CORE
2+
dummy.c
3+
--traec
4+
did you mean --trace
5+
^EXIT=(1|64)$
6+
^SIGNAL=0$
7+
--
8+
--
9+
This checks that we get a useful suggestion when we make a typo on the
10+
commandline.
11+
12+
The error code is 1 on linux/osx and 64 on windows for some reason.

regression/validate-trace-xml-schema/check.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
# these test for invalid command line handling
3434
'bad_option/test_multiple.desc',
3535
'bad_option/test.desc',
36+
'unknown-argument-suggestion/test.desc',
3637
# this one produces XML intermingled with main XML output when used with --xml-ui
3738
'graphml_witness2/test.desc',
3839
# produces intermingled XML on the command line

src/util/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ SRC = allocate_objects.cpp \
1111
cout_message.cpp \
1212
dstring.cpp \
1313
endianness_map.cpp \
14+
edit_distance.cpp \
1415
expr.cpp \
1516
expr_initializer.cpp \
1617
expr_util.cpp \

src/util/cmdline.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Author: Daniel Kroening, [email protected]
88

99
#include "cmdline.h"
1010

11+
#include <util/edit_distance.h>
1112
#include <util/exception_utils.h>
1213
#include <util/invariant.h>
1314

@@ -250,6 +251,66 @@ cmdlinet::option_namest cmdlinet::option_names() const
250251
return option_namest{*this};
251252
}
252253

254+
std::vector<std::string>
255+
cmdlinet::get_argument_suggestions(const std::string &unknown_argument)
256+
{
257+
struct suggestiont
258+
{
259+
std::size_t distance;
260+
std::string suggestion;
261+
262+
bool operator<(const suggestiont &other) const
263+
{
264+
return distance < other.distance;
265+
}
266+
};
267+
268+
auto argument_suggestions = std::vector<suggestiont>{};
269+
// We allow 3 errors here. This can lead to the output being a bit chatty,
270+
// which we mitigate by reducing suggestions to those with the minimum
271+
// distance further down below
272+
const auto argument_matcher = levenshtein_automatont{unknown_argument, 3};
273+
for(const auto &option : options)
274+
{
275+
if(option.islong)
276+
{
277+
const auto long_name = "--" + option.optstring;
278+
if(auto distance = argument_matcher.get_edit_distance(long_name))
279+
{
280+
argument_suggestions.push_back({distance.value(), long_name});
281+
}
282+
}
283+
if(!option.islong)
284+
{
285+
const auto short_name = std::string{"-"} + option.optchar;
286+
if(auto distance = argument_matcher.get_edit_distance(short_name))
287+
{
288+
argument_suggestions.push_back({distance.value(), short_name});
289+
}
290+
}
291+
}
292+
293+
auto final_suggestions = std::vector<std::string>{};
294+
if(!argument_suggestions.empty())
295+
{
296+
// we only want to keep suggestions with the minimum distance
297+
// because otherwise they become quickly too noisy to be useful
298+
auto min = std::min_element(
299+
argument_suggestions.begin(), argument_suggestions.end());
300+
INVARIANT(
301+
min != argument_suggestions.end(),
302+
"there is a minimum because it's not empty");
303+
for(auto const &suggestion : argument_suggestions)
304+
{
305+
if(suggestion.distance == min->distance)
306+
{
307+
final_suggestions.push_back(suggestion.suggestion);
308+
}
309+
}
310+
}
311+
return final_suggestions;
312+
}
313+
253314
cmdlinet::option_namest::option_names_iteratort::option_names_iteratort(
254315
const cmdlinet *command_line,
255316
std::size_t index)

src/util/cmdline.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ class cmdlinet
9494
cmdlinet();
9595
virtual ~cmdlinet();
9696

97+
std::vector<std::string>
98+
get_argument_suggestions(const std::string &unknown_argument);
99+
97100
protected:
98101
struct optiont
99102
{

src/util/edit_distance.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/// \file
2+
/// \author Diffblue Ltd.
3+
///
4+
/// Provides a way to compute edit distance between two strings
5+
6+
#include "edit_distance.h"
7+
8+
levenshtein_automatont::levenshtein_automatont(
9+
const std::string &string,
10+
std::size_t allowed_errors)
11+
{
12+
const std::size_t layer_offset = string.size() + 1;
13+
for(std::size_t i = 0; i <= allowed_errors; ++i)
14+
{
15+
final_states.push_back(string.size() + layer_offset * i);
16+
}
17+
for(std::size_t string_index = 0; string_index < string.size();
18+
++string_index)
19+
{
20+
for(std::size_t error_layer = 0; error_layer <= allowed_errors;
21+
++error_layer)
22+
{
23+
// position string_index matches
24+
nfa.add_transition(
25+
error_layer * layer_offset + string_index,
26+
string[string_index],
27+
error_layer * layer_offset + string_index + 1);
28+
if(error_layer < allowed_errors)
29+
{
30+
// insertion, swap or deletion
31+
nfa.add_arbitrary_transition(
32+
error_layer * layer_offset + string_index,
33+
(error_layer + 1) * layer_offset + string_index);
34+
nfa.add_epsilon_transition(
35+
error_layer * layer_offset + string_index,
36+
(error_layer + 1) * layer_offset + string_index + 1);
37+
nfa.add_arbitrary_transition(
38+
error_layer * layer_offset + string_index,
39+
(error_layer + 1) * layer_offset + string_index + 1);
40+
}
41+
}
42+
}
43+
for(std::size_t error_layer = 0; error_layer < allowed_errors; ++error_layer)
44+
{
45+
// arbitrary transitions between error layers
46+
nfa.add_arbitrary_transition(
47+
error_layer * layer_offset + string.size(),
48+
(error_layer + 1) * layer_offset + string.size());
49+
}
50+
}
51+
52+
bool levenshtein_automatont::matches(const std::string &string) const
53+
{
54+
return get_edit_distance(string).has_value();
55+
}
56+
57+
optionalt<std::size_t>
58+
levenshtein_automatont::get_edit_distance(const std::string &string) const
59+
{
60+
auto current = nfa.initial_state(0);
61+
for(const auto c : string)
62+
{
63+
current = nfa.next_state(current, c);
64+
}
65+
for(std::size_t distance = 0; distance < final_states.size(); ++distance)
66+
{
67+
if(current.contains(final_states[distance]))
68+
{
69+
return distance;
70+
}
71+
}
72+
return nullopt;
73+
}

src/util/edit_distance.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/// \file
2+
/// \author Diffblue Ltd.
3+
///
4+
/// Loosely based on this blog post:
5+
/// http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
6+
/// Provides a way to compute edit distance between two strings
7+
///
8+
/// No conversion to DFA or other optimisations are done here because for our
9+
/// use case (i.e. suggestions for errors in command line specifications) this
10+
/// is fast enough without them.
11+
12+
#ifndef CPROVER_UTIL_EDIT_DISTANCE_H
13+
#define CPROVER_UTIL_EDIT_DISTANCE_H
14+
15+
#include "nfa.h"
16+
17+
#include <cstddef>
18+
#include <string>
19+
20+
#include <util/optional.h>
21+
22+
/// Simple automaton that can detect whether a string can be transformed into
23+
/// another with a limited number of deletions, insertions or substitutions.
24+
/// Not a very fast implementation, but should be good enough for small strings.
25+
struct levenshtein_automatont
26+
{
27+
private:
28+
nfat<char> nfa;
29+
using state_labelt = nfat<char>::state_labelt;
30+
std::vector<state_labelt> final_states;
31+
32+
public:
33+
levenshtein_automatont(
34+
const std::string &string,
35+
std::size_t allowed_errors = 2);
36+
37+
bool matches(const std::string &string) const;
38+
optionalt<std::size_t> get_edit_distance(const std::string &string) const;
39+
40+
void dump_automaton_dot_to(std::ostream &out)
41+
{
42+
nfa.dump_automaton_dot_to(out);
43+
};
44+
};
45+
46+
#endif // CPROVER_UTIL_EDIT_DISTANCE_H

0 commit comments

Comments
 (0)