-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwordlepy.py
370 lines (288 loc) · 14 KB
/
wordlepy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
"""
MIT License
Copyright (c) 2022 Richard Benjamin Allen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Acknowledgements:
Thanks to Matthew Reagan https://github.com/matthewreagan/WebstersEnglishDictionary for his JSON version of
Project Gutenberg's Webster's Unabridged English Dictionary!
The original project can be found here: https://github.com/matthewreagan/WebstersEnglishDictionary
Thanks to Peter Coles https://github.com/mrcoles for his list of good starting words!
His interesting blog on the subject can be found here: https://mrcoles.com/best-wordle-starting-word/
"""
import argparse
import json
import os
import re
import sys
from typing import Dict, List, Optional, Pattern, Set
# Five-letter opening guesses returned verbatim by the `--suggest` option; the
# module docstring credits Peter Coles for this list of good starting words.
GOOD_STARTING_WORDS: Set = {
    "roate",
    "orate",
    "oater",
    "realo",
    "taler",
    "later",
    "ratel",
    "artel",
    "alter",
    "alert",
}
class RawPatternParseError(Exception):
    """Raised when a simplified regex pattern contains a character the parser does not accept."""
class DictionaryNotFound(Exception):
    """Raised when the dictionary file path supplied by the User does not exist."""
class InvalidCharacterString(Exception):
    """Raised when a present/absent character string contains non-alphabetic characters."""
class InvalidFilterCombination(Exception):
    """Raised when the same character is listed as both present and absent."""
def parse_json(file_path: str) -> Dict:
    """
    Parses a JSON file into a Python dictionary for use in creating filtered subsets.

    The file is always decoded as UTF-8 (the encoding JSON documents are exchanged in) rather than the platform's
    locale encoding, so the same dictionary file loads identically on every operating system.

    :param file_path: String containing the file path to the dictionary you wish to parse.
    :return: Dictionary containing key value pairs of the JSON file.
    :raises ValueError: if the file content cannot be decoded or parsed as JSON; the underlying decoding error is
        chained as the cause.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        try:
            return json.load(file)
        except ValueError as error:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses; keep the original
            # failure attached so the User can see where in the file the problem is.
            raise ValueError("Could not parse dictionary, it is probably not in the right format!") from error
def _check_lists(present, absent) -> bool:
    """
    Reports whether the two collections have at least one element in common. A shared element means the User's
    input is contradictory and should be rejected.

    :param present: List of characters present in a word.
    :param absent: List of characters absent from a word.
    :return: True if any element of absent also occurs in present; False otherwise.
    """
    for candidate in absent:
        if candidate in present:
            return True
    return False
def _listify(chars: str) -> Optional[List]:
    """
    Splits a string into its individual characters.

    :param chars: String containing the characters you wish to turn into a list.
    :return: List holding each character of chars, in order.
    """
    return [character for character in chars]
def _present(word: str, p_chars: List) -> Optional[str]:
    """
    Checks a word for presence of characters from a User defined list.

    :param word: String containing the word to test against the list of characters present.
    :param p_chars: List of characters that must all occur in the word.
    :return: String containing the word if it contains every character in p_chars (including when p_chars is
        empty); None if it doesn't.
    """
    if all(char in word for char in p_chars):
        return word
    return None
def _absent(word: str, a_chars: List) -> Optional[str]:
    """
    Checks a word for absence of characters from a User defined list.

    :param word: String containing the word to test against the list of absences.
    :param a_chars: List of characters that must not occur in the word.
    :return: String containing the word if it contains none of the a_chars (including when a_chars is empty);
        None if it contains at least one of them.
    """
    if any(char in word for char in a_chars):
        return None
    return word
def __pattern_parser(raw_patten: str) -> Dict:
    """
    Parses a User defined simplified regex and turns it into a dictionary the exact length of the word, using integers
    as keys and the regex for that single character as values. A raw_pattern such as: ?^ab^c? for example will yield
    a dictionary like {1: "[\\w]", 2: "[^a]", 3: "[b]", 4: "[^c]", 5: "[\\w]"}.

    Accepted tokens are `?` (any character), a letter (that letter in that position) and `^` followed by a letter
    (anything but that letter in that position). A `^` must always be followed by a letter: `^?`, `^^`, `^1` and a
    trailing `^` are rejected rather than being silently misread.

    :param raw_patten: String containing a simplified regex like pattern defined by the User.
    :return: Dictionary containing a regex map of the word to be guessed, keyed by 1-based character position.
    :raises RawPatternParseError: if it finds unexpected characters in the raw_pattern, or a `^` that is not
        followed by a letter.
    """
    regex_map: Dict = {}
    position: int = 1
    # True while the previous character was "^" and we are waiting for the letter it negates. Tracking this
    # explicitly avoids looking back with raw_patten[index - 1], which wraps around to the last character when
    # index is 0.
    negate_next: bool = False
    for char in raw_patten:
        if negate_next:
            if not char.isalpha():
                raise RawPatternParseError(f"Unexpected character encounted: {char}, please use --help or see the "
                                           f"README for the correct usage.")
            regex_map[position] = f"[^{char}]"
            position += 1
            negate_next = False
        elif char == "?":
            regex_map[position] = r"[\w]"
            position += 1
        elif char == "^":
            negate_next = True
        elif char.isalpha():
            regex_map[position] = f"[{char}]"
            position += 1
        else:
            raise RawPatternParseError(f"Unexpected character encounted: {char}, please use --help or see the README "
                                       f"for the correct usage.")
    if negate_next:
        # The pattern ended on a "^" with nothing left to negate.
        raise RawPatternParseError("Unexpected character encounted: ^, please use --help or see the README "
                                   "for the correct usage.")
    return regex_map
def _build_regex(raw_pattern: str) -> Pattern:
    """
    Uses a simplified User defined regex pattern to construct a real one, this allows a normal User to harness the
    power of Python regular expressions to be used in refining their set of words to try later. It uses a helper
    function to construct a dictionary containing the regex for each character position.

    A dictionary like {1: "[\\w]", 2: "[^a]", 3: "[b]", 4: "[^c]", 5: "[\\w]"} will result in a regex pattern like
    r"^[\\w][^a][b][^c][\\w]" where the \\w is a wildcard, ^a and ^c excludes those characters in those positions
    and [b] includes the b character in that position in the word. The pattern is anchored at the start only, so a
    pattern shorter than the word constrains just the leading characters.

    :param raw_pattern: String containing a simplified regex like pattern defined by the User.
    :return: Pattern compiled in proper Python regex format.
    :raises RawPatternParseError: If the User defined raw pattern doesn't follow the accepted convention
        (propagated unchanged from the parser).
    """
    regex_map: Dict = __pattern_parser(raw_pattern)
    # Dictionaries preserve insertion order, so the values come back in character-position order.
    return re.compile("^" + "".join(regex_map.values()))
def refined_by_regex(word_subset: Set, regex):
    """
    Keeps only the words that match a regular expression at the start of the word.

    :param word_subset: Set containing a pre-filtered words from the English dictionary.
    :param regex: Pattern containing a compiled regular expression.
    :return: New set holding the words from word_subset that the regular expression matches.
    """
    return {candidate for candidate in word_subset if re.match(regex, candidate)}
def filter_entries_by_presence_or_absence(dictionary_subset: Set, **kwargs) -> Set:
    """
    Filters a set of words further by keeping only the words that contain every `present` character and none of the
    `absent` characters. Both filters are optional keyword arguments holding a list of characters, based on your
    previous wordle guesses; a missing, None or empty filter places no restriction on the words.

    :param dictionary_subset: Set containing a pre-filtered words from the English dictionary.
    :keyword present: List of characters that must all occur in a word for it to be kept.
    :keyword absent: List of characters that must not occur in a word for it to be kept.
    :return: New set with reduced number of words if at least one non-empty filter is provided; the original set
        object (not a copy) if not.
    """
    required: List = kwargs.get("present") or []
    forbidden: List = kwargs.get("absent") or []
    if not required and not forbidden:
        return dictionary_subset
    # An empty filter is vacuously satisfied by all()/any(), so one expression covers the present-only,
    # absent-only and combined cases without re-deciding which applies for every word.
    return {
        word
        for word in dictionary_subset
        if all(char in word for char in required) and not any(char in word for char in forbidden)
    }
def main(worldlepy_args) -> Set:
    """
    Main function that checks the arguments passed into wordlepy and assembles a filtered set of words accordingly.

    When `suggest` is set the list of good starting words is returned immediately and no dictionary is loaded.
    Otherwise the dictionary at `worldlepy_args.dictionary` is parsed, reduced to purely alphabetic words of the
    requested length, filtered by the present/absent characters and finally refined by each regex pattern in turn.
    The `present`, `absent` and `regex` attributes are optional on the namespace because they are only defined
    when the `refine` sub-command is used.

    :param worldlepy_args: Namespace containing the arguments passed via the terminal.
    :return: Set of filtered words depending on the other parameters.
    :raises ValueError: If the custom dictionary provided isn't in the correct format.
    :raises DictionaryNotFound: If the specified custom dictionary cannot be located using the path specified.
    :raises InvalidCharacterString: If the present or absent parameter string contains a non-alphabetic character.
    :raises InvalidFilterCombination: If you list a character in both present and absent parameter strings.
    :raises RawPatternParseError: If one of the regex patterns doesn't follow the accepted convention.
    """
    if worldlepy_args.suggest:
        return GOOD_STARTING_WORDS

    length: int = worldlepy_args.length
    dictionary_path: str = worldlepy_args.dictionary
    if not os.path.exists(dictionary_path):
        raise DictionaryNotFound(f"The specified path: {dictionary_path} doesn't exist or is invalid!")
    # Parse the dictionary the User asked for, not the built-in default path.
    parsed_dictionary: Dict = parse_json(dictionary_path)

    # These options live on the `refine` sub-parser, so they are missing from the namespace when the program
    # is run without that sub-command.
    present: Optional[str] = getattr(worldlepy_args, "present", None)
    absent: Optional[str] = getattr(worldlepy_args, "absent", None)
    raw_patterns: Optional[List] = getattr(worldlepy_args, "regex", None)

    present_chars: List = []
    absent_chars: List = []
    if present:
        if not present.isalpha():
            raise InvalidCharacterString("The present characters string you supplied contains non latin characters!")
        present_chars = _listify(present)
    if absent:
        if not absent.isalpha():
            raise InvalidCharacterString("The absent characters string you supplied contains non latin characters!")
        absent_chars = _listify(absent)
    if _check_lists(present_chars, absent_chars):
        raise InvalidFilterCombination("Your present and absent filters cannot contain a shared element!")

    # this dictionary contains some invalid entries containing white space and other non-alphabetic characters that
    # won't ever appear in a wordle; the below will filter those out using the isalpha() conditional.
    pre_filtered_words: Set = {key for key in parsed_dictionary if len(key) == length and key.isalpha()}
    filtered_words: Set = filter_entries_by_presence_or_absence(
        pre_filtered_words,
        present=present_chars,
        absent=absent_chars
    )
    # Each pattern narrows the result of the previous one, so several guesses can be combined.
    for raw_regex in raw_patterns or []:
        regex: Pattern = _build_regex(raw_regex)
        filtered_words = refined_by_regex(filtered_words, regex)
    return filtered_words
if __name__ == "__main__":
    # Command line entry point: build the argument parser, validate the option combination, run main() and
    # translate the result into a process exit code (0 when words were found, 1 otherwise).
    parser = argparse.ArgumentParser(description="Terminal program to help you to optimise your wordle guesses!")
    subparser = parser.add_subparsers(dest="command")
    refine = subparser.add_parser("refine")
    parser.add_argument("-d", "--dictionary", type=str, default="data/websters-english-dictionary.json",
                        help="Specify the file path to the dictionary that you want to use, the default is "
                             "websters-english-dictionary. It has to be valid JSON in a dictionary format "
                             "i.e. it must have words as keys and the "
                             "meanings as values.")
    refine.add_argument("-p", "--present", type=str, required=False,
                        help="Specify which letters are present to help eliminate the possibilities, provide them as a "
                             "string literal e.g. `abcdefg`.")
    refine.add_argument("-a", "--absent", type=str, required=False,
                        help="Specify which letters are absent to help eliminate the possibilities, provide them as a "
                             "string literal e.g. `abcdefg`.")
    parser.add_argument("-l", "--length", type=int, required=False, default=5,
                        help="Specify the length of the word you wish to look up, the default is a length of 5 "
                             "characters.")
    # The original adjacent literals were joined without a space ("suggestionsuse"); a space is added here.
    refine.add_argument("-r", "--regex", type=str, action='append', required=False,
                        help="Specify a word using wild cards to eliminate some duds from the suggestions "
                             "use the following convention.")
    parser.add_argument("-s", "--suggest", action="store_true",
                        help="Use this to suggest good starting words depending on word length, default is False.")
    args = parser.parse_args()

    if args.suggest and args.command == "refine":
        print("You cannot use refine when requesting suggestions for starter words!")
        sys.exit(1)
    if args.command == "refine":
        if not args.regex and not args.present and not args.absent:
            print("You cannot use refine without specifying at least one refine argument!")
            sys.exit(1)

    try:
        success = main(args)
    except (DictionaryNotFound, InvalidCharacterString, InvalidFilterCombination):
        # Known, self-explanatory failures: let them propagate unchanged.
        raise
    except Exception as unexpected:
        # Only real exception instances can be raised (a bare string is a TypeError), and catching Exception
        # rather than BaseException leaves SystemExit and KeyboardInterrupt alone.
        raise RuntimeError("Unhandled exception in wordlepy") from unexpected

    print(success)
    sys.exit(0 if success else 1)