Skip to content

Commit 371c086

Browse files
committed
Add '--algo' option for algorithm selection.
The interaction with '--brute' is a bit messy, but that's a fix for another day.
1 parent 02923fd commit 371c086

File tree

3 files changed

+47
-16
lines changed

3 files changed

+47
-16
lines changed

README.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ This tool is meant for small problems. It is written in Python, after all.
4343
### Usage
4444

4545
```console
46-
usage: superstring [-h] [-q | -v] [-s | -S | -L LOOPS | -B] [-C STR] [-F STR] [-j] [-i] [-l] [-R] [-G] [-V] [infile]
46+
usage: superstring [-h] [-q | -v] [-s | -S | -L LOOPS | -B | -A ALGO] [-C STR] [-F STR] [-j] [-i] [-l] [-R] [-G] [-V] [infile]
4747

4848
Approximate Shortest Superstring Generator -- https://github.com/eloj/superstrings
4949

@@ -58,6 +58,7 @@ options:
5858
-S, --sort Sort input by entry frequency
5959
-L LOOPS, --loops LOOPS Shuffle and regenerate until min-length doesn't improve
6060
-B, --brute Use brute-force. Warning: Only for tiny inputs!
61+
-A ALGO, --algo ALGO Algorithm selection
6162
-C STR, --comment STR String(s) that start a comment in the input
6263
-F STR, --mtf STR Input element(s) to move-to-front
6364
-j, --join-only Only join input, don't generate superstring
@@ -118,6 +119,9 @@ $ ./superstring -q --brute data/greedy-hard.txt
118119
cababababc
119120
```
120121

122+
The `--algo` option can be used to pick the search algorithm. The default is 'greedy', unless `--brute` was specified, in which
123+
case it's 'brutedp'. If you specify an invalid name here, you should get a list of valid options back.
124+
121125
## Python Superstring Library API
122126

123127
`generate_superstring(list) -> str`
@@ -132,6 +136,9 @@ cababababc
132136
`brutedp(list) -> str`
133137
: Given a substring-free list of strings, returns an optimal superstring as generated by a brute-force dynamic-programming approach.
134138

139+
`brutedijkstra(list) -> str`
140+
: Given a substring-free list of strings, returns an optimal superstring as generated by brute-force graph search using [Dijkstra's algorithm](https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm).
141+
135142
`make_substring_free(list) -> list`
136143
: For Greedy to work as originally specified, its input must be _substring-free_ (aka _factor-free_), i.e., contain no elements that are substrings of one another. This function will process a list to ensure this pre-condition is true.
137144

@@ -151,7 +158,6 @@ res = ssp.generate_superstring(arr)
151158
## TODO
152159

153160
* Verify that the type-spec in ssp.py is actually correct.
154-
* Algorithm selection.
155161
* Implement encoding into bitstrings.
156162
* Built-in benchmark.
157163

ssp.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def generate_superstring(strings: list[str], func=greedy) -> str:
272272
return func(make_substring_free(strings))
273273

274274

275-
def basic_test():
275+
def basic_tests() -> None:
276276
""" Basic functionality test """
277277
# Hard input for GREEDY. Kaplan 2005.
278278
# { c(ab)^k, (ba)^k, (ab)^kc }
@@ -296,4 +296,4 @@ def basic_test():
296296
print(f"brutedijkstra(): {res}, len={len(res)}")
297297

298298
if __name__ == "__main__":
299-
basic_test()
299+
basic_tests()

superstring

+37-12
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
Approximate Shortest Superstring Generator
44
https://github.com/eloj/superstrings/
55
6-
Copyright © 2024 Eddy Jansson
6+
Copyright © 2024, 2025 Eddy Jansson
77
Licensed under the MIT License. See LICENSE file for details.
88
"""
99
from math import log2, ceil, gcd
1010
import sys
1111
import argparse
1212
import random
13+
import inspect
1314
import ssp
1415

1516
# TODO: bits/bytes helper print function
@@ -22,6 +23,15 @@ import ssp
2223

2324
PROGRAM_VERSION = "1.0.0"
2425

26+
def get_algos(module='ssp'):
27+
return { n : f for (n,f) in inspect.getmembers(sys.modules[module], inspect.isfunction) if not '_' in n }
28+
29+
def get_algo_func_by_name(name):
    """ Return the algorithm function named _name_, or None if there is no such algorithm """
    # A direct dict lookup replaces the manual scan over every (name, func) pair.
    return get_algos().get(name)
34+
2535
def bits(n):
2636
""" Return the number of bits needed to encode the number _n_ """
2737
if n == 0:
@@ -35,7 +45,7 @@ def bits(n):
3545
# # print(a, bin(a))
3646
# return bits(a)
3747

38-
def data_loader(args):
48+
def data_loader(args) -> list[str]:
3949
""" Load data into array """
4050
res = []
4151
lines = []
@@ -87,6 +97,7 @@ if __name__ == "__main__":
8797
order_group.add_argument("-S", "--sort", help="Sort input by entry frequency", action="store_true", default=False)
8898
order_group.add_argument("-L", "--loops", help="Shuffle and regenerate until min-length doesn't improve", action="store", type=int, default=0)
8999
order_group.add_argument("-B", "--brute", help="Use brute-force. Warning: Only for tiny inputs!", action="store_true")
100+
order_group.add_argument("-A", "--algo", metavar="ALGO", help="Algorithm selection", action="store", type=str)
90101

91102
argp.add_argument("-C", "--comment", metavar="STR", help="String(s) that start a comment in the input", action="extend", nargs=1, type=str)
92103
argp.add_argument("-F", "--mtf", metavar="STR", help="Input element(s) to move-to-front", action="extend", nargs=1, type=str)
@@ -109,6 +120,27 @@ if __name__ == "__main__":
109120
if args.verbose > 1:
110121
print(f"Comment prefix(es): {args.comment}")
111122

123+
# Algorithm selection
124+
algo_func = None
125+
if args.algo:
126+
algo_func = get_algo_func_by_name(args.algo)
127+
if algo_func is None:
128+
sys.exit(f"Invalid algorithm name {args.algo}, try one of {[k for k in get_algos().keys()]}")
129+
args.brute = args.algo.startswith("brute")
130+
131+
if args.brute:
132+
if not algo_func:
133+
algo_func = ssp.brutedp
134+
if not args.quiet:
135+
if args.verbose > 0:
136+
print("WARNING: Using brute-force is extremely slow. Use only on very small inputs.")
137+
138+
if not algo_func:
139+
algo_func = ssp.greedy
140+
141+
if args.verbose > 0:
142+
print(f"Using algorithm {algo_func}")
143+
112144
original_input = data_loader(args)
113145
input_bps = 0
114146
transformed_input = []
@@ -160,12 +192,8 @@ if __name__ == "__main__":
160192
for v in unique_input:
161193
unique_input_len += len(v)
162194

163-
if args.brute:
164-
if not args.quiet:
165-
if args.verbose > 0:
166-
print("WARNING: Using brute-force is extremely slow. Use only on very small inputs.")
167-
if len(unique_input) >= 30:
168-
print(f"WARNING: This run is unlikely to return a result in reasonable time (N={len(unique_input)}).")
195+
if args.brute and len(unique_input) >= 30:
196+
print(f"WARNING: This run is unlikely to return a result in reasonable time (N={len(unique_input)}).")
169197

170198
# Calculate fixed bits-per-symbol from input.
171199
## if input_bps == 0:
@@ -193,10 +221,7 @@ if __name__ == "__main__":
193221
if args.join_only:
194222
output = "".join(transformed_input)
195223
else:
196-
if args.brute:
197-
min_output = ssp.generate_superstring(transformed_input, ssp.brutedp)
198-
else:
199-
min_output = ssp.generate_superstring(transformed_input)
224+
min_output = ssp.generate_superstring(transformed_input, algo_func)
200225
if args.loops:
201226
# Shuffle input and regenerate superstring until minimum length doesn't improve.
202227
if not args.quiet:

0 commit comments

Comments
 (0)