Skip to content

Commit 06615bd

Browse files
committed
feat: adds a tool to consolidate (hard-link) duplicated files
Many files in the libs/ directory are duplicated because most archs share a lot in common. Maintaining modifications across all possible archs is tedious, so this tool hard-links all identical files, so that modifying one modifies all of them. Git is agnostic to this.
1 parent d143081 commit 06615bd

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

tools/consolidate-libs.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python
2+
3+
# Module description. The scanned tree is src/lib/arch/<arch>/ (see ROOT_DIR
# and the pattern used by scan_arch), and files are matched on their path
# relative to the <arch> directory plus their content.
__doc__ = """Scans src/lib/arch/<arch>/** and hard-links files that have
the same relative path and content"""
5+
6+
import glob
7+
import os
8+
import re
9+
from collections import defaultdict
10+
from pathlib import Path
11+
from typing import NamedTuple
12+
13+
# Directory scanned for duplicates: <two levels above this script>/src/lib/arch.
ROOT_DIR = Path(__file__).parent.parent.absolute().joinpath("src", "lib", "arch")
# Presumably the architecture directory names found under ROOT_DIR; the code
# visible in this file does not reference this constant.
ARCHS = ("zx48k", "zxnext")
15+
16+
17+
class FileInfo(NamedTuple):
    """Identity of a library file: arch-relative path plus a content hash.

    Instances are used as dictionary keys, so two files fall into the same
    group exactly when both fields are equal.
    """

    # Path of the file relative to its architecture directory.
    path: str
    # Integer hash computed from the file's raw bytes.
    hash: int
20+
21+
22+
def get_file_list(root: Path) -> list[str]:
    """Return every file found recursively below *root*.

    The tree is walked with a recursive ``glob`` pattern; entries for which
    ``os.path.isfile`` is false (e.g. directories) are dropped.

    Args:
        root: Directory to search.

    Returns:
        The matching paths as strings, in the order ``glob`` produced them.
    """
    pattern = str(root.joinpath("**", "*"))
    candidates = glob.glob(pattern, recursive=True)
    return list(filter(os.path.isfile, candidates))
25+
26+
27+
def scan_arch(root: Path) -> dict[FileInfo, list[str]]:
    """Group the files below *root* by arch-relative path and content.

    Each file whose path contains ``/src/lib/arch/<arch>/`` is keyed by the
    part of the path that follows the ``<arch>`` directory, together with a
    digest of its bytes. Files that do not match that layout are ignored.

    Args:
        root: Directory to scan recursively.

    Returns:
        A mapping from ``FileInfo`` to the list of file paths sharing that
        relative path and content. Entries with more than one path are
        duplicates across architectures.
    """
    # Local import: the file's top-level imports do not provide hashlib.
    import hashlib

    result: defaultdict[FileInfo, list[str]] = defaultdict(list)
    re_arch = re.compile(r"^.*?/src/lib/arch/[^/]+/(.*)$")

    for file in get_file_list(root):
        # Match against the forward-slash form of the path so the pattern
        # also applies where the OS separator is a backslash.
        match = re_arch.match(Path(file).as_posix())
        if match is None:
            continue

        # read_bytes() closes the file handle. A SHA-256 digest replaces the
        # builtin 64-bit hash() because a collision here would later cause
        # files with different content to be hard-linked together.
        content = Path(file).read_bytes()
        digest = int.from_bytes(hashlib.sha256(content).digest(), "big")
        result[FileInfo(path=match.group(1), hash=digest)].append(file)

    return result
41+
42+
43+
def fold_files(scan: dict[FileInfo, list[str]]) -> None:
    """Replace duplicated files with hard links to a single copy.

    For every group holding more than one path, the first path is kept and
    each remaining path is replaced by a hard link to it. A line is printed
    for every file that gets relinked.

    Args:
        scan: Mapping whose values are lists of paths with identical content
            (as produced by ``scan_arch``). The keys are not used.

    Raises:
        OSError: If a link cannot be created or put in place. The duplicate
            being processed is left untouched in that case.
    """
    for files in scan.values():
        if len(files) < 2:
            continue

        main_file, *duplicates = files
        for file in duplicates:
            # Already the same inode: nothing to do. This check is also
            # required because renaming one link over another link to the
            # same file is a no-op on POSIX and would leave the temp name.
            if os.path.samefile(main_file, file):
                continue

            print(f"Linking {main_file} to {file}")
            # Create the link under a temporary name first and then rename it
            # over the duplicate, so a failed link never deletes the original.
            tmp_file = f"{file}.{os.getpid()}.tmp"
            os.link(main_file, tmp_file)
            try:
                os.replace(tmp_file, file)
            except OSError:
                os.unlink(tmp_file)
                raise
53+
54+
55+
def main():
    """Scan ROOT_DIR for duplicated library files and hard-link them."""
    fold_files(scan_arch(ROOT_DIR))
58+
59+
60+
# Run the consolidation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)