
Commit a2fd962

Update protocol caches when generating fg caches on incremental build (#5012)
Previously, using `--cache-fine-grained` and `--incremental` together would produce invalid protocol caches, which prevented using `--incremental` to quickly generate fine-grained cache artifacts. This commit also adds machinery to `testfinegrained` for testing the building of cache artifacts via incremental runs.
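For context: the protocol ("proto") dependency cache is guarded by a snapshot of per-module source hashes. The snapshot is written next to the deps, and on load it is compared against the hashes recorded in the modules' own cache metas; any mismatch discards the whole protocol cache. Below is a minimal, self-contained sketch of that idea (hypothetical names, not mypy's actual API):

import json
from typing import Dict, Optional


def write_snapshot(path: str, hashes: Dict[str, str]) -> None:
    # `hashes` maps module id -> hash of its source (or of its cached meta).
    with open(path, 'w') as f:
        json.dump(hashes, f)


def validate_snapshot(path: str, current: Dict[str, str]) -> Optional[Dict[str, str]]:
    # Return the saved snapshot if it agrees with `current` on every
    # module they have in common; otherwise the protocol cache is stale.
    with open(path) as f:
        saved = json.load(f)  # type: Dict[str, str]
    common = set(saved) & set(current)
    if any(saved[mod] != current[mod] for mod in common):
        return None
    return saved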
Parent: bb13ecc

7 files changed: +316 −54 lines

mypy/build.py

Lines changed: 37 additions & 19 deletions
@@ -980,8 +980,16 @@ def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]],
     proto_meta, proto_cache = get_protocol_deps_cache_name(manager)
     meta_snapshot = {}  # type: Dict[str, str]
     error = False
-    for id in graph:
-        meta_snapshot[id] = graph[id].source_hash
+    for id, st in graph.items():
+        # If we didn't parse a file (so it doesn't have a
+        # source_hash), then it must be a module with a fresh cache,
+        # so use the hash from that.
+        if st.source_hash:
+            meta_snapshot[id] = st.source_hash
+        else:
+            assert st.meta, "Module must be either parsed or cached"
+            meta_snapshot[id] = st.meta.hash
+
     if not atomic_write(proto_meta, json.dumps(meta_snapshot), '\n'):
         manager.log("Error writing protocol meta JSON file {}".format(proto_cache))
         error = True
@@ -1008,11 +1016,12 @@ def read_protocol_cache(manager: BuildManager,
                                     log_error='Could not load protocol metadata: ')
     if meta_snapshot is None:
         return None
-    current_meta_snapshot = {}  # type: Dict[str, str]
-    for id in graph:
-        meta = graph[id].meta
-        assert meta is not None, 'Protocol cache should be read after all other'
-        current_meta_snapshot[id] = meta.hash
+    # Take a snapshot of the source hashes from all of the metas we found.
+    # (Including the ones we rejected because they were out of date.)
+    # We use this to verify that they match up with the proto_deps.
+    current_meta_snapshot = {id: st.meta_source_hash for id, st in graph.items()
+                             if st.meta_source_hash is not None}
+
     common = set(meta_snapshot.keys()) & set(current_meta_snapshot.keys())
     if any(meta_snapshot[id] != current_meta_snapshot[id] for id in common):
         # TODO: invalidate also if options changed (like --strict-optional)?
@@ -1608,7 +1617,8 @@ class State:
     path = None  # type: Optional[str]  # Path to module source
     xpath = None  # type: str  # Path or '<string>'
     source = None  # type: Optional[str]  # Module source code
-    source_hash = None  # type: str  # Hash calculated based on the source code
+    source_hash = None  # type: Optional[str]  # Hash calculated based on the source code
+    meta_source_hash = None  # type: Optional[str]  # Hash of the source given in the meta, if any
     meta = None  # type: Optional[CacheMeta]
     data = None  # type: Optional[str]
     tree = None  # type: Optional[MypyFile]
@@ -1699,6 +1709,7 @@ def __init__(self,
         # TODO: Get mtime if not cached.
         if self.meta is not None:
             self.interface_hash = self.meta.interface_hash
+            self.meta_source_hash = self.meta.hash
         self.add_ancestors()
         self.meta = validate_meta(self.meta, self.id, self.path, self.ignore_all, manager)
         if self.meta:
@@ -2089,6 +2100,7 @@ def write_cache(self) -> None:
             return
         dep_prios = self.dependency_priorities()
         dep_lines = self.dependency_lines()
+        assert self.source_hash is not None
         new_interface_hash, self.meta = write_cache(
             self.id, self.path, self.tree,
             {k: list(v) for k, v in self.fine_grained_deps.items()},
@@ -2333,23 +2345,29 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
     if manager.options.dump_graph:
         dump_graph(graph)
         return graph
-    # If we are loading a fine-grained incremental mode cache, we
-    # don't want to do a real incremental reprocess of the graph---we
-    # just want to load in all of the cache information.
-    if manager.use_fine_grained_cache():
-        process_fine_grained_cache_graph(graph, manager)
-        # Fine grained protocol dependencies are serialized separately, so we read them
-        # after we loaded cache for whole graph. The `read_protocol_cache` will also validate
-        # the protocol cache against the loaded individual cache files.
-        TypeState.proto_deps = read_protocol_cache(manager, graph)
-        if TypeState.proto_deps is None and manager.stats.get('fresh_metas', 0) > 0:
+
+    # Fine grained protocol dependencies are serialized separately, so we read them
+    # after we load the cache for whole graph.
+    # We need to read them both for running in daemon mode and if we are generating
+    # a fine-grained cache (so that we can properly update them incrementally).
+    # The `read_protocol_cache` will also validate
+    # the protocol cache against the loaded individual cache files.
+    if manager.options.cache_fine_grained or manager.use_fine_grained_cache():
+        proto_deps = read_protocol_cache(manager, graph)
+        if proto_deps is not None:
+            TypeState.proto_deps = proto_deps
+        elif manager.stats.get('fresh_metas', 0) > 0:
            # There were some cache files read, but no protocol dependencies loaded.
            manager.log("Error reading protocol dependencies cache -- aborting cache load")
            manager.cache_enabled = False
-            TypeState.proto_deps = {}
            manager.log("Falling back to full run -- reloading graph...")
            return dispatch(sources, manager)

+    # If we are loading a fine-grained incremental mode cache, we
+    # don't want to do a real incremental reprocess of the graph---we
+    # just want to load in all of the cache information.
+    if manager.use_fine_grained_cache():
+        process_fine_grained_cache_graph(graph, manager)
     else:
         process_graph(graph, manager)
     if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
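The dispatch() hunk above amounts to a fallback pattern: read the protocol cache whenever fine-grained caching is involved at all (both daemon loads and `--cache-fine-grained` generation), and if the read fails while some per-module caches were already loaded, disable the cache and restart rather than mix cached and fresh state. A stripped-down, self-contained sketch of that pattern follows; the names are hypothetical, not the real BuildManager API:

from typing import Dict, List, Optional, Set


def load_protocol_deps(cache_enabled: bool) -> Optional[Dict[str, Set[str]]]:
    # Stand-in for read_protocol_cache(): returns None when the cache is
    # missing, unreadable, or fails hash validation.
    return {} if cache_enabled else None


def run_build(sources: List[str], cache_enabled: bool = True,
              fresh_metas: int = 0) -> str:
    proto_deps = load_protocol_deps(cache_enabled)
    if proto_deps is None and fresh_metas > 0:
        # Some module caches were read but the protocol cache is unusable:
        # fall back to a full, cache-disabled run (mirrors the re-dispatch).
        return run_build(sources, cache_enabled=False, fresh_metas=0)
    return 'incremental' if proto_deps is not None else 'full'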

mypy/test/data.py

Lines changed: 11 additions & 2 deletions
@@ -308,6 +308,13 @@ def teardown(self) -> None:
                 pycache = os.path.join(path, '__pycache__')
                 if os.path.isdir(pycache):
                     shutil.rmtree(pycache)
+                # As a somewhat nasty hack, ignore any dirs with .mypy_cache in the path,
+                # to allow test cases to intentionally corrupt the cache without provoking
+                # the test suite when there are still files left over.
+                # (Looking at / should be fine on windows because these are paths specified
+                # in the test cases.)
+                if '/.mypy_cache' in path:
+                    continue
                 try:
                     rmdir(path)
                 except OSError as error:
@@ -342,6 +349,8 @@ def find_steps(self) -> List[List[FileOperation]]:
         The first list item corresponds to the first incremental step, the second for the
         second step, etc. Each operation can either be a file modification/creation (UpdateFile)
         or deletion (DeleteFile).
+
+        Defaults to having two steps if there aren't any operations.
         """
         steps = {}  # type: Dict[int, List[FileOperation]]
         for path, _ in self.files:
@@ -358,8 +367,8 @@ def find_steps(self) -> List[List[FileOperation]]:
             for path in paths:
                 module = module_from_path(path)
                 steps.setdefault(num, []).append(DeleteFile(module, path))
-        max_step = max(steps)
-        return [steps[num] for num in range(2, max_step + 1)]
+        max_step = max(steps) if steps else 2
+        return [steps.get(num, []) for num in range(2, max_step + 1)]


 def module_from_path(path: str) -> str:
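The find_steps() change means missing step numbers now yield empty operation lists instead of a KeyError, and a test with no file operations still defaults to two steps. A self-contained illustration (a hypothetical simplification, with strings standing in for FileOperation objects):

from typing import Dict, List


def find_steps(steps: Dict[int, List[str]]) -> List[List[str]]:
    # Steps are numbered from 2 (step 1 is the initial build).
    max_step = max(steps) if steps else 2
    return [steps.get(num, []) for num in range(2, max_step + 1)]


# A test touching files only in steps 2 and 4 gets an empty list for step 3:
assert find_steps({2: ['update a.py'], 4: ['delete b.py']}) == \
    [['update a.py'], [], ['delete b.py']]
# A test with no operations still defaults to two steps:
assert find_steps({}) == [[]]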

mypy/test/testcheck.py

Lines changed: 17 additions & 20 deletions
@@ -10,7 +10,7 @@
 from mypy import build, defaults
 from mypy.build import BuildSource, Graph
 from mypy.test.config import test_temp_dir
-from mypy.test.data import DataDrivenTestCase, DataSuite
+from mypy.test.data import DataDrivenTestCase, DataSuite, FileOperation, UpdateFile, DeleteFile
 from mypy.test.helpers import (
     assert_string_arrays_equal, normalize_error_messages, assert_module_equivalence,
     retry_on_error, update_testcase_output, parse_options,
@@ -93,7 +93,6 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
             # Incremental tests are run once with a cold cache, once with a warm cache.
             # Expect success on first run, errors from testcase.output (if any) on second run.
             # We briefly sleep to make sure file timestamps are distinct.
-            self.clear_cache()
             num_steps = max([2] + list(testcase.output2.keys()))
             # Check that there are no file changes beyond the last run (they would be ignored).
             for dn, dirs, files in os.walk(os.curdir):
@@ -103,18 +102,17 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
                         raise ValueError(
                             'Output file {} exists though test case only has {} runs'.format(
                                 file, num_steps))
+            steps = testcase.find_steps()
             for step in range(1, num_steps + 1):
-                self.run_case_once(testcase, step)
+                idx = step - 2
+                ops = steps[idx] if idx < len(steps) and idx >= 0 else []
+                self.run_case_once(testcase, ops, step)
         else:
             self.run_case_once(testcase)

-    def clear_cache(self) -> None:
-        dn = defaults.CACHE_DIR
-
-        if os.path.exists(dn):
-            shutil.rmtree(dn)
-
-    def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int = 0) -> None:
+    def run_case_once(self, testcase: DataDrivenTestCase,
+                      operations: List[FileOperation] = [],
+                      incremental_step: int = 0) -> None:
         original_program_text = '\n'.join(testcase.input)
         module_data = self.parse_module(original_program_text, incremental_step)

@@ -127,16 +125,15 @@ def run_case_once(self, testcase: DataDrivenTestCase,
                     break
         elif incremental_step > 1:
             # In runs 2+, copy *.[num] files to * files.
-            for dn, dirs, files in os.walk(os.curdir):
-                for file in files:
-                    if file.endswith('.' + str(incremental_step)):
-                        full = os.path.join(dn, file)
-                        target = full[:-2]
-                        copy_and_fudge_mtime(full, target)
-            # Delete files scheduled to be deleted in [delete <path>.num] sections.
-            for path in testcase.deleted_paths.get(incremental_step, set()):
-                # Use retries to work around potential flakiness on Windows (AppVeyor).
-                retry_on_error(lambda: os.remove(path))
+            for op in operations:
+                if isinstance(op, UpdateFile):
+                    # Modify/create file
+                    copy_and_fudge_mtime(op.source_path, op.target_path)
+                else:
+                    # Delete file
+                    # Use retries to work around potential flakiness on Windows (AppVeyor).
+                    path = op.path
+                    retry_on_error(lambda: os.remove(path))

         # Parse options after moving files (in case mypy.ini is being moved).
         options = parse_options(original_program_text, testcase, incremental_step)

mypy/test/testfinegrained.py

Lines changed: 22 additions & 13 deletions
@@ -81,22 +81,13 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
             f.write(main_src)

         options = self.get_options(main_src, testcase, build_cache=False)
-        for name, _ in testcase.files:
-            if 'mypy.ini' in name:
-                config = name  # type: Optional[str]
-                break
-        else:
-            config = None
-        if config:
-            parse_config_file(options, config)
+        build_options = self.get_options(main_src, testcase, build_cache=True)
         server = Server(options)

+        num_regular_incremental_steps = self.get_build_steps(main_src)
         step = 1
         sources = self.parse_sources(main_src, step, options)
-        if self.use_cache:
-            build_options = self.get_options(main_src, testcase, build_cache=True)
-            if config:
-                parse_config_file(build_options, config)
+        if step <= num_regular_incremental_steps:
             messages = self.build(build_options, sources)
         else:
             messages = self.run_check(server, sources)
@@ -122,7 +113,11 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
                     # Delete file
                     os.remove(op.path)
             sources = self.parse_sources(main_src, step, options)
-            new_messages = self.run_check(server, sources)
+
+            if step <= num_regular_incremental_steps:
+                new_messages = self.build(build_options, sources)
+            else:
+                new_messages = self.run_check(server, sources)

             updated = []  # type: List[str]
             changed = []  # type: List[str]
@@ -179,6 +174,11 @@ def get_options(self,
         if options.follow_imports == 'normal':
             options.follow_imports = 'error'

+        for name, _ in testcase.files:
+            if 'mypy.ini' in name:
+                parse_config_file(options, name)
+                break
+
         return options

     def run_check(self, server: Server, sources: List[BuildSource]) -> List[str]:
@@ -205,6 +205,15 @@ def format_triggered(self, triggered: List[List[str]]) -> List[str]:
             result.append(('%d: %s' % (n + 2, ', '.join(filtered))).strip())
         return result

+    def get_build_steps(self, program_text: str) -> int:
+        """Get the number of regular incremental steps to run, from the test source"""
+        if not self.use_cache:
+            return 0
+        m = re.search('# num_build_steps: ([0-9]+)$', program_text, flags=re.MULTILINE)
+        if m is not None:
+            return int(m.group(1))
+        return 1
+
     def parse_sources(self, program_text: str,
                       incremental_step: int,
                       options: Options) -> List[BuildSource]:
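With the cached suite (use_cache = True), get_build_steps() reads an optional `# num_build_steps: N` annotation from the main source to decide how many leading steps run as regular incremental builds (default 1) before the daemon takes over. A hypothetical test case using it might look like the following; the real cases live in fine-grained-cache-incremental.test, which is not shown on this page:

[case testBuildCacheAcrossTwoIncrementalRuns]
# num_build_steps: 2
import a
[file a.py]
[file a.py.2]
x = 1
[file a.py.3]
x = 'changed'
[out]
==
==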

mypy/test/testfinegrainedcache.py

Lines changed: 2 additions & 0 deletions
@@ -11,3 +11,5 @@
 class FineGrainedCacheSuite(mypy.test.testfinegrained.FineGrainedSuite):
     use_cache = True
     test_name_suffix = '_cached'
+    files = (
+        mypy.test.testfinegrained.FineGrainedSuite.files + ['fine-grained-cache-incremental.test'])

test-data/unit/check-incremental.test

Lines changed: 42 additions & 0 deletions
@@ -4319,3 +4319,45 @@ def foo() -> None:
 [out]
 [out2]
 tmp/main.py:2: error: "int" has no attribute "foo"
+
+[case testIncrementalBustedFineGrainedCache1]
+# flags: --cache-fine-grained
+import a
+import b
+[file a.py]
+[file b.py]
+-- This is a heinous hack, but we simulate having an invalid cache by clobbering
+-- the proto deps file with something with hash mismatches.
+[file ../.mypy_cache/3.6/@proto_deps.meta.json.2]
+{"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"}
+[file b.py.2]
+# uh
+-- Every file should get reloaded, since the cache was invalidated
+[stale a, b, builtins]
+[rechecked a, b, builtins]
+
+[case testIncrementalBustedFineGrainedCache2]
+# flags2: --cache-fine-grained
+import a
+import b
+[file a.py]
+[file b.py]
+[file b.py.2]
+# uh
+-- Every file should get reloaded, since the settings changed
+[stale a, b, builtins]
+[rechecked a, b, builtins]
+
+[case testIncrementalWorkingFineGrainedCache]
+# flags: --cache-fine-grained
+# flags2: --cache-fine-grained
+import a
+import b
+[file a.py]
+[file b.py]
+[file b.py.2]
+# uh
+-- b gets rechecked because it changed, but nothing is stale
+-- since the interface didn't change
+[stale]
+[rechecked b]
