Skip to content

Commit aaad048

Browse files
authored
Prevent duplicates in module dependency lists (#7240)
Currently in some cases we will write out a lot of duplicate entries in the dependency lists of modules. Refactor dependency management to prevent this.
1 parent 55375f6 commit aaad048

File tree

2 files changed

+55
-31
lines changed

2 files changed

+55
-31
lines changed

mypy/build.py

+54-30
Original file line numberDiff line numberDiff line change
@@ -1626,8 +1626,14 @@ class State:
16261626
meta = None # type: Optional[CacheMeta]
16271627
data = None # type: Optional[str]
16281628
tree = None # type: Optional[MypyFile]
1629+
# We keep both a list and set of dependencies. A set because it makes it efficient to
1630+
# prevent duplicates and the list because I am afraid of changing the order of
1631+
# iteration over dependencies.
1632+
# They should be managed with add_dependency and suppress_dependency.
16291633
dependencies = None # type: List[str] # Modules directly imported by the module
1634+
dependencies_set = None # type: Set[str] # The same but as a set for deduplication purposes
16301635
suppressed = None # type: List[str] # Suppressed/missing dependencies
1636+
suppressed_set = None # type: Set[str] # Suppressed/missing dependencies
16311637
priorities = None # type: Dict[str, int]
16321638

16331639
# Map each dependency to the line number where it is first imported
@@ -1735,7 +1741,9 @@ def __init__(self,
17351741
# Make copies, since we may modify these and want to
17361742
# compare them to the originals later.
17371743
self.dependencies = list(self.meta.dependencies)
1744+
self.dependencies_set = set(self.dependencies)
17381745
self.suppressed = list(self.meta.suppressed)
1746+
self.suppressed_set = set(self.suppressed)
17391747
all_deps = self.dependencies + self.suppressed
17401748
assert len(all_deps) == len(self.meta.dep_prios)
17411749
self.priorities = {id: pri
@@ -1925,13 +1933,15 @@ def fix_suppressed_dependencies(self, graph: Graph) -> None:
19251933
new_dependencies = []
19261934
entry_points = self.manager.source_set.source_modules
19271935
for dep in self.dependencies + self.suppressed:
1928-
ignored = dep in self.suppressed and dep not in entry_points
1936+
ignored = dep in self.suppressed_set and dep not in entry_points
19291937
if ignored or dep not in graph:
19301938
new_suppressed.append(dep)
19311939
else:
19321940
new_dependencies.append(dep)
19331941
self.dependencies = new_dependencies
1942+
self.dependencies_set = set(new_dependencies)
19341943
self.suppressed = new_suppressed
1944+
self.suppressed_set = set(new_suppressed)
19351945

19361946
# Methods for processing modules from source code.
19371947

@@ -2039,6 +2049,22 @@ def semantic_analysis_pass1(self) -> None:
20392049
with self.wrap_context():
20402050
first.visit_file(self.tree, self.xpath, self.id, options)
20412051

2052+
def add_dependency(self, dep: str) -> None:
2053+
if dep not in self.dependencies_set:
2054+
self.dependencies.append(dep)
2055+
self.dependencies_set.add(dep)
2056+
if dep in self.suppressed_set:
2057+
self.suppressed.remove(dep)
2058+
self.suppressed_set.remove(dep)
2059+
2060+
def suppress_dependency(self, dep: str) -> None:
2061+
if dep in self.dependencies_set:
2062+
self.dependencies.remove(dep)
2063+
self.dependencies_set.remove(dep)
2064+
if dep not in self.suppressed_set:
2065+
self.suppressed.append(dep)
2066+
self.suppressed_set.add(dep)
2067+
20422068
def compute_dependencies(self) -> None:
20432069
"""Compute a module's dependencies after parsing it.
20442070
@@ -2052,28 +2078,27 @@ def compute_dependencies(self) -> None:
20522078
# Compute (direct) dependencies.
20532079
# Add all direct imports (this is why we needed the first pass).
20542080
# Also keep track of each dependency's source line.
2055-
dependencies = []
2056-
priorities = {} # type: Dict[str, int] # id -> priority
2057-
dep_line_map = {} # type: Dict[str, int] # id -> line
2081+
# Missing dependencies will be moved from dependencies to
2082+
# suppressed when they fail to be loaded in load_graph.
2083+
2084+
self.dependencies = []
2085+
self.dependencies_set = set()
2086+
self.suppressed = []
2087+
self.suppressed_set = set()
2088+
self.priorities = {} # id -> priority
2089+
self.dep_line_map = {} # id -> line
20582090
dep_entries = (manager.all_imported_modules_in_file(self.tree) +
20592091
self.manager.plugin.get_additional_deps(self.tree))
20602092
for pri, id, line in dep_entries:
2061-
priorities[id] = min(pri, priorities.get(id, PRI_ALL))
2093+
self.priorities[id] = min(pri, self.priorities.get(id, PRI_ALL))
20622094
if id == self.id:
20632095
continue
2064-
if id not in dep_line_map:
2065-
dependencies.append(id)
2066-
dep_line_map[id] = line
2096+
self.add_dependency(id)
2097+
if id not in self.dep_line_map:
2098+
self.dep_line_map[id] = line
20672099
# Every module implicitly depends on builtins.
2068-
if self.id != 'builtins' and 'builtins' not in dep_line_map:
2069-
dependencies.append('builtins')
2070-
2071-
# Missing dependencies will be moved from dependencies to
2072-
# suppressed when they fail to be loaded in load_graph.
2073-
self.dependencies = dependencies
2074-
self.suppressed = []
2075-
self.priorities = priorities
2076-
self.dep_line_map = dep_line_map
2100+
if self.id != 'builtins':
2101+
self.add_dependency('builtins')
20772102

20782103
self.check_blockers() # Can fail due to bogus relative imports
20792104

@@ -2085,7 +2110,7 @@ def semantic_analysis(self) -> None:
20852110
self.manager.semantic_analyzer.visit_file(self.tree, self.xpath, self.options, patches)
20862111
self.patches = patches
20872112
for dep in self.manager.semantic_analyzer.imports:
2088-
self.dependencies.append(dep)
2113+
self.add_dependency(dep)
20892114
self.priorities[dep] = PRI_LOW
20902115

20912116
def semantic_analysis_pass_three(self) -> None:
@@ -2157,11 +2182,11 @@ def _patch_indirect_dependencies(self,
21572182
for dep in sorted(extra):
21582183
if dep not in self.manager.modules:
21592184
continue
2160-
if dep not in self.suppressed and dep not in self.manager.missing_modules:
2161-
self.dependencies.append(dep)
2185+
if dep not in self.suppressed_set and dep not in self.manager.missing_modules:
2186+
self.add_dependency(dep)
21622187
self.priorities[dep] = PRI_INDIRECT
2163-
elif dep not in self.suppressed and dep in self.manager.missing_modules:
2164-
self.suppressed.append(dep)
2188+
elif dep not in self.suppressed_set and dep in self.manager.missing_modules:
2189+
self.suppress_dependency(dep)
21652190

21662191
def compute_fine_grained_deps(self) -> Dict[str, Set[str]]:
21672192
assert self.tree is not None
@@ -2204,6 +2229,8 @@ def write_cache(self) -> None:
22042229
dep_prios = self.dependency_priorities()
22052230
dep_lines = self.dependency_lines()
22062231
assert self.source_hash is not None
2232+
assert len(set(self.dependencies)) == len(self.dependencies), (
2233+
"Duplicates in dependencies list for {} ({})".format(self.id, self.dependencies))
22072234
new_interface_hash, self.meta = write_cache(
22082235
self.id, self.path, self.tree,
22092236
list(self.dependencies), list(self.suppressed), list(self.child_modules),
@@ -2733,7 +2760,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager,
27332760
# comment about this in `State.__init__()`.
27342761
added = []
27352762
for dep in st.ancestors + dependencies + st.suppressed:
2736-
ignored = dep in st.suppressed and dep not in entry_points
2763+
ignored = dep in st.suppressed_set and dep not in entry_points
27372764
if ignored and dep not in added:
27382765
manager.missing_modules.add(dep)
27392766
elif dep not in graph:
@@ -2747,20 +2774,17 @@ def load_graph(sources: List[BuildSource], manager: BuildManager,
27472774
newst = State(id=dep, path=None, source=None, manager=manager,
27482775
caller_state=st, caller_line=st.dep_line_map.get(dep, 1))
27492776
except ModuleNotFound:
2750-
if dep in st.dependencies:
2751-
st.dependencies.remove(dep)
2752-
st.suppressed.append(dep)
2777+
if dep in st.dependencies_set:
2778+
st.suppress_dependency(dep)
27532779
else:
27542780
assert newst.id not in graph, newst.id
27552781
graph[newst.id] = newst
27562782
new.append(newst)
27572783
if dep in st.ancestors and dep in graph:
27582784
graph[dep].child_modules.add(st.id)
2759-
if dep in graph and dep in st.suppressed:
2785+
if dep in graph and dep in st.suppressed_set:
27602786
# Previously suppressed file is now visible
2761-
if dep in st.suppressed:
2762-
st.suppressed.remove(dep)
2763-
st.dependencies.append(dep)
2787+
st.add_dependency(dep)
27642788
manager.plugin.set_modules(manager.modules)
27652789
return graph
27662790

mypy/newsemanal/semanal_main.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def semantic_analyze_target(target: str,
331331
if isinstance(node, Decorator):
332332
infer_decorator_signature_if_simple(node, analyzer)
333333
for dep in analyzer.imports:
334-
state.dependencies.append(dep)
334+
state.add_dependency(dep)
335335
priority = mypy.build.PRI_LOW
336336
if priority <= state.priorities.get(dep, priority):
337337
state.priorities[dep] = priority

0 commit comments

Comments
 (0)