Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: fast unused transitive imports #39

Merged
merged 8 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions ImportGraph/Imports.lean
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,13 @@ end Lean.NameMap
Returns a `List (Name × List Name)` with a key for each module `n` in `amongst`,
whose corresponding value is the list of modules `m` in `amongst` which are transitively imported by `n`,
but no declaration in `n` makes use of a declaration in `m`.

The current implementation is too slow to run on the entirety of Mathlib,
although it should be fine for any sequential chain of imports in Mathlib.
-/
def unusedTransitiveImports (amongst : List Name) : CoreM (List (Name × List Name)) := do
def unusedTransitiveImports (amongst : List Name) (verbose : Bool := false) : CoreM (List (Name × List Name)) := do
let env ← getEnv
let transitiveImports := env.importGraph.transitiveClosure
let transitivelyRequired ← env.transitivelyRequiredModules' amongst verbose
amongst.mapM fun n => do return (n,
let unused := (transitiveImports.find? n).getD {} \ (← env.transitivelyRequiredModules n)
let unused := (transitiveImports.find? n).getD {} \ (transitivelyRequired.find? n |>.getD {})
amongst.filter (fun m => unused.contains m))

/--
Expand Down
51 changes: 51 additions & 0 deletions ImportGraph/RequiredModules.lean
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,57 @@ def Environment.transitivelyRequiredModules (env : Environment) (module : Name)
|>.filter (env.getModuleFor? · = some module)
(NameSet.ofList constants).transitivelyRequiredModules env

/--
Computes all the modules transitively required by the specified modules.
Should be equivalent to calling `transitivelyRequiredModules` on each module, but shares more of the work.
-/
partial def Environment.transitivelyRequiredModules' (env : Environment) (modules : List Name) (verbose : Bool := false) :
CoreM (NameMap NameSet) := do
let N := env.header.moduleNames.size
let mut c2m : NameMap (BitVec N) := {}
let mut pushed : NameSet := {}
let mut result : NameMap NameSet := {}
for m in modules do
if verbose then
IO.println s!"Processing module {m}"
let mut r : BitVec N := 0
for n in env.header.moduleData[(env.header.moduleNames.getIdx? m).getD 0]!.constNames do
if ! n.isInternal then
-- This is messy: Mathlib is big enough that writing a recursive function causes a stack overflow.
-- So we use an explicit stack instead. We visit each constant twice:
-- once to record the constants transitively used by it,
-- and again to record the modules which defined those constants.
let mut stack : List (Name × Option NameSet) := [⟨n, none⟩]
pushed := pushed.insert n
while !stack.isEmpty do
match stack with
| [] => panic! "Stack is empty"
| (c, used?) :: tail =>
stack := tail
match used? with
| none =>
if !c2m.contains c then
let used := (← getConstInfo c).getUsedConstantsAsSet
stack := ⟨c, some used⟩ :: stack
for u in used do
if !pushed.contains u then
stack := ⟨u, none⟩ :: stack
pushed := pushed.insert u
| some used =>
let usedModules : NameSet :=
used.fold (init := {}) (fun s u => if let some m := env.getModuleFor? u then s.insert m else s)
let transitivelyUsed : BitVec N :=
used.fold (init := toBitVec usedModules) (fun s u => s ||| ((c2m.find? u).getD 0))
c2m := c2m.insert c transitivelyUsed
r := r ||| ((c2m.find? n).getD 0)
result := result.insert m (toNameSet r)
return result
where
toBitVec {N : Nat} (s : NameSet) : BitVec N :=
s.fold (init := 0) (fun b n => b ||| BitVec.twoPow _ ((env.header.moduleNames.getIdx? n).getD 0))
toNameSet {N : Nat} (b : BitVec N) : NameSet :=
env.header.moduleNames.zipWithIndex.foldl (init := {}) (fun s (n, i) => if b.getLsbD i then s.insert n else s)

/--
Return the names of the modules in which constants used in the current file were defined.

Expand Down
23 changes: 23 additions & 0 deletions ImportGraph/UnusedTransitiveImports.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import ImportGraph.Imports

open Lean

def Core.withImportModules (modules : Array Name) {α} (f : CoreM α) : IO α := do
searchPathRef.set compile_time_search_path%
unsafe Lean.withImportModules (modules.map (fun m => {module := m})) {} (trustLevel := 1024)
fun env => Prod.fst <$> Core.CoreM.toIO
(ctx := { fileName := "<CoreM>", fileMap := default }) (s := { env := env }) do f

/--
`lake exe unused_transitive_imports m1 m2 ...`

For each specified module `m`, prints those `n` from the argument list which are imported, but transitively unused by `m`.
-/
def main (args : List String) : IO UInt32 := do
let (flags, args) := args.partition (fun s => s.startsWith "-")
let mut modules := args.map (fun s => s.toName)
Core.withImportModules modules.toArray do
let r ← unusedTransitiveImports modules (verbose := flags.contains "-v" || flags.contains "--verbose")
for (n, u) in r do
IO.println s!"{n}: {u}"
return 0
9 changes: 9 additions & 0 deletions ImportGraphTest/Imports.lean
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import ImportGraph.Imports
import ImportGraph.RequiredModules
import ImportGraphTest.Used

open Lean

Expand Down Expand Up @@ -36,6 +37,7 @@ elab "#unused_transitive_imports" names:ident* : command => do
logInfo <| s!"Transitively unused imports of {n}:\n{"\n".intercalate (u.map (fun i => s!" {i}"))}"

-- This test case can be removed after nightly-2024-10-24, because these imports have been cleaned up.
-- It should be replaced with another test case!
/--
info: Transitively unused imports of Init.Control.StateRef:
Init.System.IO
Expand All @@ -46,6 +48,13 @@ info: Transitively unused imports of Init.System.IO:
#guard_msgs in
#unused_transitive_imports Init.Control.StateRef Init.System.IO Init.Control.Reader Init.Control.Basic

/--
info: Transitively unused imports of ImportGraphTest.Used:
ImportGraphTest.Unused
-/
#guard_msgs in
#unused_transitive_imports ImportGraphTest.Used ImportGraphTest.Unused Init.Control.Reader

-- This is a spurious unused transitive import, because it relies on notation from `Init.Core`.
/--
info: Transitively unused imports of Init.Control.Basic:
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ There are a few commands implemented, which help you analysing the imports of a
(Must be run at the end of the file. Tactics and macros may result in incorrect output.)
* `#find_home decl`: suggests files higher up the import hierarchy to which `decl` could be moved.

## Other executables

`lake exe unused_transitive_imports m1 m2 ...`

For each specified module `m`, prints those `n` from the argument list which are imported, but transitively unused by `m`.

## Installation

The installation works exactly like for any [Lake package](https://reservoir.lean-lang.org/),
Expand Down
7 changes: 7 additions & 0 deletions lakefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,12 @@ root = "Main"
# Remove this line if you do not need such functionality.
supportInterpreter = true

# `lake exe unused_transitive_imports` prints unused transitive imports from amongst a given list of modules.
[[lean_exe]]
name = "unused_transitive_imports"
root = "ImportGraph.UnusedTransitiveImports"
supportInterpreter = true

[[lean_lib]]
name = "ImportGraphTest"