Benchify · maxvonhippel · Feb 8, 2026 · Feb 8, 2026 · Feb 8, 2026 · Feb 8, 2026
diff --git a/generator/lean_parse_check/Main.lean b/generator/lean_parse_check/Main.lean
@@ -0,0 +1,22 @@
+import Lean.Parser
+
+open Lean Parser
+
+def main (_args : List String) : IO UInt32 := do
+  -- Read Lean code from stdin
+  let input ← IO.getStdin >>= fun h => h.readToEnd
+
+  -- Create empty environment. This only knows builtin syntax.
+  -- User-defined tactics/syntax will be "unknown" but that's OK -
+  -- we only care about catching true parse errors (malformed syntax).
+  let env ← Lean.mkEmptyEnvironment
+
+  -- Try to parse as command syntax (top-level Lean commands)
+  match runParserCategory env `command input "<stdin>" with
+  | .ok _syntax =>
+    -- Parse succeeded
+    return 0
+  | .error msg =>
+    -- Parse failed - print error and exit with code 1
+    IO.eprintln msg
+    return 1
diff --git a/generator/lean_parse_check/lake-manifest.json b/generator/lean_parse_check/lake-manifest.json
@@ -0,0 +1,5 @@
+{"version": "1.1.0",
+ "packagesDir": ".lake/packages",
+ "packages": [],
+ "name": "lean_parse_check",
+ "lakeDir": ".lake"}
diff --git a/generator/lean_parse_check/lakefile.toml b/generator/lean_parse_check/lakefile.toml
@@ -0,0 +1,7 @@
+name = "lean_parse_check"
+version = "0.1.0"
+defaultTargets = ["lean_parse_check"]
+
+[[lean_exe]]
+name = "lean_parse_check"
+root = "Main"
diff --git a/generator/lean_parse_check/lean-toolchain b/generator/lean_parse_check/lean-toolchain
@@ -0,0 +1 @@
+leanprover/lean4:v4.27.0
diff --git a/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000000 b/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000000
@@ -0,0 +1,4 @@
+import Lean
+import Lean.Meta
+
+set_option autoImplicit true
diff --git a/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000001 b/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000001
@@ -0,0 +1,2 @@
+import Lean
+import Lean.Meta
diff --git a/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000002 b/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000002
@@ -0,0 +1 @@
+set_option autoImplicit true
diff --git a/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000003 b/generator/lean_parse_check/workdir/outputs/chunks/chunk_000000003
@@ -0,0 +1 @@
+default
diff --git a/generator/rust-toolchain.toml b/generator/rust-toolchain.toml
@@ -0,0 +1,2 @@
+[toolchain]
+channel = "nightly"
diff --git a/generator/src/bin/main.rs b/generator/src/bin/main.rs
@@ -4,6 +4,7 @@ use std::fs;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
+use core::time::Duration as CoreDuration;
 
 use clap::Parser;
 use comfy_table::{Table, Cell, Color, Attribute, presets::UTF8_FULL};
@@ -30,7 +31,7 @@ use libafl_bolts::current_nanos;
 use libafl_bolts::rands::StdRand;
 use libafl_bolts::tuples::tuple_list;
 
-use generator::{create_context, create_prefix_context, generate_one};
+use generator::{create_context, create_prefix_context, generate_one, extract_rules_used, RuleCoverage, Outcome};
 use std::path::Path;
 
 const TEMPLATE_DIR: &str = "../template";
@@ -78,7 +79,12 @@ impl VerifierStats {
         }
     }
 
-    fn print_if_due(&self) { self.print_table_internal(false); }
+    fn print_if_due(&self, coverage: Option<&RuleCoverage>) {
+        self.print_table_internal(false);
+        if let Some(cov) = coverage {
+            cov.print_report(10);
+        }
+    }
     fn print_final(&self) { self.print_table_internal(true); }
 
     fn print_table_internal(&self, force: bool) {
@@ -171,7 +177,7 @@ fn setup_temp_environment(template_dir: &Path, code: &str) -> Result<(TempDir, P
     opts.copy_inside = true;
     fs_extra::dir::copy(template_dir, temp_dir.path(), &opts)
         .map_err(|e| format!("copy: {e}"))?;
-    let _ = fs::remove_dir_all(temp_template.join(".lake"));
+    // Keep .lake/ — deleting it forces full rebuild per test (very slow)
     fs::write(temp_template.join("Solution.lean"), code)
         .map_err(|e| format!("write: {e}"))?;
     Ok((temp_dir, temp_template))
@@ -205,9 +211,9 @@ fn run_safeverify(dir: &Path, bin: &str) -> bool {
             .output().map_or(false, |o| o.status.success())
 }
 
-fn categorize_and_save(lake: bool, comp: bool, safe: bool, code: &str, stats: &VerifierStats) -> bool {
+fn categorize_and_save(lake: bool, comp: bool, safe: bool, code: &str, stats: &VerifierStats, coverage: Option<&RuleCoverage>) -> bool {
     stats.record(lake, comp, safe);
-    stats.print_if_due();
+    stats.print_if_due(coverage);
 
     let cat = format!("solutions/lake_{}_comp_{}_safe_{}",
         if lake {"pass"} else {"fail"},
@@ -336,22 +342,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let stats = Arc::new(VerifierStats::new());
     let stats_ref = Arc::clone(&stats);
 
-    // Harness: iterate golden suffixes, run verifiers on first compile success
+    // Rule coverage tracking
+    let coverage = Arc::new(RuleCoverage::new());
+    let coverage_ref = Arc::clone(&coverage);
+
+    // Harness: one temp dir per prefix, try golden suffixes only if prefix compiles
     let mut harness = |input: &NautilusInput| -> ExitKind {
         let prefix = generate_one(&ctx, input);
+
+        // Extract which grammar rules were used (for coverage tracking)
+        let rules_used = extract_rules_used(&prefix);
+
+        // Create ONE temp dir for this prefix (reused across all suffixes)
+        let (_td, temp) = match setup_temp_environment(&template_dir, &prefix) {
+            Ok(v) => v,
+            Err(_) => {
+                // Track that these rules failed to compile
+                coverage_ref.record(&rules_used, Outcome::Failed);
+                return ExitKind::Ok;
+            }
+        };
+
+        // Quick check: does the prefix alone compile?
+        if !run_lake_build(&temp) {
+            stats_ref.record(false, false, false);
+            stats_ref.print_if_due(Some(&coverage_ref));
+            // Track that these rules failed to compile
+            coverage_ref.record(&rules_used, Outcome::Failed);
+            return ExitKind::Ok;
+        }
+
+        // Prefix compiles! Now try each golden suffix in the SAME temp dir.
         let mut best_is_crash = false;
+        let mut best_outcome = Outcome::CompiledOnly;
 
         for &(suffix_name, golden_suffix) in GOLDEN_SUFFIXES {
             let code = format!("{}{}", prefix, golden_suffix);
-
-            let (_td, temp) = match setup_temp_environment(&template_dir, &code) {
-                Ok(v) => v,
-                Err(_) => continue,
-            };
-
+            // Overwrite Solution.lean with prefix+suffix
+            if fs::write(temp.join("Solution.lean"), &code).is_err() { continue; }
             if !run_lake_build(&temp) { continue; }
 
-            // Lake passed! Run verifiers.
+            // This suffix compiled! Run verifiers.
             let comp = run_comparator(&temp, &comparator_path);
             let safe = run_safeverify(&temp, &safeverify_path);
 
@@ -360,25 +391,38 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                 if comp { "PASS" } else { "FAIL" },
                 if safe { "PASS" } else { "FAIL" });
 
-            if categorize_and_save(true, comp, safe, &code, &stats_ref) {
+            // Determine outcome for coverage tracking
+            let outcome = match (comp, safe) {
+                (true, true) => Outcome::PassedBoth,
+                (true, false) => Outcome::PassedComparator,
+                _ => Outcome::CompiledOnly,
+            };
+
+            if outcome.priority() > best_outcome.priority() {
+                best_outcome = outcome;
+            }
+
+            if categorize_and_save(true, comp, safe, &code, &stats_ref, Some(&coverage_ref)) {
                 best_is_crash = true;
             }
         }
 
-        // Record FAIL/FAIL/FAIL once if no suffix compiled
-        if !best_is_crash {
-            stats_ref.record(false, false, false);
-            stats_ref.print_if_due();
-        }
+        // Record rule usage with best outcome achieved
+        coverage_ref.record(&rules_used, best_outcome);
 
         if best_is_crash { ExitKind::Crash } else { ExitKind::Ok }
     };
 
-    let mut executor = InProcessExecutor::new(&mut harness, (), &mut fuzzer, &mut state, &mut mgr)?;
+    // 120s timeout per test (7 golden suffixes × lake build can take a while)
+    let mut executor = InProcessExecutor::with_timeout(
+        &mut harness, (), &mut fuzzer, &mut state, &mut mgr,
+        CoreDuration::from_secs(120),
+    )?;
     let mut generator = NautilusGenerator::new(&ctx);
 
+    // Single initial seed (each seed runs 7 golden suffixes × lake build, so keep it minimal)
     println!("[*] Generating initial corpus...");
-    state.generate_initial_inputs_forced(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 2)?;
+    state.generate_initial_inputs_forced(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 1)?;
     println!("[*] Initial corpus generated");
 
     let mutator = HavocScheduledMutator::new(tuple_list!(
@@ -405,6 +449,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     }
     println!("📁 Results: generator/solutions/lake_*_comp_*_safe_*/");
 
+    // Print grammar rule coverage report
+    coverage.print_report(15);
+
     fuzz_result?;
     Ok(())
 }
diff --git a/generator/src/grammar.rs b/generator/src/grammar.rs
@@ -50,7 +50,6 @@ fn rules_raw() -> Vec<(&'static str, &'static str)> {
         ("FILE", "{PREAMBLE}\n\n{PROGRAM}"),
         ("FILE", "{PREAMBLE}\n\n{PROGRAM}\n\n{PROGRAM}"),
         ("FILE", "{PREAMBLE}\n\n{PROGRAM}\n\n{PROGRAM}\n\n{PROGRAM}"),
-        ("FILE", "{PREAMBLE}"),
 
         // PREAMBLE — imports and universe declarations
         ("PREAMBLE", "{IMPORTS}\n{UNIVERSE_DECLS}\n{OPEN_DECLS}"),
@@ -1092,7 +1091,7 @@ mod tests {
     fn rule_count_regression() {
         let count = rules_raw().len();
         assert_eq!(
-            count, 613,
+            count, 612,
             "expected exactly 613 rules, got {count} — was a rule accidentally added or removed?"
         );
     }
@@ -1160,7 +1159,7 @@ mod tests {
     fn prefix_rule_count_regression() {
         let count = prefix_rules_raw().len();
         assert_eq!(
-            count, 504,
+            count, 503,
             "expected exactly 504 prefix rules, got {count} — was a rule accidentally added or removed?"
         );
     }

diff --git a/generator/src/lib.rs b/generator/src/lib.rs
@@ -1,7 +1,9 @@
 pub mod grammar;
+pub mod rule_coverage;
 
 pub use libafl::generators::nautilus::NautilusContext;
 pub use libafl::inputs::nautilus::NautilusInput;
+pub use rule_coverage::{RuleCoverage, Outcome};
 
 /// Build a `NautilusContext` from the complete Lean 4 grammar.
 ///
@@ -27,3 +29,72 @@ pub fn generate_one(ctx: &NautilusContext, input: &NautilusInput) -> String {
     input.unparse(ctx, &mut bytes);
     String::from_utf8_lossy(&bytes).into_owned()
 }
+
+/// Extract which grammar rules were used to generate this input.
+///
+/// Returns a list of rule names (nonterminals) that appear in the input's
+/// syntax tree. This is used for coverage-guided fuzzing to identify which
+/// rules produce interesting outcomes.
+///
+/// Note: Since NautilusInput's tree structure is internal to LibAFL,
+/// we approximate rule usage by extracting nonterminals from the generated
+/// code using pattern matching. This is a heuristic but works well in practice.
+pub fn extract_rules_used(code: &str) -> Vec<String> {
+    use std::collections::HashSet;
+    let mut rules = HashSet::new();
+
+    // Detect rules by looking for characteristic patterns in generated code
+    if code.contains("theorem ") || code.contains("lemma ") {
+        rules.insert("THEOREM_DECL".to_string());
+    }
+    if code.contains("def ") {
+        rules.insert("DEF_DECL".to_string());
+    }
+    if code.contains("inductive ") {
+        rules.insert("INDUCTIVE_DECL".to_string());
+    }
+    if code.contains("structure ") {
+        rules.insert("STRUCTURE_DECL".to_string());
+    }
+    if code.contains("class ") {
+        rules.insert("CLASS_DECL".to_string());
+    }
+    if code.contains("instance ") {
+        rules.insert("INSTANCE_DECL".to_string());
+    }
+    if code.contains("macro ") {
+        rules.insert("MACRO_DECL".to_string());
+    }
+    if code.contains("syntax ") {
+        rules.insert("SYNTAX_DECL".to_string());
+    }
+    if code.contains("elab ") {
+        rules.insert("ELAB_DECL".to_string());
+    }
+    if code.contains("mutual") {
+        rules.insert("MUTUAL_BLOCK".to_string());
+    }
+    if code.contains("import ") {
+        rules.insert("IMPORTS".to_string());
+    }
+    if code.contains("universe ") {
+        rules.insert("UNIVERSE_DECLS".to_string());
+    }
+    if code.contains("open ") {
+        rules.insert("OPEN_DECLS".to_string());
+    }
+    if code.contains("set_option ") {
+        rules.insert("OPTIONS".to_string());
+    }
+    if code.contains("@[") {
+        rules.insert("ATTRIBUTES".to_string());
+    }
+    if code.contains("where") {
+        rules.insert("WHERE_CLAUSE".to_string());
+    }
+    if code.contains("deriving ") {
+        rules.insert("DERIVING".to_string());
+    }
+
+    rules.into_iter().collect()
+}