-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathverifier.py
182 lines (154 loc) · 7.5 KB
/
verifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import re
import os
import tempfile
import subprocess
from sympy import isprime
class Verifier:
def __init__(self):
# Check if Lean 3 is installed
try:
subprocess.run(['lean', '--version'], capture_output=True, check=True)
self.lean_installed = True
except (subprocess.CalledProcessError, FileNotFoundError):
self.lean_installed = False
print("Warning: Lean 3 is not installed. Please install it using:\n"
"1. brew install elan\n"
"2. elan default leanprover-community/lean:3.50.3\n"
"3. pip install mathlibtools")
def parse_hypothesis(self, pipeline_result: str):
# First try to find an explicitly stated hypothesis
pattern = r"(?:2\. Hypothesis:|Hypothesis:)(.*?)(?:\n\d\.|\Z)"
match = re.search(pattern, pipeline_result, re.DOTALL)
if match:
return match.group(1).strip()
# If no explicit hypothesis, try to find a Lean theorem statement
lean_pattern = r"(?:theorem|lemma|conjecture)\s+\w+\s*\((.*?)\)\s*:"
match = re.search(lean_pattern, pipeline_result, re.DOTALL)
if match:
return f"Hypothesis extracted from Lean theorem: {match.group(1)}"
return None
def extract_lean_code(self, pipeline_result: str):
lean_blocks = []
pattern = r"```lean\n(.*?)```"
matches = re.finditer(pattern, pipeline_result, re.DOTALL)
for match in matches:
lean_blocks.append(match.group(1).strip())
return "\n\n".join(lean_blocks)
def verify_lean_proof(self, lean_code: str) -> str:
if not self.lean_installed:
return ("Lean 3 is not installed. Please install it using:\n"
"1. brew install elan\n"
"2. elan default leanprover-community/lean:3.50.3\n"
"3. pip install mathlibtools")
# Check if this is an unproven conjecture
if "sorry" in lean_code or "admitted" in lean_code or "conjecture" in lean_code:
return "Lean Verification: This is an unproven conjecture. The formal statement has been provided but the proof is incomplete."
try:
# Create a temporary directory for the Lean project
with tempfile.TemporaryDirectory() as temp_dir:
# Create a basic Lean 3 project structure
project_dir = os.path.join(temp_dir, 'temp_proof')
os.makedirs(project_dir)
os.makedirs(os.path.join(project_dir, 'src'))
# Create leanpkg.toml
with open(os.path.join(project_dir, 'leanpkg.toml'), 'w') as f:
f.write('''[package]
name = "temp_proof"
version = "0.1"
lean_version = "leanprover-community/lean:3.50.3"
[dependencies]
mathlib = {git = "https://github.com/leanprover-community/mathlib", rev = "master"}''')
# Write the proof to a file
proof_file = os.path.join(project_dir, 'src', 'main.lean')
with open(proof_file, 'w') as f:
f.write(lean_code)
# Try to compile the proof
try:
result = subprocess.run(
['lean', proof_file],
cwd=project_dir,
capture_output=True,
text=True,
timeout=30 # Add timeout to prevent hanging
)
if result.returncode == 0:
return "Lean Verification: Proof successfully type-checked!"
else:
return f"Lean Verification Failed:\n{result.stderr}"
except subprocess.TimeoutExpired:
return "Lean Verification: Proof checking timed out after 30 seconds"
except subprocess.CalledProcessError as e:
return f"Lean Compilation Error: {e.stderr}"
except Exception as e:
return f"Lean Verification Error: {str(e)}"
def test_collatz_sequence(self, hypothesis: str, limit=100):
"""Test the modified Collatz sequence for the first n numbers"""
def modified_collatz(n):
if n % 2 == 0:
return n // 2
return 5 * n + 1
results = []
for start in range(1, limit + 1):
n = start
sequence = [n]
seen = {n}
while n != 1 and len(sequence) < 1000: # Prevent infinite loops
n = modified_collatz(n)
sequence.append(n)
if n in seen:
results.append(f"Found cycle for {start}: {sequence}")
break
seen.add(n)
if n == 1:
results.append(f"Sequence for {start} reaches 1 in {len(sequence)} steps")
break
if len(sequence) >= 1000:
results.append(f"Sequence for {start} exceeds 1000 terms")
return "\n".join(results[:5]) + "\n..." if len(results) > 5 else "\n".join(results)
def test_prime_mod_pattern(self, hypothesis: str):
mod_pattern = r"p *≡ *([\d\sor]+)\(mod *(\d+)\)"
match = re.search(mod_pattern, hypothesis)
if not match:
return "Could not parse modular condition for automated testing."
residues_str = match.group(1).strip()
mod_val = int(match.group(2))
residues = [int(x.strip()) for x in residues_str.split('or') if x.strip().isdigit()]
bound_match = re.search(r"p *>(\d+)", hypothesis)
lower_bound = int(bound_match.group(1)) if bound_match else 2
limit = 10000
fail_count = 0
counterexamples = []
for n in range(lower_bound+1, limit):
if isprime(n):
if all((n % mod_val) != r for r in residues):
fail_count += 1
counterexamples.append(n)
if fail_count > 5:
break
if fail_count == 0:
return f"Empirical Verification Passed: No counterexamples found up to {limit}."
else:
return f"Empirical Verification Failed: Found {fail_count} counterexamples up to {limit}. First few: {counterexamples[:3]}"
def test_hypothesis(self, hypothesis: str):
if "collatz" in hypothesis.lower():
return f"Empirical Testing of Modified Collatz Sequence:\n{self.test_collatz_sequence(hypothesis)}"
elif "prime" in hypothesis.lower() and "mod" in hypothesis.lower():
return self.test_prime_mod_pattern(hypothesis)
return "No automated empirical verification was performed because hypothesis could not be parsed."
def verify(self, pipeline_result: str):
results = []
# Extract and verify Lean proof
lean_code = self.extract_lean_code(pipeline_result)
if lean_code:
lean_result = self.verify_lean_proof(lean_code)
results.append(lean_result)
else:
results.append("No Lean code found in the pipeline result.")
# Run empirical verification
hypothesis = self.parse_hypothesis(pipeline_result)
if hypothesis:
empirical_result = self.test_hypothesis(hypothesis)
results.append(empirical_result)
else:
results.append("No hypothesis found in the pipeline result.")
return "\n\n".join(results)