first pass working!

quinn-dougherty · Sep 12, 2024 · 4394d0e · 4394d0e
1 parent b4b6cdd
commit 4394d0e
Show file tree

Hide file tree

Showing 5 changed files with 38 additions and 22 deletions.
diff --git a/llmfv/.gitignore b/llmfv/.gitignore
@@ -1,3 +1,12 @@
+# test outputs
+test/test_example_func.py
+
+# hypotheses
+.hypothesis/
+
+# env
+.env
+
 # python generated files
 __pycache__/
 *.py[oc]
@@ -6,8 +15,6 @@ dist/
 wheels/
 *.egg-info
 
-# env
-.env
 
 # venv
 .venv

diff --git a/llmfv/README.md b/llmfv/README.md
@@ -1,4 +1,4 @@
-# nb
+# LLM-FV: Toward Scaling Formal Verification Schemes via LLMs
 
 Describe your project here.
 
@@ -13,3 +13,9 @@ Create a `.env` file in `nb/` next to this README with the following:
 ```
 ANTHROPIC_API_KEY="YOUR_KEY_HERE"
 ```
+
+
+# Basic Run
+You can try running `python example_agent_run.py`. This will attempt to generate property tests with Claude for the example function in `test/example_func.py`.
+
+Once it completes successfully, you can re-run the final generated tests with `pytest test/test_example_func.py`.
diff --git a/llmfv/example_agent_run.py b/llmfv/example_agent_run.py
@@ -0,0 +1,14 @@
+from llmfv.claude_prompting import PythonAgent
+
+
+with open('test/example_func.py', 'r') as file:
+    content = file.read()
+
+agent = PythonAgent(
+    input=content,
+    scratchpad="test/test_example_func.py",
+)
+agent.loop_until_condition()
+
+
+print(agent.dump_full_chat_history())
diff --git a/llmfv/llmfv/claude_prompting.py b/llmfv/llmfv/claude_prompting.py
@@ -15,9 +15,10 @@ class DebuggingAgent(ABC):
     def __init__(
         self,
         input: str,
+        scratchpad: str,
         model_name: str = "claude-3-opus-20240229",
         max_tokens_per_message: int = 512,
-        max_iterations: int = 3,
+        max_iterations: int = 5,
     ):
         self.model_name = model_name
         self.max_tokens_per_message = max_tokens_per_message
@@ -26,6 +27,7 @@ def __init__(
         self.client = Anthropic(api_key=ANTHROPIC_API_KEY)
 
         self.input = input
+        self.scratchpad = scratchpad
         self.conversation = []
 
     def send_appended_user_message(self, message: str):
@@ -64,6 +66,7 @@ def loop_until_condition(self):
         loops = 0
         while not self.stopping_condition(returncode) and loops < self.max_iterations:
             loops += 1
+            print(f"Loop {loops}/{self.max_iterations}")
 
             # check that the code is valid
             if not self.verify_output_type(response):
@@ -73,7 +76,6 @@ def loop_until_condition(self):
             # subprocess call to run it and track outputs and exit codes
             stdout, stderr, returncode = self.run_code(response)
 
-
             # if not done, append the response to the conversation and get a new response
             # with secondary prompt scaffold
             response = self.send_appended_user_message(self.CONTINUOUS_PROMPT(stdout, stderr))  # type: ignore
@@ -95,7 +97,8 @@ class PythonAgent(DebuggingAgent):
 
     FIRST_PROMPT = lambda _, x: f"""Please write property tests for this function:\n\n{x}"""
 
-    CONTINUOUS_PROMPT = lambda _, stdout, stderr: f"""Running the code produced the following output:\n\nStandard out:\n{stdout}\n\nStandard error:\n{stderr}\n\n."""
+    CONTINUOUS_PROMPT = lambda _, stdout, stderr: f"""Running the code produced the following output:\n\nStandard out:\n{stdout}\n\nStandard error:\n{stderr}\n\n.
+    Please fix your original output, again only generating code within the 3 backticks."""
 
     def verify_output_type(self, response: str):
         """Check that the model output is only code by looking for the backticks at the start and end."""
@@ -107,11 +110,11 @@ def run_code(self, code: str):
         if code.startswith("python"):
             code = code[6:]
 
-        with open("temp.py", "w") as f:
+        with open(self.scratchpad, "w") as f:
             f.write(code)
 
         result = subprocess.run(
-            ["pytest", "test/test_example_func.py"],
+            ["pytest", self.scratchpad],
             capture_output=True,
             text=True
         )

diff --git a/llmfv/test_agent.py b/llmfv/test_agent.py