modelscope · ployts · Jan 9, 2026 · Jan 8, 2026
diff --git a/openjudge/graders/agent/tool/tool_call_accuracy.py b/openjudge/graders/agent/tool/tool_call_accuracy.py
@@ -220,6 +220,11 @@ def __init__(
             language=language,
         )
 
+        # Pattern to match tool calls in JSON format
+        self._tool_call_pattern = re.compile(
+            r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}', flags=re.DOTALL
+        )
+
     def _parse_tools_from_response(
         self,
         response: str,
@@ -233,10 +238,7 @@ def _parse_tools_from_response(
             List of parsed tool calls.
         """
         tool_calls = []
-
-        # Pattern to match tool calls in JSON format
-        tool_call_pattern = r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}'
-        matches = re.findall(tool_call_pattern, response, re.DOTALL)
+        matches = self._tool_call_pattern.findall(response)
 
         for match in matches:
             try:

diff --git a/openjudge/graders/agent/tool/tool_call_success.py b/openjudge/graders/agent/tool/tool_call_success.py
@@ -6,7 +6,6 @@
 """
 
 import json
-import re
 import textwrap
 from typing import Any, Dict, List, Optional, Union
 
@@ -244,34 +243,6 @@ def __init__(
         )
         self.template = template or DEFAULT_TOOL_CALL_SUCCESS_TEMPLATE
 
-    def _parse_tools_from_response(
-        self,
-        response: str,
-    ) -> List[Dict[str, Any]]:
-        """Extract tool calls from the response.
-
-        Args:
-            response: The response string to extract tool calls from.
-
-        Returns:
-            List of parsed tool calls.
-        """
-        tool_calls = []
-
-        # Pattern to match tool calls in JSON format
-        tool_call_pattern = r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}'
-        matches = re.findall(tool_call_pattern, response, re.DOTALL)
-
-        for match in matches:
-            try:
-                tool_call = json.loads(match)
-                tool_calls.append(tool_call)
-            except json.JSONDecodeError:
-                # Skip invalid JSON
-                continue
-
-        return tool_calls
-
     async def aevaluate(
         self,
         tool_definitions: Union[Dict[str, Any], List[Dict[str, Any]]],

diff --git a/openjudge/graders/code/code_excution.py b/openjudge/graders/code/code_excution.py
@@ -60,6 +60,11 @@ def __init__(
             )
             self.test_framework_available = False
 
+        # Fenced code block explicitly tagged as Python (```python ... ```)
+        self._python_code_pattern = re.compile(r"```python\n(.*?)\n```", flags=re.DOTALL)
+        # Generic fenced code block with no language tag (``` ... ```)
+        self._generic_code_pattern = re.compile(r"```\n(.*?)\n```", flags=re.DOTALL)
+
     def _extract_code(self, content: str) -> str:
         """
         Extract code from content
@@ -71,12 +76,12 @@ def _extract_code(self, content: str) -> str:
             Extracted code
         """
         # Try to find Python code in various formats
-        code_match = re.search(r"```python\n(.*?)\n```", content, re.DOTALL)
+        code_match = self._python_code_pattern.search(content)
         if code_match:
             return code_match.group(1)
 
         # Try other formats
-        code_match = re.search(r"```\n(.*?)\n```", content, re.DOTALL)
+        code_match = self._generic_code_pattern.search(content)
         if code_match:
             return code_match.group(1)
 

diff --git a/openjudge/graders/code/code_style.py b/openjudge/graders/code/code_style.py
@@ -27,6 +27,11 @@ def __init__(self):
             description="Basic code style checking including indentation consistency and naming conventions.",
         )
 
+        self._function_pattern = re.compile(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")
+        self._variable_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
+        self._snake_case_pattern = re.compile(r"^[a-z_][a-z0-9_]*$")
+        self._code_pattern = re.compile(r"```(?:python)?\s*\n(.*?)\n\s*```", re.DOTALL)
+
     def _check_indentation(self, code: str) -> tuple[bool, str]:
         """Check indentation consistency"""
         lines = code.split("\n")
@@ -58,11 +63,8 @@ def _check_indentation(self, code: str) -> tuple[bool, str]:
     def _check_naming(self, code: str) -> tuple[float, str]:
         """Check naming conventions"""
         # Simple naming check
-        function_pattern = r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\("
-        variable_pattern = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*="
-
-        functions = re.findall(function_pattern, code)
-        variables = re.findall(variable_pattern, code)
+        functions = self._function_pattern.findall(code)
+        variables = self._variable_pattern.findall(code)
 
         total_names = len(functions) + len(variables)
         if total_names == 0:
@@ -72,12 +74,12 @@ def _check_naming(self, code: str) -> tuple[float, str]:
 
         # Check function names (should be snake_case)
         for func in functions:
-            if re.match(r"^[a-z_][a-z0-9_]*$", func):
+            if self._snake_case_pattern.match(func):
                 good_names += 1
 
         # Check variable names (should be snake_case)
         for var in variables:
-            if re.match(r"^[a-z_][a-z0-9_]*$", var):
+            if self._snake_case_pattern.match(var):
                 good_names += 1
 
         score = good_names / total_names
@@ -122,8 +124,7 @@ async def aevaluate(self, response: str) -> GraderScore:
             0.5 Code style score: 0.500; Consistent indentation; Naming convention: 1/2 names follow snake_case
         """
         # Extract code blocks
-        code_pattern = r"```(?:python)?\s*\n(.*?)\n\s*```"
-        code_blocks = re.findall(code_pattern, response, re.DOTALL)
+        code_blocks = self._code_pattern.findall(response)
 
         if not code_blocks:
             return GraderScore(

diff --git a/openjudge/graders/code/syntax_checker.py b/openjudge/graders/code/syntax_checker.py
@@ -31,6 +31,8 @@ def __init__(self):
             description="Check code syntax using Abstract Syntax Tree to validate Python code blocks.",
         )
 
+        self._code_pattern = re.compile(r"```(?:python)?\s*\n(.*?)\n\s*```", re.DOTALL)
+
     async def aevaluate(self, response: str) -> GraderScore:
         """Check code syntax in the provided response.
 
@@ -68,8 +70,7 @@ async def aevaluate(self, response: str) -> GraderScore:
         """
 
         # Extract code blocks
-        code_pattern = r"```(?:python)?\s*\n(.*?)\n\s*```"
-        code_blocks = re.findall(code_pattern, response, re.DOTALL)
+        code_blocks = self._code_pattern.findall(response)
 
         if not code_blocks:
             # No code blocks, return neutral score

diff --git a/openjudge/graders/format/ngram_repetition_penalty.py b/openjudge/graders/format/ngram_repetition_penalty.py
@@ -67,10 +67,11 @@ def __init__(
             chinese_only=chinese_only,
         )
 
+        self._think_pattern = re.compile(r"(.*?)", flags=re.DOTALL)
+
     def _extract_thought_process(self, content: str) -> str:
         """Extract thought process"""
-        think_pattern = r"(.*?)"
-        matches = re.findall(think_pattern, content, re.DOTALL)
+        matches = self._think_pattern.findall(content)
         return " ".join(matches) if matches else ""
 
     def _generate_ngrams(self, tokens: List[str]) -> List[tuple]:

diff --git a/openjudge/graders/format/reasoning_format.py b/openjudge/graders/format/reasoning_format.py
@@ -34,7 +34,10 @@ def __init__(self, think_token: str = "think", answer_token: str = "answer"):
             description="Check format reward for thinking format and answer format with proper tags.",
         )
         self.think_token = think_token
+        self.think_pattern = re.compile(f"<{self.think_token}>.*?</{self.think_token}>", flags=re.DOTALL)
+
         self.answer_token = answer_token
+        self.answer_pattern = re.compile(f"<{self.answer_token}>.*?</{self.answer_token}>", flags=re.DOTALL)
 
     # pylint: disable=unused-argument
     async def aevaluate(self, response: str, *args: Any, **kwargs: Any) -> GraderScore:
@@ -73,12 +76,10 @@ async def aevaluate(self, response: str, *args: Any, **kwargs: Any) -> GraderSco
         """
 
         # Check thinking format tags
-        think_pattern = f"<{self.think_token}>.*?</{self.think_token}>"
-        has_think_tag = bool(re.search(think_pattern, response, re.DOTALL))
+        has_think_tag = bool(self.think_pattern.search(response))
 
         # Check answer format tags
-        answer_pattern = f"<{self.answer_token}>.*?</{self.answer_token}>"
-        has_answer_tag = bool(re.search(answer_pattern, response, re.DOTALL))
+        has_answer_tag = bool(self.answer_pattern.search(response))
 
         # Calculate reward
         reward = 1.0 if has_think_tag and has_answer_tag else 0.0

diff --git a/openjudge/graders/format/reasoning_tool_format.py b/openjudge/graders/format/reasoning_tool_format.py
@@ -26,6 +26,19 @@ def __init__(self) -> None:
             description="Check tool call format including think, answer and tool_call tags with JSON validation.",
         )
 
+        # Patterns for identifying tags
+        self._think_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
+        self._answer_pattern = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)
+        self._tool_call_pattern = re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)
+
+        self._think_answer_pattern = re.compile(r"^\s*<think>.*?</think>\s*<answer>.*?</answer>\s*$", re.DOTALL)
+        self._think_tool_call_pattern = re.compile(
+            r"^\s*<think>.*?</think>\s*(?:<tool_call>.*?</tool_call>\s*)+$", re.DOTALL
+        )
+
+        self._consecutive_start_tool_call_tag_pattern = re.compile(r"<tool_call>\s*<tool_call>")
+        self._consecutive_end_tool_call_tag_pattern = re.compile(r"</tool_call>\s*</tool_call>")
+
     # pylint: disable=too-many-statements
     async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
         """
@@ -69,13 +82,9 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
         """
 
         # Extract tag contents
-        think_pattern = r"<think>(.*?)</think>"
-        answer_pattern = r"<answer>(.*?)</answer>"
-        tool_call_pattern = r"<tool_call>(.*?)</tool_call>"
-
-        think_matches = re.search(think_pattern, response, re.DOTALL)
-        answer_matches = re.search(answer_pattern, response, re.DOTALL)
-        tool_call_matches = re.findall(tool_call_pattern, response, re.DOTALL)
+        think_matches = self._think_pattern.search(response)
+        answer_matches = self._answer_pattern.search(response)
+        tool_call_matches = self._tool_call_pattern.findall(response)
 
         has_think_tag = think_matches is not None
         has_answer_tag = answer_matches is not None
@@ -89,9 +98,8 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
             # Case 1: <think></think> + <answer></answer>
             if has_answer_tag and not has_tool_call_tag:
                 # Check overall format
-                format_pattern = r"^\s*<think>.*?</think>\s*<answer>.*?</answer>\s*$"
                 valid_format = bool(
-                    re.match(format_pattern, response, re.DOTALL),
+                    self._think_answer_pattern.match(response),
                 )
 
                 # Check tag occurrence count
@@ -115,9 +123,8 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
             # Case 2: <think></think> + <tool_call></tool_call>
             elif has_tool_call_tag and not has_answer_tag:
                 # Check overall format
-                format_pattern = r"^\s*<think>.*?</think>\s*(?:<tool_call>.*?</tool_call>\s*)+$"
                 valid_format = bool(
-                    re.match(format_pattern, response, re.DOTALL),
+                    self._think_tool_call_pattern.match(response),
                 )
 
                 # Check <think> tag occurrence count
@@ -133,11 +140,9 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
 
                 # Check for consecutive duplicate tags
                 if valid_format:
-                    if re.search(
-                        r"</tool_call>\s*</tool_call>",
+                    if self._consecutive_end_tool_call_tag_pattern.search(
                         response,
-                    ) or re.search(
-                        r"<tool_call>\s*<tool_call>",
+                    ) or self._consecutive_start_tool_call_tag_pattern.search(
                         response,
                     ):
                         valid_format = False

diff --git a/openjudge/graders/text/number_accuracy.py b/openjudge/graders/text/number_accuracy.py
@@ -52,12 +52,12 @@ def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None:
             **kwargs,
         )
         self.tolerance = tolerance
+        self._number_pattern = re.compile(r"-?\d+\.?\d*")
 
     def _extract_numbers(self, text: str) -> List[float]:
         """Extract numbers from text"""
         # Match integers and floating point numbers
-        number_pattern = r"-?\d+\.?\d*"
-        numbers = re.findall(number_pattern, text)
+        numbers = self._number_pattern.findall(text)
         return [float(n) for n in numbers if n]
 
     async def aevaluate(self, response: str, reference_response: str) -> GraderScore: