Skip to content

Commit 97e7819

Browse files
authored
Merge pull request #817 from macrocosm-os/release/v3.0.6
- v3.0.6 release. - Bump bittensor to v9.9.0 to address a commit-reveal error during weight setting. - Fix deep researcher final answer sometimes being formatted as JSON.
2 parents 37a14f0 + 521a3b6 commit 97e7819

File tree

4 files changed

+1916
-1867
lines changed

4 files changed

+1916
-1867
lines changed

apex/services/deep_research/deep_research_langchain.py

Lines changed: 44 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@ async def invoke(
171171
collected_sources: list[dict[str, str]] = []
172172
seen_urls: set[str] = set()
173173

174-
agent_chain = self._build_agent_chain()
174+
agent_chain = await self._build_agent_chain()
175175

176176
while step_index < max_iterations:
177177
logger.debug(f"Starting deep researcher {step_index + 1}/{max_iterations} step")
@@ -202,17 +202,18 @@ async def invoke(
202202

203203
# Final answer branch
204204
if "final_answer" in parsed:
205-
logger.debug("Early-stopping deep research due to the final answer")
206205
final_answer = str(parsed.get("final_answer", ""))
207206
reasoning_traces.append(
208207
{
209208
"step": f"iteration-{step_index}",
210209
"model": getattr(self.research_model, "model_name", "unknown"),
211210
"thought": thought,
212-
"final_answer": final_answer,
211+
"observation": final_answer,
213212
}
214213
)
215-
return final_answer, self.tool_history, reasoning_traces
214+
logger.debug("Early-stopping deep research due to the final answer")
215+
# return final_answer, self.tool_history, reasoning_traces
216+
break
216217

217218
# Action branch (only websearch supported)
218219
action = parsed.get("action") or {}
@@ -255,7 +256,7 @@ async def invoke(
255256
"model": getattr(self.research_model, "model_name", "unknown"),
256257
"thought": thought,
257258
"action": {"tool": "websearch", "query": query, "max_results": max_results},
258-
"observation": observation_text[:1000],
259+
"observation": observation_text[:1200],
259260
}
260261
)
261262
continue
@@ -289,7 +290,7 @@ async def invoke(
289290
"model": getattr(self.research_model, "model_name", "unknown"),
290291
"thought": thought,
291292
"action": {"tool": "python_repl", "code": code[:1000]},
292-
"observation": observation_text[:1000],
293+
"observation": observation_text[:1200],
293294
}
294295
)
295296
continue
@@ -305,19 +306,9 @@ async def invoke(
305306
)
306307
notes.append("Agent returned an unsupported action. Use the websearch tool or provide final_answer.")
307308

308-
# Fallback: if loop ends without final answer, ask final model to synthesize from notes
309+
# If loop ends without final answer, ask final model to synthesize from notes.
309310
logger.debug("Generating final answer")
310-
final_prompt = PromptTemplate(
311-
input_variables=["question", "notes", "sources"],
312-
template=(
313-
self._FINAL_ANSWER_INST + "Do NOT use JSON, or any other structured data format.\n"
314-
"Question:\n{question}\n\n"
315-
"Notes:\n{notes}\n\n"
316-
"Sources:\n{sources}\n\n"
317-
"Research Report:"
318-
),
319-
)
320-
final_chain = final_prompt | self.final_model | StrOutputParser()
311+
final_chain = await self._build_final_chain()
321312

322313
final_report: str = await self._try_invoke(
323314
final_chain,
@@ -336,6 +327,19 @@ async def invoke(
336327
)
337328
return final_report, self.tool_history, reasoning_traces
338329

330+
async def _build_final_chain(self) -> RunnableSerializable[dict[str, Any], str]:
331+
final_prompt = PromptTemplate(
332+
input_variables=["question", "notes", "sources"],
333+
template=(
334+
self._FINAL_ANSWER_INST + "Do NOT use JSON, or any other structured data format. Provide \n"
335+
"Question:\n{question}\n\n"
336+
"Notes:\n{notes}\n\n"
337+
"Sources:\n{sources}\n\n"
338+
"Research Report:"
339+
),
340+
)
341+
return final_prompt | self.final_model | StrOutputParser()
342+
339343
def _render_sources(self, collected_sources: list[dict[str, str]], max_items: int = 12) -> str:
340344
if not collected_sources:
341345
return "(none)"
@@ -352,47 +356,49 @@ def _render_notes(self, notes: list[str], max_items: int = 8) -> str:
352356
clipped = notes[-max_items:]
353357
return "\n".join(f"- {item}" for item in clipped)
354358

355-
def _build_agent_chain(self) -> RunnableSerializable[dict[str, Any], str]:
359+
async def _build_agent_chain(self) -> RunnableSerializable[dict[str, Any], str]:
356360
prompt = PromptTemplate(
357361
input_variables=["question", "notes", "sources"],
358362
template=(
359363
"You are DeepResearcher, a meticulous, tool-using research agent.\n"
360364
"You can use exactly these tools: websearch, python_repl.\n\n"
361365
"Tool: websearch\n"
362-
"- description: Search the web for relevant information.\n"
363-
"- args: keys: 'query' (string), 'max_results' (integer <= 10)\n\n"
366+
" - description: Search the web for relevant information.\n"
367+
" - args: keys: 'query' (string), 'max_results' (integer <= 10)\n\n"
364368
"Tool: python_repl\n"
365-
"- description: A Python shell for executing Python commands.\n"
366-
"- note: Print values to see output, e.g., `print(...)`.\n"
367-
"- args: keys: 'code' (string: valid python command).\n\n"
369+
" - description: A Python shell for executing Python commands.\n"
370+
" - note: Print values to see output, e.g., `print(...)`.\n"
371+
" - args: keys: 'code' (string: valid python command).\n\n"
368372
"Follow an iterative think-act-observe loop. "
369373
"Prefer rich internal reasoning over issuing many tool calls.\n"
370374
"Spend time thinking: produce substantial, explicit reasoning in each 'thought'.\n"
371375
"Avoid giving a final answer too early. Aim for at least 6 detailed thoughts before finalizing,\n"
372376
"unless the question is truly trivial. "
373377
"If no tool use is needed in a step, still provide a reflective 'thought'\n"
374378
"that evaluates evidence, identifies gaps, and plans the next step.\n\n"
375-
"Always respond in strict JSON. Use one of the two schemas:\n\n"
376-
"1) Action step (JSON keys shown with dot-paths):\n"
377-
"- thought: string\n"
378-
"- action.tool: 'websearch' | 'python_repl'\n"
379-
"- action.input: for websearch -> {{query: string, max_results: integer}}\n"
380-
"- action.input: for python_repl -> {{code: string}}\n\n"
381-
"2) Final answer step:\n"
382-
"- thought: string\n"
383-
"- final_answer: string (use plain text for final answer, not a JSON)\n\n"
379+
"Always respond in strict JSON for deep research steps (do not use JSON for final answer). "
380+
"Use one of the two schemas:\n\n"
381+
"1. Action step (JSON keys shown with dot-paths):\n"
382+
" - thought: string\n"
383+
" - action.tool: 'websearch' | 'python_repl'\n"
384+
" - action.input: for websearch -> {{query: string, max_results: integer}}\n"
385+
" - action.input: for python_repl -> {{code: string}}\n\n"
386+
"2. Final answer step:\n"
387+
" - thought: string\n"
388+
" - final_answer: string\n\n"
384389
"In every step, make 'thought' a detailed paragraph (120-200 words) that:\n"
385-
"- Summarizes what is known and unknown so far\n"
386-
"- Justifies the chosen next action or decision not to act\n"
387-
"- Evaluates evidence quality and cites source numbers when applicable\n"
388-
"- Identifies risks, uncertainties, and alternative hypotheses\n\n"
390+
" - Summarizes what is known and unknown so far\n"
391+
" - Justifies the chosen next action or decision not to act\n"
392+
" - Evaluates evidence quality and cites source numbers when applicable\n"
393+
" - Identifies risks, uncertainties, and alternative hypotheses\n\n"
394+
"Respond with JSON only during deep research steps, "
395+
"final answer must be always in a plain text formatted as a research report, with sections:\n"
389396
"Executive Summary, Key Findings, Evidence, Limitations, Conclusion.\n"
390397
"Use inline numeric citations like [1], [2] that refer to Sources.\n"
391398
"Include a final section titled 'Sources' listing the numbered citations.\n\n"
392399
"Question:\n{question}\n\n"
393400
"Notes and observations so far:\n{notes}\n\n"
394401
"Sources (use these for citations):\n{sources}\n\n"
395-
"Respond with JSON always, except for final_anwer (use plain text)."
396402
),
397403
)
398404
return prompt | self.research_model | StrOutputParser()

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "apex"
3-
version = "3.0.5"
3+
version = "3.0.6"
44
description = "Bittensor Subnet 1: Apex"
55
readme = "README.md"
66
requires-python = "~=3.11"
@@ -26,7 +26,7 @@ dependencies = [
2626
"loguru>=0.7.3",
2727
"tavily-python>=0.7.10",
2828
"pip>=25.1.1",
29-
"bittensor>=9.8.3",
29+
"bittensor==9.9.0",
3030
"rouge>=1.0.1",
3131
"substrate-interface>=1.7.11",
3232
"types-netaddr>=1.3.0.20240530",

tests/services/deep_research/test_deep_research_langchain.py

Lines changed: 62 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import json
12
from unittest.mock import AsyncMock, MagicMock, patch
23

34
import pytest
@@ -76,17 +77,26 @@ async def test_invoke_with_documents_in_body(deep_research_langchain, mock_webse
7677
body = {"documents": [{"page_content": "doc1"}, {"page_content": "doc2"}]}
7778

7879
with (
79-
patch("apex.services.deep_research.deep_research_langchain.PromptTemplate") as mock_prompt_template,
80-
patch("apex.services.deep_research.deep_research_langchain.StrOutputParser"),
80+
patch(
81+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_agent_chain"
82+
) as mock_build_agent_chain,
83+
patch(
84+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_final_chain"
85+
) as mock_build_final_chain,
8186
):
8287
agent_chain = AsyncMock()
83-
agent_chain.ainvoke.return_value = '{"thought": "enough info", "final_answer": "final_report"}'
84-
mock_prompt_template.return_value.__or__.return_value.__or__.return_value = agent_chain
88+
return_value = json.dumps({"thought": "enough info", "final_answer": "final_report"})
89+
agent_chain.ainvoke.return_value = return_value
90+
mock_build_agent_chain.return_value = agent_chain
91+
92+
final_chain_mock = AsyncMock()
93+
final_chain_mock.ainvoke.return_value = return_value
94+
mock_build_final_chain.return_value = final_chain_mock
8595

8696
result = await deep_research_langchain.invoke(messages, body)
8797

8898
mock_websearch.search.assert_not_called()
89-
assert result[0] == "final_report"
99+
assert result[0] == return_value
90100

91101

92102
@pytest.mark.asyncio
@@ -96,8 +106,12 @@ async def test_invoke_with_websearch(deep_research_langchain, mock_websearch):
96106
mock_websearch.search.return_value = [MagicMock(content="web_doc", url="http://a.com", title="A")]
97107

98108
with (
99-
patch("apex.services.deep_research.deep_research_langchain.PromptTemplate") as mock_prompt_template,
100-
patch("apex.services.deep_research.deep_research_langchain.StrOutputParser"),
109+
patch(
110+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_agent_chain"
111+
) as mock_build_agent_chain,
112+
patch(
113+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_final_chain"
114+
) as mock_build_final_chain,
101115
):
102116
agent_chain = AsyncMock()
103117
agent_chain.ainvoke.side_effect = [
@@ -107,7 +121,11 @@ async def test_invoke_with_websearch(deep_research_langchain, mock_websearch):
107121
),
108122
'{"thought": "done", "final_answer": "final_answer"}',
109123
]
110-
mock_prompt_template.return_value.__or__.return_value.__or__.return_value = agent_chain
124+
mock_build_agent_chain.return_value = agent_chain
125+
126+
final_chain_mock = AsyncMock()
127+
final_chain_mock.ainvoke.return_value = "final_answer"
128+
mock_build_final_chain.return_value = final_chain_mock
111129

112130
result = await deep_research_langchain.invoke(messages)
113131

@@ -121,17 +139,26 @@ async def test_invoke_no_websearch_needed_final_answer(deep_research_langchain,
121139
messages = [{"role": "user", "content": "test question"}]
122140

123141
with (
124-
patch("apex.services.deep_research.deep_research_langchain.PromptTemplate") as mock_prompt_template,
125-
patch("apex.services.deep_research.deep_research_langchain.StrOutputParser"),
142+
patch(
143+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_agent_chain"
144+
) as mock_build_agent_chain,
145+
patch(
146+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_final_chain"
147+
) as mock_build_final_chain,
126148
):
127149
agent_chain = AsyncMock()
128-
agent_chain.ainvoke.return_value = '{"thought": "clear", "final_answer": "final_report"}'
129-
mock_prompt_template.return_value.__or__.return_value.__or__.return_value = agent_chain
150+
return_value = json.dumps({"thought": "enough info", "final_answer": "final_report"})
151+
agent_chain.ainvoke.return_value = return_value
152+
mock_build_agent_chain.return_value = agent_chain
153+
154+
final_chain_mock = AsyncMock()
155+
final_chain_mock.ainvoke.return_value = return_value
156+
mock_build_final_chain.return_value = final_chain_mock
130157

131158
result = await deep_research_langchain.invoke(messages)
132159

133160
mock_websearch.search.assert_not_called()
134-
assert result[0] == "final_report"
161+
assert result[0] == return_value
135162

136163

137164
@pytest.mark.asyncio
@@ -149,8 +176,12 @@ async def test_full_invoke_flow_with_multiple_actions(deep_research_langchain, m
149176
]
150177

151178
with (
152-
patch("apex.services.deep_research.deep_research_langchain.PromptTemplate") as mock_prompt_template,
153-
patch("apex.services.deep_research.deep_research_langchain.StrOutputParser"),
179+
patch(
180+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_agent_chain"
181+
) as mock_build_agent_chain,
182+
patch(
183+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_final_chain"
184+
) as mock_build_final_chain,
154185
):
155186
agent_chain = AsyncMock()
156187
agent_chain.ainvoke.side_effect = [
@@ -164,7 +195,11 @@ async def test_full_invoke_flow_with_multiple_actions(deep_research_langchain, m
164195
),
165196
'{"thought": "complete", "final_answer": "final_report"}',
166197
]
167-
mock_prompt_template.return_value.__or__.return_value.__or__.return_value = agent_chain
198+
mock_build_agent_chain.return_value = agent_chain
199+
200+
final_chain_mock = AsyncMock()
201+
final_chain_mock.ainvoke.return_value = "final_report"
202+
mock_build_final_chain.return_value = final_chain_mock
168203

169204
result = await deep_research_langchain.invoke(messages)
170205

@@ -186,15 +221,23 @@ async def test_full_invoke_flow_with_multiple_actions(deep_research_langchain, m
186221
async def test_invoke_with_python_repl(deep_research_langchain):
187222
"""Agent chooses python_repl then produces final answer."""
188223
with (
189-
patch("apex.services.deep_research.deep_research_langchain.PromptTemplate") as mock_prompt_template,
190-
patch("apex.services.deep_research.deep_research_langchain.StrOutputParser"),
224+
patch(
225+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_agent_chain"
226+
) as mock_build_agent_chain,
227+
patch(
228+
"apex.services.deep_research.deep_research_langchain.DeepResearchLangchain._build_final_chain"
229+
) as mock_build_final_chain,
191230
):
192231
agent_chain = AsyncMock()
193232
agent_chain.ainvoke.side_effect = [
194233
('{"thought": "compute needed", "action": {"tool": "python_repl", "input": {"code": "print(1+1)"}}}'),
195234
'{"thought": "done", "final_answer": "final_answer"}',
196235
]
197-
mock_prompt_template.return_value.__or__.return_value.__or__.return_value = agent_chain
236+
mock_build_agent_chain.return_value = agent_chain
237+
238+
final_chain_mock = AsyncMock()
239+
final_chain_mock.ainvoke.return_value = "final_answer"
240+
mock_build_final_chain.return_value = final_chain_mock
198241

199242
result = await deep_research_langchain.invoke([{"role": "user", "content": "q"}])
200243

0 commit comments

Comments
 (0)