@@ -84,12 +84,15 @@ async def run_single(self, task: QueryTask) -> str:
8484
8585 reference = None
8686 tool_history : list [dict [str , str ]] = []
87+ reasoning_traces : list [dict [str , Any ]] = []
8788 if random .random () < self .reference_rate :
8889 try :
8990 generator_results = None
9091 ground_truth = 0
9192 logger .debug (f"Generating task reference for query: { query [:20 ]} .." )
92- reference , tool_history = await generate_reference (llm = self .deep_research , query = query )
93+ reference , tool_history , reasoning_traces = await generate_reference (
94+ llm = self .deep_research , query = query
95+ )
9396 except BaseException as exc :
9497 logger .exception (f"Failed to generate reference: { exc } " )
9598
@@ -100,7 +103,9 @@ async def run_single(self, task: QueryTask) -> str:
100103 if random .random () < self .redundancy_rate :
101104 try :
102105 logger .debug (f"Generating redundant task reference for query: { query [:20 ]} .." )
103- reference , tool_history = await generate_reference (llm = self .deep_research , query = query )
106+ reference , tool_history , reasoning_traces = await generate_reference (
107+ llm = self .deep_research , query = query
108+ )
104109 except BaseException as exc :
105110 logger .warning (f"Failed to generate redundant reference: { exc } " )
106111
@@ -111,7 +116,10 @@ async def run_single(self, task: QueryTask) -> str:
111116
112117 if self .logger_wandb :
113118 await self .logger_wandb .log (
114- reference = reference , discriminator_results = discriminator_results , tool_history = tool_history
119+ reference = reference ,
120+ discriminator_results = discriminator_results ,
121+ tool_history = tool_history ,
122+ reasoning_traces = reasoning_traces ,
115123 )
116124
117125 if self ._debug :
0 commit comments