Skip to content

Commit cf179aa

Browse files
authored
Improve Summarisation for agents (#12)
* student-tutor synthetic conversation basic (with student persona and basic fluency) * save conversation in csv * review and update socratic agent * useful print * dockerignore file update * refactor summary stage in all agents, remove student conversation style for now * fix pytests
1 parent 949fcfc commit cf179aa

15 files changed

+686
-89
lines changed

.dockerignore

+3-2
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,12 @@ reports/
149149
# Synthetic data conversations
150150
src/agents/utils/example_inputs/
151151
src/agents/utils/synthetic_conversations/
152-
src/agents/utils/synthetic_conversation_generator.py
152+
src/agents/utils/synthetic_conversation_generation.py
153153
src/agents/utils/testbench_prompts.py
154154
src/agents/utils/langgraph_viz.py
155155

156156
# development agents
157+
src/agents/base_agent/
157158
src/agents/student_agent/
158159
src/agents/development_agents/
159-
src/agents/google_learnML_agent/
160+
src/agents/google_learnLM_agent/

index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def handler(event, context):
88
"""
99
Lambda handler function
1010
"""
11-
# Log the input event
11+
# Log the input event TODO: remove this line
1212
print("Received event:", json.dumps(event, indent=2))
1313

1414
if "message" not in event:

index_test.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_missing_argument(self):
3131
for arg in arguments:
3232
event = {
3333
"message": "Hello, World",
34-
"params": {"conversation_id": "1234Test"}
34+
"params": {"conversation_id": "1234Test", "conversation_history": [{"type": "user", "content": "Hello, World"}]}
3535
}
3636
event.pop(arg)
3737

@@ -42,7 +42,7 @@ def test_missing_argument(self):
4242
def test_correct_arguments(self):
4343
event = {
4444
"message": "Hello, World",
45-
"params": {"conversation_id": "1234Test"}
45+
"params": {"conversation_id": "1234Test", "conversation_history": [{"type": "user", "content": "Hello, World"}]}
4646
}
4747

4848
result = handler(event, None)
@@ -52,7 +52,7 @@ def test_correct_arguments(self):
5252
def test_correct_response(self):
5353
event = {
5454
"message": "Hello, World",
55-
"params": {"conversation_id": "1234Test"}
55+
"params": {"conversation_id": "1234Test", "conversation_history": [{"type": "user", "content": "Hello, World"}]}
5656
}
5757

5858
result = handler(event, None)

src/agents/base_agent/base_agent.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
try:
22
from ..llm_factory import OpenAILLMs
33
from .base_prompts import \
4-
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
4+
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt, summary_system_prompt
55
from ..utils.types import InvokeAgentResponseType
66
except ImportError:
77
from src.agents.llm_factory import OpenAILLMs
88
from src.agents.base_agent.base_prompts import \
9-
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
9+
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt, summary_system_prompt
1010
from src.agents.utils.types import InvokeAgentResponseType
1111

1212
from langgraph.graph import StateGraph, START, END
@@ -70,7 +70,7 @@ def call_model(self, state: State, config: RunnableConfig) -> str:
7070
summary = state.get("summary", "")
7171
conversationalStyle = state.get("conversationalStyle", "")
7272
if summary:
73-
system_message += f"## Summary of conversation earlier: {summary} \n\n"
73+
system_message += summary_system_prompt.format(summary=summary)
7474
if conversationalStyle:
7575
system_message += f"## Known conversational style and preferences of the student for this conversation: {conversationalStyle}. \n\nYour answer must be in line with this conversational style."
7676

@@ -144,6 +144,8 @@ def should_summarize(self, state: State) -> str:
144144
messages = state["messages"]
145145
valid_messages = self.check_for_valid_messages(messages)
146146
nr_messages = len(valid_messages)
147+
if len(valid_messages) == 0:
148+
raise Exception("Internal Error: No valid messages found in the conversation history. Conversation history might be empty.")
147149
if "system" in valid_messages[-1].type:
148150
nr_messages -= 1
149151

@@ -185,7 +187,7 @@ def invoke_base_agent(query: str, conversation_history: list, summary: str, conv
185187
print(f'in invoke_base_agent(), query = {query}, thread_id = {session_id}')
186188

187189
config = {"configurable": {"thread_id": session_id, "summary": summary, "conversational_style": conversationalStyle, "question_response_details": question_response_details}}
188-
response_events = agent.app.invoke({"messages": conversation_history + [HumanMessage(content=query)]}, config=config, stream_mode="values") #updates
190+
response_events = agent.app.invoke({"messages": conversation_history, "summary": summary, "conversational_style": conversationalStyle}, config=config, stream_mode="values") #updates
189191
pretty_printed_response = agent.pretty_response_value(response_events) # get last event/ai answer in the response
190192

191193
# Gather Metadata from the agent

src/agents/base_agent/base_prompts.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,20 @@
6262
{pref_guidelines}
6363
"""
6464

65-
summary_prompt = """
66-
You are an AI assistant specializing in concise and accurate summarization. Your task is to summarize the previous conversation, capturing the main topics, key points, user questions, and your responses in a clear and organized format.
67-
68-
Ensure the summary is:
65+
summary_guidelines = """Ensure the summary is:
6966
7067
Concise: Keep the summary brief while including all essential information.
71-
Structured: Organize the summary into sections such as 'Topics Discussed,' 'Key Questions and Responses,' and 'Follow-Up Suggestions' if applicable.
68+
Structured: Organize the summary into sections such as 'Topics Discussed' and 'Top 3 Key Detailed Ideas'.
7269
Neutral and Accurate: Avoid adding interpretations or opinions; focus only on the content shared.
7370
When summarizing: If the conversation is technical, highlight significant concepts, solutions, and terminology. If context involves problem-solving, detail the problem and the steps or solutions provided. If the user asks for creative input, briefly describe the ideas presented.
71+
Last messages: Include the most recent 4 messages to provide context for the summary.
7472
75-
Provide the summary in a bulleted format for clarity. Avoid redundant details while preserving the core intent of the discussion.
76-
"""
73+
Provide the summary in a bulleted format for clarity. Avoid redundant details while preserving the core intent of the discussion."""
74+
75+
summary_prompt = f"""Summarize the conversation between a student and a tutor. Your summary should highlight the major topics discussed during the session, followed by a detailed recollection of the last five significant points or ideas. Ensure the summary flows smoothly to maintain the continuity of the discussion."""
76+
77+
update_summary_prompt = f"""Update the summary by taking into account the new messages above.
78+
79+
{summary_guidelines}"""
7780

78-
update_summary_prompt = "Update the summary by taking into account the new messages above:"
81+
summary_system_prompt = "You are continuing a tutoring session with the student. Background context: {summary}. Use this context to inform your understanding but do not explicitly restate, refer to, or incorporate the details directly in your responses unless the user brings them up. Respond naturally to the user's current input, assuming prior knowledge from the summary."

src/agents/google_learnLM_agent/google_learnLM_agent.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
try:
22
from ..llm_factory import GoogleAILLMs
33
from .google_learnLM_prompts import \
4-
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
4+
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt, summary_system_prompt
55
from ..utils.types import InvokeAgentResponseType
66
except ImportError:
77
from src.agents.llm_factory import GoogleAILLMs
88
from src.agents.google_learnLM_agent.google_learnLM_prompts import \
9-
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
9+
role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt, summary_system_prompt
1010
from src.agents.utils.types import InvokeAgentResponseType
1111

1212
from langgraph.graph import StateGraph, START, END
@@ -74,7 +74,7 @@ def call_model(self, state: State, config: RunnableConfig) -> str:
7474
summary = state.get("summary", "")
7575
conversationalStyle = state.get("conversationalStyle", "")
7676
if summary:
77-
system_message += f"## Summary of conversation earlier: {summary} \n\n"
77+
system_message += summary_system_prompt.format(summary=summary)
7878
if conversationalStyle:
7979
system_message += f"## Known conversational style and preferences of the student for this conversation: {conversationalStyle}. \n\nYour answer must be in line with this conversational style."
8080

@@ -148,6 +148,8 @@ def should_summarize(self, state: State) -> str:
148148
messages = state["messages"]
149149
valid_messages = self.check_for_valid_messages(messages)
150150
nr_messages = len(valid_messages)
151+
if len(valid_messages) == 0:
152+
raise Exception("Internal Error: No valid messages found in the conversation history. Conversation history might be empty.")
151153
if "system" in valid_messages[-1].type:
152154
nr_messages -= 1
153155

@@ -189,7 +191,7 @@ def invoke_google_learnlm_agent(query: str, conversation_history: list, summary:
189191
print(f'in invoke_google_learnlm_agent(), query = {query}, thread_id = {session_id}')
190192

191193
config = {"configurable": {"thread_id": session_id, "summary": summary, "conversational_style": conversationalStyle, "question_response_details": question_response_details}}
192-
response_events = agent.app.invoke({"messages": conversation_history + [HumanMessage(content=query)]}, config=config, stream_mode="values") #updates
194+
response_events = agent.app.invoke({"messages": conversation_history, "summary": summary, "conversational_style": conversationalStyle}, config=config, stream_mode="values") #updates
193195
pretty_printed_response = agent.pretty_response_value(response_events) # get last event/ai answer in the response
194196

195197
# Gather Metadata from the agent

src/agents/google_learnLM_agent/google_learnLM_prompts.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,20 @@
6262
{pref_guidelines}
6363
"""
6464

65-
summary_prompt = """
66-
You are an AI assistant specializing in concise and accurate summarization. Your task is to summarize the previous conversation, capturing the main topics, key points, user questions, and your responses in a clear and organized format.
67-
68-
Ensure the summary is:
65+
summary_guidelines = """Ensure the summary is:
6966
7067
Concise: Keep the summary brief while including all essential information.
71-
Structured: Organize the summary into sections such as 'Topics Discussed,' 'Key Questions and Responses,' and 'Follow-Up Suggestions' if applicable.
68+
Structured: Organize the summary into sections such as 'Topics Discussed' and 'Top 3 Key Detailed Ideas'.
7269
Neutral and Accurate: Avoid adding interpretations or opinions; focus only on the content shared.
7370
When summarizing: If the conversation is technical, highlight significant concepts, solutions, and terminology. If context involves problem-solving, detail the problem and the steps or solutions provided. If the user asks for creative input, briefly describe the ideas presented.
71+
Last messages: Include the most recent 4 messages to provide context for the summary.
7472
75-
Provide the summary in a bulleted format for clarity. Avoid redundant details while preserving the core intent of the discussion.
76-
"""
73+
Provide the summary in a bulleted format for clarity. Avoid redundant details while preserving the core intent of the discussion."""
74+
75+
summary_prompt = f"""Summarize the conversation between a student and a tutor. Your summary should highlight the major topics discussed during the session, followed by a detailed recollection of the last five significant points or ideas. Ensure the summary flows smoothly to maintain the continuity of the discussion."""
76+
77+
update_summary_prompt = f"""Update the summary by taking into account the new messages above.
78+
79+
{summary_guidelines}"""
7780

78-
update_summary_prompt = "Update the summary by taking into account the new messages above:"
81+
summary_system_prompt = "You are continuing a tutoring session with the student. Background context: {summary}. Use this context to inform your understanding but do not explicitly restate, refer to, or incorporate the details directly in your responses unless the user brings them up. Respond naturally to the user's current input, assuming prior knowledge from the summary."

src/agents/informational_agent/informational_agent.py

+24-19
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
try:
22
from ..llm_factory import OpenAILLMs
33
from .informational_prompts import \
4-
informational_role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
4+
informational_role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt, summary_system_prompt
55
from ..utils.types import InvokeAgentResponseType
66
except ImportError:
77
from src.agents.llm_factory import OpenAILLMs
88
from src.agents.informational_agent.informational_prompts import \
9-
informational_role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt
9+
informational_role_prompt, conv_pref_prompt, update_conv_pref_prompt, summary_prompt, update_summary_prompt, summary_system_prompt
1010
from src.agents.utils.types import InvokeAgentResponseType
1111

1212
from langgraph.graph import StateGraph, START, END
@@ -25,6 +25,8 @@
2525
- [informational_role_prompt] role of a tutor to answer student's ALL questions on any topic
2626
"""
2727

28+
# TODO: return/uncomment improved conversational style use & analysis
29+
2830
ValidMessageTypes: TypeAlias = SystemMessage | HumanMessage | AIMessage
2931
AllMessageTypes: TypeAlias = ValidMessageTypes | RemoveMessage
3032

@@ -70,9 +72,9 @@ def call_model(self, state: State, config: RunnableConfig) -> str:
7072
summary = state.get("summary", "")
7173
conversationalStyle = state.get("conversationalStyle", "")
7274
if summary:
73-
system_message += f"## Summary of conversation earlier: {summary} \n\n"
74-
if conversationalStyle:
75-
system_message += f"## Known conversational style and preferences of the student for this conversation: {conversationalStyle}. \n\nYour answer must be in line with this conversational style."
75+
system_message += summary_system_prompt.format(summary=summary)
76+
# if conversationalStyle:
77+
# system_message += f"## Known conversational style and preferences of the student for this conversation: {conversationalStyle}. \n\nYour answer must be in line with this conversational style."
7678

7779
messages = [SystemMessage(content=system_message)] + state['messages']
7880

@@ -99,7 +101,7 @@ def summarize_conversation(self, state: State, config: RunnableConfig) -> dict:
99101

100102
summary = state.get("summary", "")
101103
previous_summary = config["configurable"].get("summary", "")
102-
previous_conversationalStyle = config["configurable"].get("conversational_style", "")
104+
# previous_conversationalStyle = config["configurable"].get("conversational_style", "")
103105
if previous_summary:
104106
summary = previous_summary
105107

@@ -111,28 +113,29 @@ def summarize_conversation(self, state: State, config: RunnableConfig) -> dict:
111113
else:
112114
summary_message = self.summary_prompt
113115

114-
if previous_conversationalStyle:
115-
conversationalStyle_message = (
116-
f"This is the previous conversational style of the student for this conversation: {previous_conversationalStyle}\n\n" +
117-
self.update_conversation_preference_prompt
118-
)
119-
else:
120-
conversationalStyle_message = self.conversation_preference_prompt
116+
# if previous_conversationalStyle:
117+
# conversationalStyle_message = (
118+
# f"This is the previous conversational style of the student for this conversation: {previous_conversationalStyle}\n\n" +
119+
# self.update_conversation_preference_prompt
120+
# )
121+
# else:
122+
# conversationalStyle_message = self.conversation_preference_prompt
121123

122124
# STEP 1: Summarize the conversation
123125
messages = state["messages"][:-1] + [SystemMessage(content=summary_message)]
124126
valid_messages = self.check_for_valid_messages(messages)
125127
summary_response = self.summarisation_llm.invoke(valid_messages)
126128

127129
# STEP 2: Analyze the conversational style
128-
messages = state["messages"][:-1] + [SystemMessage(content=conversationalStyle_message)]
129-
valid_messages = self.check_for_valid_messages(messages)
130-
conversationalStyle_response = self.summarisation_llm.invoke(valid_messages)
130+
# messages = state["messages"][:-1] + [SystemMessage(content=conversationalStyle_message)]
131+
# valid_messages = self.check_for_valid_messages(messages)
132+
# conversationalStyle_response = self.summarisation_llm.invoke(valid_messages)
131133

132134
# Delete messages that are no longer wanted, except the last ones
133-
delete_messages: list[AllMessageTypes] = [RemoveMessage(id=m.id) for m in state["messages"][:-3]]
135+
delete_messages: list[AllMessageTypes] = [RemoveMessage(id=m.id) for m in state["messages"][:-5]]
134136

135-
return {"summary": summary_response.content, "conversationalStyle": conversationalStyle_response.content, "messages": delete_messages}
137+
# return {"summary": summary_response.content, "conversationalStyle": conversationalStyle_response.content, "messages": delete_messages}
138+
return {"summary": summary_response.content, "messages": delete_messages}
136139

137140
def should_summarize(self, state: State) -> str:
138141
"""
@@ -144,6 +147,8 @@ def should_summarize(self, state: State) -> str:
144147
messages = state["messages"]
145148
valid_messages = self.check_for_valid_messages(messages)
146149
nr_messages = len(valid_messages)
150+
if len(valid_messages) == 0:
151+
raise Exception("Internal Error: No valid messages found in the conversation history. Conversation history might be empty.")
147152
if "system" in valid_messages[-1].type:
148153
nr_messages -= 1
149154

@@ -181,7 +186,7 @@ def invoke_informational_agent(query: str, conversation_history: list, summary:
181186
print(f'in invoke_informational_agent(), query = {query}, thread_id = {session_id}')
182187

183188
config = {"configurable": {"thread_id": session_id, "summary": summary, "conversational_style": conversationalStyle, "question_response_details": question_response_details}}
184-
response_events = agent.app.invoke({"messages": conversation_history + [HumanMessage(content=query)]}, config=config, stream_mode="values") #updates
189+
response_events = agent.app.invoke({"messages": conversation_history, "summary": summary, "conversational_style": conversationalStyle}, config=config, stream_mode="values") #updates
185190
pretty_printed_response = agent.pretty_response_value(response_events) # get last event/ai answer in the response
186191

187192
# Gather Metadata from the agent

0 commit comments

Comments
 (0)