chore: improve extensions
zensh committed Jan 18, 2025
1 parent 26a85a5 commit f3b6ef4
Showing 6 changed files with 90 additions and 41 deletions.
4 changes: 2 additions & 2 deletions anda_core/src/model.rs
@@ -153,7 +153,7 @@ impl std::fmt::Display for Document {
}
writeln!(f, "/>")?;
}
write!(f, "{}\n</doc>\n", self.text)
write!(f, "{:?}\n</doc>\n", self.text)
}
}

@@ -313,7 +313,7 @@ mod tests {
println!("{}", prompt);
assert_eq!(
prompt,
"<attachments>\n<doc id=\"1\">\nTest document 1.\n</doc>\n<doc id=\"2\">\n<meta a=\"b\" key=\"value\" />\nTest document 2.\n</doc>\n</attachments>\n\nThis is a test prompt."
"<attachments>\n<doc id=\"1\">\n\"Test document 1.\"\n</doc>\n<doc id=\"2\">\n<meta a=\"b\" key=\"value\" />\n\"Test document 2.\"\n</doc>\n</attachments>\n\nThis is a test prompt."
);
}
}
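
A side note on the change from {} to {:?} above: formatting self.text with {:?} renders the text as a Rust string literal, so quotes and newlines are escaped and a multi-line document body cannot break the surrounding <doc> markup. A minimal standalone sketch of that standard formatting behaviour (illustrative, not project code):

fn main() {
    let text = "line one\nline \"two\"";
    println!("{}", text);   // Display: the embedded newline splits the output across lines
    println!("{:?}", text); // Debug: one escaped line: "line one\nline \"two\""
}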
93 changes: 66 additions & 27 deletions anda_engine/src/extension/attention.rs
@@ -1,7 +1,5 @@
use anda_core::{evaluate_tokens, AgentOutput, CompletionFeatures, CompletionRequest, Message};

use crate::context::AgentCtx;

static HIGH_REWARD_COMMAND: &str = "HIGH_REWARD";
static MEDIUM_REWARD_COMMAND: &str = "MEDIUM_REWARD";
static RESPOND_COMMAND: &str = "RESPOND";
@@ -65,7 +63,11 @@ impl Attention {
}
}

pub async fn evaluate_content(&self, ctx: &AgentCtx, content: &str) -> ContentQuality {
pub async fn evaluate_content(
&self,
ctx: &impl CompletionFeatures,
content: &str,
) -> ContentQuality {
// Ignore very short content
if evaluate_tokens(content) < self.min_content_tokens {
return ContentQuality::Ignore;
@@ -74,23 +76,23 @@
let req = CompletionRequest {
system: Some(format!("\
You are an expert evaluator for article content quality, specializing in assessing knowledge value. Your task is to analyze the provided article, classify its quality into three levels, and determine the appropriate storage and reward action.\n\n\
Evaluation criteria:\n\
## Evaluation criteria:\n\
1. Knowledge Depth: Does the article provide detailed, well-researched, or expert-level insights?\n\
2. Originality: Is the content unique, creative, or innovative?\n\
3. Relevance: Is the content actionable, practical, or useful for the intended audience?\n\n\
Classification Levels:\n\
## Classification Levels:\n\
- {HIGH_REWARD_COMMAND}: The article has exceptional knowledge value, with deep insights, originality, and significant relevance.\n\
- {MEDIUM_REWARD_COMMAND}: The article has good knowledge value, meeting most criteria but with some areas for improvement.\n\
- {IGNORE_COMMAND}: The article does not meet the criteria for high or medium knowledge value and requires no action.")),
prompt: format!("\
Article Content:\n{}\n\n\
Evaluation Task:\n\
## Evaluation Task:\n\
1. Analyze the article based on Knowledge Depth, Originality, and Relevance.\n\
2. Classify the article into one of the three levels:\n\
- {HIGH_REWARD_COMMAND}: Exceptional quality.\n\
- {MEDIUM_REWARD_COMMAND}: Good quality.\n\
- {IGNORE_COMMAND}: Low quality or no significant knowledge value.\n\
3. Provide a brief explanation for your classification, citing specific strengths or weaknesses of the article.\
3. Provide a brief explanation for your classification, citing specific strengths or weaknesses of the article.\n\n\
## Below is the full content of the article:\n\n{}\
",
content
),
Expand All @@ -113,7 +115,7 @@ impl Attention {

pub async fn should_reply(
&self,
ctx: &AgentCtx,
ctx: &impl CompletionFeatures,
my_name: &str,
topics: &[String],
recent_messages: &[Message],
@@ -149,15 +151,15 @@
system: Some(format!("\
You are {my_name}.\n\
You are part of a multi-user discussion environment. Your primary task is to evaluate the relevance of each message to your assigned conversation topics and decide whether to respond. Always prioritize messages that directly mention you or are closely related to the conversation topic.\n\n\
Response options:\n\
## Response options:\n\
- {RESPOND_COMMAND}: The message is directly addressed to you or is highly relevant to the conversation topic.\n\
- {IGNORE_COMMAND}: The message is not addressed to you and is unrelated to the conversation topic.\n\
- {STOP_COMMAND}: The user has explicitly requested you to stop or the conversation has ended.")),
prompt: format!("\
Assigned Conversation Topics: {}\n\
Recent Messages:\n{}\n\
Latest message:\n{}\n\n\
Decision Task:\n\
## Assigned Conversation Topics:\n{}\n\
## Recent Messages:\n{}\n\
## Latest message:\n{}\n\n\
## Decision Task:\n\
Evaluate whether the latest message requires your response. Choose one response option from the list above and provide a brief explanation for your choice.\
",
topics.join(", "), recent_messages.join("\n"), user_message
@@ -179,7 +181,7 @@ impl Attention {
}
}

pub async fn should_like(&self, ctx: &AgentCtx, content: &str) -> bool {
pub async fn should_like(&self, ctx: &impl CompletionFeatures, content: &str) -> bool {
// Ignore very short content
if evaluate_tokens(content) < self.min_prompt_tokens {
return false;
@@ -194,22 +196,27 @@
If the post meets at least one of these criteria, respond with 'true'. Otherwise, respond with 'false'.
".to_string()),
prompt: format!("\
Post Content: {}\n\n\
Decision Task:\n\
## Post Content:\n{:?}\n\n\
## Decision Task:\n\
Evaluate the post based on the criteria above and respond with only 'true' or 'false'.\
",
content
),
..Default::default()
};

println!("{:?}", req.prompt);

match ctx.completion(req).await {
Ok(AgentOutput { content, .. }) => content.to_ascii_lowercase().contains("true"),
Ok(AgentOutput { content, .. }) => {
println!("{:?}", content);
content.to_ascii_lowercase().contains("true")
}
Err(_) => false,
}
}

pub async fn should_retweet(&self, ctx: &AgentCtx, content: &str) -> bool {
pub async fn should_retweet(&self, ctx: &impl CompletionFeatures, content: &str) -> bool {
// Ignore very short content
if evaluate_tokens(content) < self.min_prompt_tokens {
return false;
@@ -224,8 +231,8 @@
Retweet only if the post strongly satisfies at least one of these criteria.\
".to_string()),
prompt: format!("\
Post Content: {}\n\n\
Decision Task:\n\
## Post Content:\n{:?}\n\n\
## Decision Task:\n\
Evaluate the post based on the criteria above and respond with only 'true' or 'false'.\
",
content
Expand All @@ -239,7 +246,7 @@ impl Attention {
}
}

pub async fn should_quote(&self, ctx: &AgentCtx, content: &str) -> bool {
pub async fn should_quote(&self, ctx: &impl CompletionFeatures, content: &str) -> bool {
// Ignore very short content
if evaluate_tokens(content) < self.min_prompt_tokens {
return false;
@@ -254,8 +261,8 @@
Quote the post only if it satisfies at least one of these criteria significantly.\
".to_string()),
prompt: format!("\
Post Content: {}\n\n\
Decision Task:\n\
## Post Content:\n{:?}\n\n\
## Decision Task:\n\
Evaluate the post based on the criteria above and respond with only 'true' or 'false'.\
",
content
@@ -273,9 +280,41 @@
#[cfg(test)]
mod tests {
use super::*;
use crate::model::deepseek::Client;

#[tokio::test(flavor = "current_thread")]
#[ignore]
async fn test_deepseek() {
dotenv::dotenv().ok();

let api_key = std::env::var("DEEPSEEK_API_KEY").expect("DEEPSEEK_API_KEY is not set");
let client = Client::new(&api_key);
let model = client.completion_model();
let attention = Attention::default();
let res = attention
.should_like(
&model,
"#ICP offers permanent memory storage, #TEE ensures absolute security, and #LLM delivers intelligent computation—#Anda is set to become an immortal AI Agent!",
)
.await;
println!("{:?}", res);

let res = attention
.evaluate_content(
&model,
r"Why LLMs are not great tools but uniquely enables AI agents.
LLMs do not feel like any other tool we've invented because they lack of predictability.
We use many advanced and complex tools day to day. (In fact, you are reading this tweet on one!) These tools are useful and predictable. The same cannot be said for LLMs. They are useful but very unpredictable--you never know when they may hallucinate or give out the wrong answer.
Interestingly, those are the same properties that we humans have--lack of predictability enables creativity. The true genius works in any domain require creativity.
This (partially) explain the proliferation of AI agents. If we can't explain LLMs, why not just treat them like us humans? Sure, they have flaws, but hey they are surely helpful in most cases. Sure, there are outputs that are bad, but there are also outputs that are truly creative.
Developing this line of reasoning further: what's going to be important that marks identity and reputation for humans is going to also apply to LLM-agents. Reputation, credit scores, social connections, media reach, etc. This is exactly what we are seeing now with AI agents.",
)
.await;
println!("{:?}", res);
}

#[test]
fn test_content_quality() {
assert!(ContentQuality::Good > ContentQuality::Ignore);
}
}
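
A small usage sketch (illustrative only, not part of this commit): because the helpers above now take any ctx implementing CompletionFeatures instead of a concrete AgentCtx, callers can stay generic over the completion backend too, using only the methods shown in this diff.

async fn triage_post(
    attention: &Attention,
    model: &impl CompletionFeatures,
    post: &str,
) -> ContentQuality {
    // Hypothetical helper: posts worth liking get a full quality evaluation,
    // everything else is ignored.
    if attention.should_like(model, post).await {
        attention.evaluate_content(model, post).await
    } else {
        ContentQuality::Ignore
    }
}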
5 changes: 3 additions & 2 deletions anda_engine/src/extension/character.rs
@@ -85,10 +85,11 @@ pub struct Learning {
/// Persona flexibility description, defining how the character adapts to user interaction styles
pub persona_flexibility: String,

/// List of required tools for the character to perform tasks (checked during agent registration)
/// Tools that the character uses to complete tasks.
/// These tools will be checked for availability when registering the agent.
pub tools: Vec<String>,

/// List of optional tools that the character can use but aren't required
/// Optional tools that the character uses to complete tasks.
pub optional_tools: Vec<String>,
}

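A sketch of the registration-time availability check the comment above describes (hypothetical helper; it assumes the engine tracks registered tool names in a set and is not the project's actual API):

use std::collections::HashSet;

fn check_required_tools(required: &[String], registered: &HashSet<String>) -> Result<(), String> {
    // Every name in `tools` must resolve to a registered tool; names in
    // `optional_tools` would simply be skipped when absent.
    for name in required {
        if !registered.contains(name) {
            return Err(format!("required tool not registered: {}", name));
        }
    }
    Ok(())
}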
11 changes: 6 additions & 5 deletions anda_engine/src/extension/extractor.rs
@@ -51,6 +51,10 @@ where
_t: PhantomData,
}
}

pub fn submit(&self, args: String) -> Result<T, BoxError> {
serde_json::from_str(&args).map_err(|err| format!("invalid args: {}", err).into())
}
}

impl<T> Tool<BaseCtx> for SubmitTool<T>
@@ -120,7 +124,7 @@ impl<T: JsonSchema + DeserializeOwned + Serialize + Send + Sync> Extractor<T> {

pub async fn extract(
&self,
ctx: &AgentCtx,
ctx: &impl CompletionFeatures,
prompt: String,
) -> Result<(T, AgentOutput), BoxError> {
let req = CompletionRequest {
@@ -135,10 +139,7 @@ impl<T: JsonSchema + DeserializeOwned + Serialize + Send + Sync> Extractor<T> {
let mut res = ctx.completion(req).await?;
if let Some(tool_calls) = &mut res.tool_calls {
if let Some(tool) = tool_calls.iter_mut().next() {
let result = self
.tool
.call_string(ctx.base.clone(), tool.args.clone())
.await?;
let result = self.tool.submit(tool.args.clone())?;
tool.result = Some(serde_json::to_string(&result)?);
return Ok((result, res));
}
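What the new SubmitTool::submit boils down to, as a standalone sketch: the tool-call arguments arrive as a JSON string and are deserialized straight into the target type, with no async call and no BaseCtx clone. The Profile type and the BoxError alias here are illustrative assumptions, not project code.

use serde::Deserialize;

type BoxError = Box<dyn std::error::Error + Send + Sync>; // assumed alias for illustration

#[derive(Debug, Deserialize)]
struct Profile {
    name: String,
    age: u8,
}

fn submit_like(args: &str) -> Result<Profile, BoxError> {
    // Mirrors the body of SubmitTool::submit shown above.
    serde_json::from_str(args).map_err(|err| format!("invalid args: {}", err).into())
}

fn main() {
    let profile = submit_like(r#"{"name": "Anda", "age": 1}"#).unwrap();
    println!("{:?}", profile);
}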
6 changes: 4 additions & 2 deletions anda_engine/src/extension/segmenter.rs
@@ -1,4 +1,6 @@
use anda_core::{evaluate_tokens, Agent, AgentOutput, BoxError, Tool, ToolCall};
use anda_core::{
evaluate_tokens, Agent, AgentOutput, BoxError, CompletionFeatures, Tool, ToolCall,
};

use super::extractor::{Deserialize, Extractor, JsonSchema, Serialize, SubmitTool};
use crate::context::AgentCtx;
@@ -49,7 +51,7 @@ impl DocumentSegmenter {

pub async fn segment(
&self,
ctx: &AgentCtx,
ctx: &impl CompletionFeatures,
content: &str,
) -> Result<(SegmentOutput, AgentOutput), BoxError> {
if evaluate_tokens(content) <= self.segment_tokens {
12 changes: 9 additions & 3 deletions anda_engine/src/model/deepseek.rs
@@ -1,8 +1,8 @@
//! OpenAI API client and Anda integration
//!
use anda_core::{
AgentOutput, BoxError, BoxPinFut, CompletionRequest, FunctionDefinition, Message, ToolCall,
CONTENT_TYPE_JSON,
AgentOutput, BoxError, BoxPinFut, CompletionFeatures, CompletionRequest, FunctionDefinition,
Message, ToolCall, CONTENT_TYPE_JSON,
};
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
@@ -181,6 +181,12 @@ impl CompletionModel {
}
}

impl CompletionFeatures for CompletionModel {
async fn completion(&self, req: CompletionRequest) -> Result<AgentOutput, BoxError> {
CompletionFeaturesDyn::completion(self, req).await
}
}

impl CompletionFeaturesDyn for CompletionModel {
fn completion(&self, mut req: CompletionRequest) -> BoxPinFut<Result<AgentOutput, BoxError>> {
let model = self.model.clone();
@@ -285,7 +291,7 @@ mod tests {
let client = Client::new(&api_key);
let model = client.completion_model();
let req = character.to_request("Who are you?".into(), Some("AndaICP".into()));
let res = model.completion(req).await.unwrap();
let res = CompletionFeatures::completion(&model, req).await.unwrap();
println!("{:?}", res);
}
}
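
The test now calls CompletionFeatures::completion(&model, req) in fully qualified form because CompletionModel implements both CompletionFeatures and CompletionFeaturesDyn, and each trait exposes a completion method. A generic illustration of that disambiguation (not project code):

trait A { fn run(&self) -> &'static str; }
trait B { fn run(&self) -> &'static str; }

struct M;
impl A for M { fn run(&self) -> &'static str { "A" } }
impl B for M { fn run(&self) -> &'static str { "B" } }

fn main() {
    let m = M;
    // m.run(); // error: multiple applicable items in scope
    assert_eq!(A::run(&m), "A");
    assert_eq!(<M as B>::run(&m), "B");
}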
