Skip to content

Commit

Permalink
minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
salman1993 committed Jan 20, 2025
1 parent ecf842a commit 03b1db6
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 5 deletions.
3 changes: 2 additions & 1 deletion crates/goose/src/providers/configs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ const QWEN_TOKENIZER: &str = "Qwen--Qwen2.5-Coder-32B-Instruct";
pub struct ModelConfig {
/// The name of the model to use
pub model_name: String,
// HuggingFace tokenizer name
// Optional tokenizer name (corresponds to the sanitized HuggingFace tokenizer name)
// e.g. "Xenova/gpt-4o" -> "Xenova--gpt-4o"
// If not provided, a best-effort attempt is made to infer it from the model name; otherwise a default is used
pub tokenizer_name: String,
/// Optional explicit context limit that overrides any defaults
Expand Down
5 changes: 1 addition & 4 deletions crates/goose/src/token_counter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,7 @@ impl TokenCounter {
/// Fallback: if the tokenizer is not found among the embedded ones, we look for it on disk
/// under the system temp directory (`std::env::temp_dir()`).
/// If not on disk, we download from Hugging Face, then load from disk.
fn download_and_load(tokenizer_name: &str) -> Result<Self, Box<dyn Error>> {
// This is where we store them locally
// e.g. "../tokenizer_files/Xenova--llama3-tokenizer"
let base_dir = std::env::temp_dir();
let local_dir = base_dir.join(tokenizer_name);
let local_dir = std::env::temp_dir().join(tokenizer_name);
let local_json_path = local_dir.join("tokenizer.json");

// If the file doesn't already exist, we download from HF
Expand Down

0 comments on commit 03b1db6

Please sign in to comment.