18 changes: 18 additions & 0 deletions deepseek-r1-burn/Cargo.toml
@@ -0,0 +1,18 @@
[package]
name = "deepseek-r1-burn"
version = "0.1.0"
edition = "2021"
description = "DeepSeek-R1 model implementation using Burn"

[dependencies]
burn = { version = "0.16.0", default-features = false, features = ["std", "dataset"] }
serde = { version = "1.0.108", features = ["derive"] }
serde_json = "1.0.108"
tokenizers = "0.15.0"
thiserror = "1.0.50"

[features]
default = ["std"]
std = []
webgpu = ["burn/wgpu"]
cuda = ["burn/cuda"]
1 change: 1 addition & 0 deletions deepseek-r1-burn/LICENSE-APACHE
@@ -0,0 +1 @@
../LICENSE-APACHE
1 change: 1 addition & 0 deletions deepseek-r1-burn/LICENSE-MIT
@@ -0,0 +1 @@
../LICENSE-MIT
28 changes: 28 additions & 0 deletions deepseek-r1-burn/NOTICES.md
@@ -0,0 +1,28 @@
# Notices

This project is based on the following open source projects:

## Burn Framework
- Source: https://github.com/tracel-ai/burn
- License: Apache-2.0
- Copyright (c) 2024 Burn Contributors

## DeepSeek-R1
- Source: https://github.com/deepseek-ai/DeepSeek-R1
- License: Apache-2.0
- Copyright (c) 2024 DeepSeek AI

## Tokenizers
- Source: https://github.com/huggingface/tokenizers
- License: Apache-2.0
- Copyright (c) 2024 Hugging Face, Inc.

## Serde
- Source: https://github.com/serde-rs/serde
- License: Apache-2.0 OR MIT
- Copyright (c) 2024 Serde Contributors

## thiserror
- Source: https://github.com/dtolnay/thiserror
- License: Apache-2.0 OR MIT
- Copyright (c) 2024 David Tolnay
147 changes: 147 additions & 0 deletions deepseek-r1-burn/README.md
@@ -0,0 +1,147 @@
# DeepSeek-R1-Burn

A Rust implementation of the DeepSeek-R1 language model using the Burn framework.

## Features

- Full implementation of the DeepSeek-R1 architecture
- Support for CPU and GPU backends through Burn (with `webgpu` and `cuda` feature flags)
- Tokenization using HuggingFace's `tokenizers` library
- Training utilities built on Burn's training framework
- Model configuration and serialization
- Text generation with temperature sampling (see the sketch below)
- Fine-tuning support
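
Temperature sampling divides the logits by the temperature before the softmax, so values above 1.0 flatten the distribution and values below 1.0 sharpen it. A minimal, dependency-free sketch of the idea (the `sample_with_temperature` helper is illustrative, not part of this crate's API):

```rust
/// Sample an index from `logits` after temperature scaling.
/// `r` is a uniform random draw in [0, 1).
fn sample_with_temperature(logits: &[f32], temperature: f32, r: f32) -> usize {
    // Subtract the max logit for numerical stability; the constant
    // offset cancels out in the softmax.
    let max = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    let exp: Vec<f32> = logits
        .iter()
        .map(|&l| ((l - max) / temperature).exp())
        .collect();
    let sum: f32 = exp.iter().sum();

    // Inverse-CDF sampling over the resulting softmax distribution
    let mut cumulative = 0.0;
    for (i, e) in exp.iter().enumerate() {
        cumulative += e / sum;
        if r < cumulative {
            return i;
        }
    }
    logits.len() - 1 // fallback for floating-point rounding
}
```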

## Installation

Add the following to your `Cargo.toml`:

```toml
[dependencies]
deepseek-r1-burn = { version = "0.1.0", features = ["webgpu"] } # or "cuda" for CUDA support
```

## Usage

### Basic Usage

```rust
use burn::tensor::{Tensor, TensorData};
use deepseek_r1_burn::{deepseek_r1_config, DeepSeekR1, DeepSeekTokenizer};
use std::path::Path;

// Assumes the `webgpu` feature; any enabled Burn backend works here.
type MyBackend = burn::backend::Wgpu;

fn main() {
    // Create model configuration
    let config = deepseek_r1_config();
    let device = burn::tensor::Device::<MyBackend>::default();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);

    // Load tokenizer
    let tokenizer = DeepSeekTokenizer::new(Path::new("path/to/tokenizer.json"))
        .expect("Failed to load tokenizer");

    // Tokenize input
    let input = "Hello, world!";
    let tokens = tokenizer.encode(input).expect("Failed to encode input");
    let seq_len = tokens.len(); // capture before `tokens` is moved into the tensor

    // Forward pass
    let input_tensor = Tensor::<MyBackend, 2>::from_data(
        TensorData::new(tokens, [1, seq_len]),
        &device,
    );
    let output = model.forward(input_tensor);

    // Greedy decode: take the most likely token at each position
    let output_tokens: Vec<u32> = output
        .argmax(2)
        .into_data()
        .convert::<u32>()
        .to_vec()
        .expect("Failed to read output tokens");
    let text = tokenizer.decode(&output_tokens).expect("Failed to decode output");
    println!("Generated text: {}", text);
}
```

### Model Serialization

```rust
use deepseek_r1_burn::{deepseek_r1_config, DeepSeekR1};
use std::path::Path;

// Assumes the `webgpu` feature; any enabled Burn backend works here.
type MyBackend = burn::backend::Wgpu;

fn main() {
    let config = deepseek_r1_config();
    let device = burn::tensor::Device::<MyBackend>::default();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);

    // Save model
    model.save_file(Path::new("model.pt")).expect("Failed to save model");

    // Load model
    let loaded_model: DeepSeekR1<MyBackend> = DeepSeekR1::load_file(Path::new("model.pt"), &device)
        .expect("Failed to load model");
}
```

### Text Generation

```rust
use deepseek_r1_burn::{deepseek_r1_config, DeepSeekR1, DeepSeekTokenizer};
use std::path::Path;

// Assumes the `webgpu` feature; `generate_text` is defined in `examples/generate.rs`.
type MyBackend = burn::backend::Wgpu;

fn main() {
    let config = deepseek_r1_config();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);
    let tokenizer = DeepSeekTokenizer::new(Path::new("path/to/tokenizer.json"))
        .expect("Failed to load tokenizer");

    // Generate up to 100 new tokens with temperature sampling (temperature = 0.8)
    let prompt = "Once upon a time";
    let generated = generate_text(&model, &tokenizer, prompt, 100, 0.8);
    println!("Generated text: {}", generated);
}
```

### Fine-tuning

```rust
use deepseek_r1_burn::{deepseek_r1_config, train, DeepSeekR1, TrainingConfig};

// Assumes the `webgpu` feature; any enabled Burn backend works here.
type MyBackend = burn::backend::Wgpu;

fn main() {
    let config = deepseek_r1_config();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);

    // Create training configuration
    let mut training_config = TrainingConfig::default();
    training_config.learning_rate = 5e-5; // Lower learning rate for fine-tuning
    training_config.epochs = 3;
    training_config.batch_size = 4;

    // Fine-tune the model; see `examples/finetune.rs` for how to build `dataset`
    let fine_tuned_model = train(model, dataset, training_config);
}
```

## Examples

The repository includes several examples:

- `examples/generate.rs`: Text generation with temperature sampling
- `examples/save_load.rs`: Model serialization and deserialization
- `examples/finetune.rs`: Fine-tuning the model on custom data

Run an example with a backend feature enabled, for example:

```bash
cargo run --example generate --features webgpu
```

## Feature Flags

- `std`: Enable standard library support (default)
- `webgpu`: Enable WebGPU backend support
- `cuda`: Enable CUDA backend support
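
When building from this repository, the backend is likewise chosen at build time:

```bash
# Build with the WebGPU backend
cargo build --features webgpu

# Build with the CUDA backend
cargo build --features cuda
```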

## License

This project is dual-licensed under the Apache License 2.0 and the MIT License; you may use it under either. See the [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) files for details.

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.
73 changes: 73 additions & 0 deletions deepseek-r1-burn/examples/finetune.rs
@@ -0,0 +1,73 @@
use burn::{
    data::dataset::Dataset,
    tensor::{backend::Backend, Tensor, TensorData},
};
use deepseek_r1_burn::{
    deepseek_r1_config, train, DeepSeekR1, DeepSeekTokenizer, TrainingConfig,
};
use std::path::Path;

// Assumes the `webgpu` feature; swap in any other enabled Burn backend.
type MyBackend = burn::backend::Wgpu;

struct TextDataset {
    texts: Vec<String>,
    tokenizer: DeepSeekTokenizer,
    max_length: usize,
}

impl<B: Backend> Dataset<Tensor<B, 2>> for TextDataset {
    fn get(&self, index: usize) -> Option<Tensor<B, 2>> {
        let text = self.texts.get(index)?;
        let tokens = self.tokenizer.encode(text).ok()?;

        // Truncate or zero-pad to max_length
        let mut padded = vec![0; self.max_length];
        let len = tokens.len().min(self.max_length);
        padded[..len].copy_from_slice(&tokens[..len]);

        Some(Tensor::from_data(
            TensorData::new(padded, [1, self.max_length]),
            &B::Device::default(),
        ))
    }

    fn len(&self) -> usize {
        self.texts.len()
    }
}

fn main() {
    // Create model configuration and model
    let config = deepseek_r1_config();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);

    // Load tokenizer
    let tokenizer = DeepSeekTokenizer::new(Path::new("path/to/tokenizer.json"))
        .expect("Failed to load tokenizer");

    // Create dataset
    let texts = vec![
        "This is a sample text for fine-tuning.".to_string(),
        "Another example text for the model to learn from.".to_string(),
        // Add more training examples here
    ];
    let dataset = TextDataset {
        texts,
        tokenizer,
        max_length: 512,
    };

    // Configure training
    let mut training_config = TrainingConfig::default();
    training_config.learning_rate = 5e-5; // Lower learning rate for fine-tuning
    training_config.epochs = 3;
    training_config.batch_size = 4;

    // Fine-tune the model
    let fine_tuned_model = train(model, dataset, training_config);

    // Save the fine-tuned model
    fine_tuned_model
        .save_file(Path::new("fine_tuned_model.pt"))
        .expect("Failed to save fine-tuned model");
    println!("Model fine-tuning completed and saved!");
}
66 changes: 66 additions & 0 deletions deepseek-r1-burn/examples/generate.rs
@@ -0,0 +1,66 @@
use burn::tensor::{activation::softmax, backend::Backend, Tensor, TensorData};
use deepseek_r1_burn::{deepseek_r1_config, DeepSeekR1, DeepSeekTokenizer};
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};

// Assumes the `webgpu` feature; swap in any other enabled Burn backend.
type MyBackend = burn::backend::Wgpu;

fn generate_text<B: Backend>(
    model: &DeepSeekR1<B>,
    tokenizer: &DeepSeekTokenizer,
    prompt: &str,
    max_length: usize,
    temperature: f32,
) -> String {
    let device = B::Device::default();
    let mut tokens = tokenizer.encode(prompt).expect("Failed to encode prompt");
    let mut generated = String::new();

    for _ in 0..max_length {
        // Convert the token ids seen so far to a [1, seq_len] tensor
        let seq_len = tokens.len();
        let input = Tensor::<B, 2>::from_data(
            TensorData::new(tokens.clone(), [1, seq_len]),
            &device,
        );

        // Get the logits for the last position, assuming `forward`
        // returns [batch, seq_len, vocab_size]
        let output = model.forward(input);
        let [_, _, vocab_size] = output.dims();
        let logits: Tensor<B, 1> = output
            .slice([0..1, seq_len - 1..seq_len])
            .reshape([vocab_size]);

        // Apply temperature, then softmax to get a probability distribution
        let probs = softmax(logits.div_scalar(temperature), 0);
        let probs: Vec<f32> = probs
            .into_data()
            .convert::<f32>()
            .to_vec()
            .expect("Failed to read probabilities");

        // Sample the next token by inverse-CDF sampling. The uniform draw
        // below uses the clock as a crude, std-only randomness source to
        // keep the example dependency-free; prefer the `rand` crate.
        let r = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("clock went backwards")
            .subsec_nanos() as f32
            / 1_000_000_000.0;
        let mut cumulative = 0.0;
        let mut next_token = (vocab_size - 1) as u32;
        for (i, p) in probs.iter().enumerate() {
            cumulative += p;
            if r < cumulative {
                next_token = i as u32;
                break;
            }
        }

        // Add the token to the sequence and decode it
        tokens.push(next_token);
        let new_text = tokenizer
            .decode(&[next_token])
            .expect("Failed to decode token");
        generated.push_str(&new_text);

        // Stop on EOS (assumed here to be the last vocab id)
        if next_token == tokenizer.vocab_size() as u32 - 1 {
            break;
        }
    }

    generated
}

fn main() {
    // Create model configuration and model
    let config = deepseek_r1_config();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);

    // Load tokenizer
    let tokenizer = DeepSeekTokenizer::new(Path::new("path/to/tokenizer.json"))
        .expect("Failed to load tokenizer");

    // Generate text
    let prompt = "Once upon a time";
    let generated = generate_text(&model, &tokenizer, prompt, 100, 0.8);
    println!("Generated text: {}", generated);
}
25 changes: 25 additions & 0 deletions deepseek-r1-burn/examples/save_load.rs
@@ -0,0 +1,25 @@
use deepseek_r1_burn::{deepseek_r1_config, DeepSeekR1};
use std::path::Path;

// Assumes the `webgpu` feature; swap in any other enabled Burn backend.
type MyBackend = burn::backend::Wgpu;

fn main() {
    // Create model configuration and model
    let config = deepseek_r1_config();
    let device = burn::tensor::Device::<MyBackend>::default();
    let model: DeepSeekR1<MyBackend> = DeepSeekR1::new(&config);

    // Save model
    let save_path = Path::new("model.pt");
    model.save_file(save_path).expect("Failed to save model");

    // Load model
    let loaded_model: DeepSeekR1<MyBackend> = DeepSeekR1::load_file(save_path, &device)
        .expect("Failed to load model");

    // Verify both models produce identical output on the same input
    let input = burn::tensor::Tensor::<MyBackend, 2>::zeros([1, 10], &device);
    let output1 = model.forward(input.clone());
    let output2 = loaded_model.forward(input);
    assert!(output1.equal(output2).all().into_scalar());
    println!("Model successfully saved and loaded!");
}