Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pre-compute/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod app_runner;
pub mod dataset;
pub mod errors;
pub mod pre_compute_app;
pub mod pre_compute_args;
Expand Down
233 changes: 233 additions & 0 deletions pre-compute/src/compute/dataset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
use crate::compute::errors::ReplicateStatusCause;
use crate::compute::utils::file_utils::download_from_url;
use crate::compute::utils::hash_utils::sha256_from_bytes;
use aes::Aes256;
use base64::{Engine as _, engine::general_purpose};
use cbc::{
Decryptor,
cipher::{BlockDecryptMut, KeyIvInit, block_padding::Pkcs7},
};
use log::{error, info};
use multiaddr::Multiaddr;
use std::str::FromStr;

/// AES-256 decryptor operating in CBC mode (`cbc::Decryptor` over `aes::Aes256`).
type Aes256CbcDec = Decryptor<Aes256>;
/// Base URLs of the IPFS gateways tried, in this order, when a dataset URL is an IPFS
/// multi-address. The multi-address is appended directly to the gateway URL, and the
/// first gateway that returns content is used.
const IPFS_GATEWAYS: &[&str] = &[
"https://ipfs-gateway.v8-bellecour.iex.ec",
"https://gateway.ipfs.io",
"https://gateway.pinata.cloud",
];
/// Required length, in bytes, of the decoded decryption key (AES-256).
const AES_KEY_LENGTH: usize = 32;
/// Length, in bytes, of the IV that prefixes the encrypted dataset content.
const AES_IV_LENGTH: usize = 16;

/// Represents a dataset for bulk processing in a Trusted Execution Environment (TEE).
///
/// This structure contains all the information needed to download, verify, and decrypt
/// a single dataset as part of bulk processing.
///
/// `Debug` is derived only in test builds.
/// NOTE(review): presumably so that `key` cannot be printed by accident outside of
/// tests — confirm before deriving `Debug` unconditionally.
#[cfg_attr(test, derive(Debug))]
#[derive(Clone, Default)]
pub struct Dataset {
/// Location of the encrypted dataset: either a URL that is downloaded as-is, or an
/// IPFS multi-address that is resolved through the gateways in `IPFS_GATEWAYS`.
pub url: String,
/// Expected checksum of the encrypted content. It is compared verbatim with the value
/// returned by `sha256_from_bytes` for the downloaded bytes.
pub checksum: String,
/// File name associated with the dataset.
/// NOTE(review): not read anywhere in this module — presumably the name under which
/// the decrypted dataset is stored; verify against callers.
pub filename: String,
/// Decryption key, Base64-encoded with the standard alphabet. It must decode to
/// exactly `AES_KEY_LENGTH` (32) bytes for decryption to be attempted.
pub key: String,
}

impl Dataset {
    /// Creates a [`Dataset`] by moving each argument into the field of the same name.
    ///
    /// No validation is performed here; invalid values surface later as errors from
    /// [`Dataset::download_encrypted_dataset`] or [`Dataset::decrypt_dataset`].
    pub fn new(url: String, checksum: String, filename: String, key: String) -> Self {
        Self {
            url,
            checksum,
            filename,
            key,
        }
    }

    /// Fetches the encrypted dataset and validates it against the expected checksum.
    ///
    /// When `self.url` is a multi-address, every entry of `IPFS_GATEWAYS` is tried in
    /// order (gateway URL immediately followed by the multi-address) and the first
    /// successful download is kept. Otherwise `self.url` is downloaded directly.
    ///
    /// # Arguments
    ///
    /// * `chain_task_id` - Chain task ID, used only in log messages.
    ///
    /// # Returns
    ///
    /// * `Ok(Vec<u8>)` with the still-encrypted content when the download succeeds and
    ///   its SHA-256 checksum equals `self.checksum`.
    /// * `Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)` when no download
    ///   attempt returns content.
    /// * `Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum)` when the computed
    ///   checksum differs from `self.checksum`.
    pub fn download_encrypted_dataset(
        &self,
        chain_task_id: &str,
    ) -> Result<Vec<u8>, ReplicateStatusCause> {
        info!(
            "Downloading encrypted dataset file [chainTaskId:{chain_task_id}, url:{}]",
            self.url
        );

        let downloaded = if is_multi_address(&self.url) {
            // Walk the gateways in declaration order and stop at the first hit.
            let mut first_hit = None;
            for gateway in IPFS_GATEWAYS {
                let full_url = format!("{gateway}{}", self.url);
                info!("Attempting to download dataset from {full_url}");

                match download_from_url(&full_url) {
                    Some(content) => {
                        info!("Successfully downloaded from {full_url}");
                        first_hit = Some(content);
                        break;
                    }
                    None => info!("Failed to download from {full_url}"),
                }
            }
            first_hit
        } else {
            download_from_url(&self.url)
        };

        let encrypted_content =
            downloaded.ok_or(ReplicateStatusCause::PreComputeDatasetDownloadFailed)?;

        info!("Checking encrypted dataset checksum [chainTaskId:{chain_task_id}]");
        let actual_checksum = sha256_from_bytes(&encrypted_content);

        if actual_checksum == self.checksum {
            info!("Dataset downloaded and verified successfully.");
            return Ok(encrypted_content);
        }

        error!(
            "Invalid dataset checksum [chainTaskId:{chain_task_id}, expected:{}, actual:{actual_checksum}]",
            self.checksum
        );
        Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum)
    }

    /// Decrypts an encrypted dataset with AES-256 in CBC mode and PKCS#7 padding.
    ///
    /// The input layout is `IV (16 bytes) || ciphertext`. The key is taken from
    /// `self.key`, which must be standard-alphabet Base64 decoding to exactly 32 bytes.
    ///
    /// # Arguments
    ///
    /// * `encrypted_content` - Full encrypted dataset, including the IV prefix.
    ///
    /// # Returns
    ///
    /// * `Ok(Vec<u8>)` with the plaintext when decryption and unpadding succeed.
    /// * `Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)` when the key is
    ///   not valid Base64, the decoded key is not 32 bytes long, the content is shorter
    ///   than the IV, or decryption/unpadding fails.
    pub fn decrypt_dataset(
        &self,
        encrypted_content: &[u8],
    ) -> Result<Vec<u8>, ReplicateStatusCause> {
        let key = match general_purpose::STANDARD.decode(&self.key) {
            Ok(bytes) => bytes,
            Err(_) => return Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed),
        };

        // Both lengths must be validated before slicing: the conversions below
        // expect an exact-size key and a full IV.
        let lengths_are_valid =
            key.len() == AES_KEY_LENGTH && encrypted_content.len() >= AES_IV_LENGTH;
        if !lengths_are_valid {
            return Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed);
        }

        let (iv, ciphertext) = encrypted_content.split_at(AES_IV_LENGTH);
        let decryptor = Aes256CbcDec::new(key.as_slice().into(), iv.into());

        decryptor
            .decrypt_padded_vec_mut::<Pkcs7>(ciphertext)
            .map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
    }
}

/// Tells whether `uri` should be handled as a multi-address rather than a plain URL.
///
/// Returns `true` only when `uri` contains at least one non-whitespace character and
/// parses as a [`Multiaddr`]. Note that the untrimmed string is what gets parsed.
fn is_multi_address(uri: &str) -> bool {
    // Blank input is rejected up front, before any parsing is attempted.
    if uri.trim().is_empty() {
        return false;
    }
    Multiaddr::from_str(uri).is_ok()
}

#[cfg(test)]
mod tests {
    //! Unit tests for [`Dataset`].
    //!
    //! Tests that call `download_encrypted_dataset` perform real HTTP requests and
    //! therefore need network access. The remaining `decrypt_dataset` failure tests
    //! are hermetic.
    use super::*;

    const CHAIN_TASK_ID: &str = "0x123456789abcdef";
    const DATASET_CHECKSUM: &str =
        "0x02a12ef127dcfbdb294a090c8f0b69a0ca30b7940fc36cabf971f488efd374d7";
    const ENCRYPTED_DATASET_KEY: &str = "ubA6H9emVPJT91/flYAmnKHC0phSV3cfuqsLxQfgow0=";
    const HTTP_DATASET_URL: &str = "https://raw.githubusercontent.com/iExecBlockchainComputing/tee-worker-pre-compute-rust/main/src/tests_resources/encrypted-data.bin";
    const PLAIN_DATA_FILE: &str = "plain-data.txt";
    const IPFS_DATASET_URL: &str = "/ipfs/QmUVhChbLFiuzNK1g2GsWyWEiad7SXPqARnWzGumgziwEp";

    /// Builds the reference dataset (HTTP URL, matching checksum and valid key).
    fn get_test_dataset() -> Dataset {
        Dataset::new(
            HTTP_DATASET_URL.to_string(),
            DATASET_CHECKSUM.to_string(),
            PLAIN_DATA_FILE.to_string(),
            ENCRYPTED_DATASET_KEY.to_string(),
        )
    }

    // region download_encrypted_dataset
    #[test]
    fn download_encrypted_dataset_success() {
        let dataset = get_test_dataset();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        assert!(actual_content.is_ok());
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_dataset_url() {
        let mut dataset = get_test_dataset();
        dataset.url = "http://bad-url".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        assert_eq!(
            actual_content,
            Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)
        );
    }

    #[test]
    fn download_encrypted_dataset_success_with_valid_iexec_gateway() {
        let mut dataset = get_test_dataset();
        dataset.url = IPFS_DATASET_URL.to_string();
        dataset.checksum =
            "0x323b1637c7999942fbebfe5d42fe15dbfe93737577663afa0181938d7ad4a2ac".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        let expected_content = Ok("hello world !\n".as_bytes().to_vec());
        assert_eq!(actual_content, expected_content);
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_gateway() {
        let mut dataset = get_test_dataset();
        dataset.url = "/ipfs/INVALID_IPFS_DATASET_URL".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        let expected_content = Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed);
        assert_eq!(actual_content, expected_content);
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_dataset_checksum() {
        let mut dataset = get_test_dataset();
        dataset.checksum = "invalid_dataset_checksum".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        let expected_content = Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum);
        assert_eq!(actual_content, expected_content);
    }
    // endregion

    // region decrypt_dataset
    #[test]
    fn decrypt_dataset_success_with_valid_dataset() {
        let dataset = get_test_dataset();

        let encrypted_data = dataset.download_encrypted_dataset(CHAIN_TASK_ID).unwrap();
        let expected_plain_data = Ok("Some very useful data.".as_bytes().to_vec());
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(actual_plain_data, expected_plain_data);
    }

    #[test]
    fn decrypt_dataset_failure_with_bad_key() {
        let mut dataset = get_test_dataset();
        dataset.key = "bad_key".to_string();
        // The key is rejected while being Base64-decoded, before the content is read,
        // so placeholder bytes are enough and the test does not need the network.
        let encrypted_data = vec![0u8; 2 * AES_IV_LENGTH];
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(
            actual_plain_data,
            Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
        );
    }

    #[test]
    fn decrypt_dataset_failure_with_missing_key() {
        let mut dataset = get_test_dataset();
        // An empty key decodes to zero bytes, which fails the key-length check.
        dataset.key = String::new();
        let encrypted_data = vec![0u8; 2 * AES_IV_LENGTH];
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(
            actual_plain_data,
            Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
        );
    }

    #[test]
    fn decrypt_dataset_failure_with_wrong_key_length() {
        let mut dataset = get_test_dataset();
        // Valid Base64, but it decodes to 3 bytes instead of the required 32.
        dataset.key = "AAAA".to_string();
        let encrypted_data = vec![0u8; 2 * AES_IV_LENGTH];
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(
            actual_plain_data,
            Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
        );
    }

    #[test]
    fn decrypt_dataset_failure_with_content_shorter_than_iv() {
        let dataset = get_test_dataset();
        // One byte short of a full IV: must be rejected instead of being sliced.
        let encrypted_data = vec![0u8; AES_IV_LENGTH - 1];
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(
            actual_plain_data,
            Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
        );
    }
    // endregion
}
Loading
Loading