Skip to content

Commit 885e7bf

Browse files
adamreeve and mbrobbel authored
Add more examples of using Parquet encryption (#7374)
* Add encryption round-trip example * Add examples for building FileEncryptionProperties and FileDecryptionProperties * Add an example of using a KeyRetriever * Apply suggestions from code review Co-authored-by: Matthijs Brobbel <[email protected]> --------- Co-authored-by: Matthijs Brobbel <[email protected]>
1 parent fec03ea commit 885e7bf

File tree

3 files changed

+237
-3
lines changed

3 files changed

+237
-3
lines changed

parquet/src/encryption/decrypt.rs

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,77 @@ use std::io::Read;
2828
use std::sync::Arc;
2929

3030
/// Trait for retrieving an encryption key using the key's metadata
31+
///
32+
/// # Example
33+
///
34+
/// This shows how you might use a `KeyRetriever` to decrypt a Parquet file
35+
/// if you have a set of known encryption keys with identifiers, but at read time
36+
/// you may not know which columns were encrypted and which keys were used.
37+
///
38+
/// In practice, the key metadata might instead store an encrypted key that must
39+
/// be decrypted with a Key Management Server.
40+
///
41+
/// ```
42+
/// # use std::collections::HashMap;
43+
/// # use std::sync::{Arc, Mutex};
44+
/// # use parquet::encryption::decrypt::{FileDecryptionProperties, KeyRetriever};
45+
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
46+
/// # use parquet::errors::ParquetError;
47+
/// // Define known encryption keys
48+
/// let mut keys = HashMap::new();
49+
/// keys.insert("kf".to_owned(), b"0123456789012345".to_vec());
50+
/// keys.insert("kc1".to_owned(), b"1234567890123450".to_vec());
51+
/// keys.insert("kc2".to_owned(), b"1234567890123451".to_vec());
52+
///
53+
/// // Create encryption properties for writing a file,
54+
/// // and specify the key identifiers as the key metadata.
55+
/// let encryption_properties = FileEncryptionProperties::builder(keys.get("kf").unwrap().clone())
56+
/// .with_footer_key_metadata("kf".into())
57+
/// .with_column_key_and_metadata("x", keys.get("kc1").unwrap().clone(), "kc1".as_bytes().into())
58+
/// .with_column_key_and_metadata("y", keys.get("kc2").unwrap().clone(), "kc2".as_bytes().into())
59+
/// .build()?;
60+
///
61+
/// // Write an encrypted file with the properties
62+
/// // ...
63+
///
64+
/// // Define a KeyRetriever that can get encryption keys using their identifiers
65+
/// struct CustomKeyRetriever {
66+
/// keys: Mutex<HashMap<String, Vec<u8>>>,
67+
/// }
68+
///
69+
/// impl KeyRetriever for CustomKeyRetriever {
70+
/// fn retrieve_key(&self, key_metadata: &[u8]) -> parquet::errors::Result<Vec<u8>> {
71+
/// // Metadata is bytes, so convert it to a string identifier
72+
/// let key_metadata = std::str::from_utf8(key_metadata).map_err(|e| {
73+
/// ParquetError::General(format!("Could not convert key metadata to string: {e}"))
74+
/// })?;
75+
/// // Lookup the key
76+
/// let keys = self.keys.lock().unwrap();
77+
/// match keys.get(key_metadata) {
78+
/// Some(key) => Ok(key.clone()),
79+
/// None => Err(ParquetError::General(format!(
80+
/// "Could not retrieve key for metadata {key_metadata:?}"
81+
/// ))),
82+
/// }
83+
/// }
84+
/// }
85+
///
86+
/// let key_retriever = Arc::new(CustomKeyRetriever {
87+
/// keys: Mutex::new(keys),
88+
/// });
89+
///
90+
/// // Create decryption properties for reading an encrypted file.
91+
/// // Note that we don't need to specify which columns are encrypted,
92+
/// // this is determined by the file metadata and the required keys will be retrieved
93+
/// // dynamically using our key retriever.
94+
/// let decryption_properties = FileDecryptionProperties::with_key_retriever(key_retriever)
95+
/// .build()?;
96+
///
97+
/// // Read an encrypted file with the decryption properties
98+
/// // ...
99+
///
100+
/// # Ok::<(), parquet::errors::ParquetError>(())
101+
/// ```
31102
pub trait KeyRetriever: Send + Sync {
32103
/// Retrieve a decryption key given the key metadata
33104
fn retrieve_key(&self, key_metadata: &[u8]) -> Result<Vec<u8>>;
@@ -195,7 +266,43 @@ impl PartialEq for DecryptionKeys {
195266
}
196267
}
197268

198-
/// FileDecryptionProperties hold keys and AAD data required to decrypt a Parquet file.
269+
/// `FileDecryptionProperties` hold keys and AAD data required to decrypt a Parquet file.
270+
///
271+
/// When reading Arrow data, the `FileDecryptionProperties` should be included in the
272+
/// [`ArrowReaderOptions`](crate::arrow::arrow_reader::ArrowReaderOptions) using
273+
/// [`with_file_decryption_properties`](crate::arrow::arrow_reader::ArrowReaderOptions::with_file_decryption_properties).
274+
///
275+
/// # Examples
276+
///
277+
/// Create `FileDecryptionProperties` for a file encrypted with uniform encryption,
278+
/// where all metadata and data are encrypted with the footer key:
279+
/// ```
280+
/// # use parquet::encryption::decrypt::FileDecryptionProperties;
281+
/// let file_decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
282+
/// .build()?;
283+
/// # Ok::<(), parquet::errors::ParquetError>(())
284+
/// ```
285+
///
286+
/// Create properties for a file where columns are encrypted with different keys:
287+
/// ```
288+
/// # use parquet::encryption::decrypt::FileDecryptionProperties;
289+
/// let file_decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
290+
/// .with_column_key("x", b"1234567890123450".into())
291+
/// .with_column_key("y", b"1234567890123451".into())
292+
/// .build()?;
293+
/// # Ok::<(), parquet::errors::ParquetError>(())
294+
/// ```
295+
///
296+
/// Specify additional authenticated data, used to protect against data replacement.
297+
/// This must match the AAD prefix provided when the file was written, otherwise
298+
/// data decryption will fail.
299+
/// ```
300+
/// # use parquet::encryption::decrypt::FileDecryptionProperties;
301+
/// let file_decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
302+
/// .with_aad_prefix("example_file".into())
303+
/// .build()?;
304+
/// # Ok::<(), parquet::errors::ParquetError>(())
305+
/// ```
199306
#[derive(Clone, PartialEq)]
200307
pub struct FileDecryptionProperties {
201308
keys: DecryptionKeys,
@@ -277,6 +384,8 @@ impl std::fmt::Debug for FileDecryptionProperties {
277384
}
278385

279386
/// Builder for [`FileDecryptionProperties`]
387+
///
388+
/// See [`FileDecryptionProperties`] for example usage.
280389
pub struct DecryptionPropertiesBuilder {
281390
footer_key: Option<Vec<u8>>,
282391
key_retriever: Option<Arc<dyn KeyRetriever>>,

parquet/src/encryption/encrypt.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,41 @@ impl EncryptionKey {
5353

5454
#[derive(Debug, Clone, PartialEq)]
5555
/// Defines how data in a Parquet file should be encrypted
56+
///
57+
/// The `FileEncryptionProperties` should be included in the [`WriterProperties`](crate::file::properties::WriterProperties)
58+
/// used to write a file by using [`WriterPropertiesBuilder::with_file_encryption_properties`](crate::file::properties::WriterPropertiesBuilder::with_file_encryption_properties).
59+
///
60+
/// # Examples
61+
///
62+
/// Create `FileEncryptionProperties` for a file encrypted with uniform encryption,
63+
/// where all metadata and data are encrypted with the footer key:
64+
/// ```
65+
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
66+
/// let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
67+
/// .build()?;
68+
/// # Ok::<(), parquet::errors::ParquetError>(())
69+
/// ```
70+
///
71+
/// Create properties for a file where columns are encrypted with different keys.
72+
/// Any columns without a key specified will be unencrypted:
73+
/// ```
74+
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
75+
/// let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
76+
/// .with_column_key("x", b"1234567890123450".into())
77+
/// .with_column_key("y", b"1234567890123451".into())
78+
/// .build()?;
79+
/// # Ok::<(), parquet::errors::ParquetError>(())
80+
/// ```
81+
///
82+
/// Specify additional authenticated data, used to protect against data replacement.
83+
/// This should represent the file identity:
84+
/// ```
85+
/// # use parquet::encryption::encrypt::FileEncryptionProperties;
86+
/// let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
87+
/// .with_aad_prefix("example_file".into())
88+
/// .build()?;
89+
/// # Ok::<(), parquet::errors::ParquetError>(())
90+
/// ```
5691
pub struct FileEncryptionProperties {
5792
encrypt_footer: bool,
5893
footer_key: EncryptionKey,
@@ -141,6 +176,8 @@ impl FileEncryptionProperties {
141176
}
142177

143178
/// Builder for [`FileEncryptionProperties`]
179+
///
180+
/// See [`FileEncryptionProperties`] for example usage.
144181
pub struct EncryptionPropertiesBuilder {
145182
encrypt_footer: bool,
146183
footer_key: EncryptionKey,

parquet/src/encryption/mod.rs

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,96 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Encryption implementation specific to Parquet, as described
19-
//! in the [spec](https://github.com/apache/parquet-format/blob/master/Encryption.md).
18+
//! This module implements Parquet Modular Encryption, as described in the
19+
//! [specification](https://github.com/apache/parquet-format/blob/master/Encryption.md).
20+
//!
21+
//! # Example of writing and reading an encrypted Parquet file
22+
//!
23+
//! ```
24+
//! use arrow::array::{ArrayRef, Float32Array, Int32Array, RecordBatch};
25+
//! use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
26+
//! use parquet::arrow::ArrowWriter;
27+
//! use parquet::encryption::decrypt::FileDecryptionProperties;
28+
//! use parquet::encryption::encrypt::FileEncryptionProperties;
29+
//! use parquet::errors::Result;
30+
//! use parquet::file::properties::WriterProperties;
31+
//! use std::fs::File;
32+
//! use std::sync::Arc;
33+
//! use tempfile::TempDir;
34+
//!
35+
//! // Define 16 byte AES encryption keys to use.
36+
//! static FOOTER_KEY: &[u8; 16] = b"0123456789012345";
37+
//! static COLUMN_KEY_1: &[u8; 16] = b"1234567890123450";
38+
//! static COLUMN_KEY_2: &[u8; 16] = b"1234567890123451";
39+
//!
40+
//! let temp_dir = TempDir::new()?;
41+
//! let file_path = temp_dir.path().join("encrypted_example.parquet");
42+
//!
43+
//! // Create file encryption properties, which define how the file is encrypted.
44+
//! // We will specify a key to encrypt the footer metadata,
45+
//! // then separate keys for different columns.
46+
//! // This allows fine-grained control of access to different columns within a Parquet file.
47+
//! // Note that any columns without an encryption key specified will be left unencrypted.
48+
//! // If only a footer key is specified, then all columns are encrypted with the footer key.
49+
//! let encryption_properties = FileEncryptionProperties::builder(FOOTER_KEY.into())
50+
//! .with_column_key("x", COLUMN_KEY_1.into())
51+
//! .with_column_key("y", COLUMN_KEY_2.into())
52+
//! // We also set an AAD prefix, which is optional.
53+
//! // This contributes to the "additional authenticated data" that is used to verify file
54+
//! // integrity and prevents data being swapped with data encrypted with the same key.
55+
//! .with_aad_prefix(b"example_aad".into())
56+
//! // Specify that the AAD prefix is stored in the file, so readers don't need
57+
//! // to provide it to read the data, but can optionally provide it if they want to
58+
//! // verify file integrity.
59+
//! .with_aad_prefix_storage(true)
60+
//! .build()?;
61+
//!
62+
//! let writer_properties = WriterProperties::builder()
63+
//! .with_file_encryption_properties(encryption_properties)
64+
//! .build();
65+
//!
66+
//! // Write the encrypted Parquet file
67+
//! {
68+
//! let file = File::create(&file_path)?;
69+
//!
70+
//! let ids = Int32Array::from(vec![0, 1, 2, 3, 4, 5]);
71+
//! let x_vals = Float32Array::from(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5]);
72+
//! let y_vals = Float32Array::from(vec![1.0, 1.1, 1.2, 1.3, 1.4, 1.5]);
73+
//! let batch = RecordBatch::try_from_iter(vec![
74+
//! ("id", Arc::new(ids) as ArrayRef),
75+
//! ("x", Arc::new(x_vals) as ArrayRef),
76+
//! ("y", Arc::new(y_vals) as ArrayRef),
77+
//! ])?;
78+
//!
79+
//! let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(writer_properties))?;
80+
//!
81+
//! writer.write(&batch)?;
82+
//! writer.close()?;
83+
//! }
84+
//!
85+
//! // In order to read the encrypted Parquet file, we need to know the encryption
86+
//! // keys used to encrypt it.
87+
//! // We don't need to provide the AAD prefix as it was stored in the file metadata,
88+
//! // but we could specify it here if we wanted to verify the file hasn't been tampered with.
89+
//! let decryption_properties = FileDecryptionProperties::builder(FOOTER_KEY.into())
90+
//! .with_column_key("x", COLUMN_KEY_1.into())
91+
//! .with_column_key("y", COLUMN_KEY_2.into())
92+
//! .build()?;
93+
//!
94+
//! let reader_options =
95+
//! ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
96+
//!
97+
//! // Read the file using the configured decryption properties
98+
//! let file = File::open(&file_path)?;
99+
//!
100+
//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, reader_options)?;
101+
//! let record_reader = builder.build()?;
102+
//! for batch in record_reader {
103+
//! let batch = batch?;
104+
//! println!("Read batch: {batch:?}");
105+
//! }
106+
//! # Ok::<(), parquet::errors::ParquetError>(())
107+
//! ```
20108
21109
pub(crate) mod ciphers;
22110
pub mod decrypt;

0 commit comments

Comments
 (0)