Skip to content

Commit e5215a1

Browse files
authored
feat: Add EncryptedKey struct (#1326)
## Which issue does this PR close? Implements the `EncryptedKey` type as preparation for v3 Metadata. https://github.com/apache/iceberg/pull/12162/files This doesn't implement the full v3 TableMetadata yet, but helps to keep the v3 commit smaller. Because v3 Metadata is not implemented yet, there is no way to set / deserialize this field from Metadata.json yet. ## What changes are included in this PR? - Add `EncryptedKey`, including serialization of `encrypted_key_metadata` as base64 - Add the `encryption_keys` field to `TableMetadata` ## Are these changes tested? Yes
1 parent 5819717 commit e5215a1

File tree

8 files changed

+251
-0
lines changed

8 files changed

+251
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ async-std = "1.12"
5454
async-trait = "0.1.88"
5555
aws-config = "1.6.1"
5656
aws-sdk-glue = "1.39"
57+
base64 = "0.22.1"
5758
bimap = "0.6"
5859
bytes = "1.10"
5960
chrono = "0.4.40"

crates/iceberg/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ arrow-select = { workspace = true }
5555
arrow-string = { workspace = true }
5656
async-std = { workspace = true, optional = true, features = ["attributes"] }
5757
async-trait = { workspace = true }
58+
base64 = { workspace = true }
5859
bimap = { workspace = true }
5960
bytes = { workspace = true }
6061
chrono = { workspace = true }
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::collections::HashMap;
19+
20+
use serde::{Deserialize, Serialize};
21+
22+
/// Keys used for table encryption
23+
///
24+
/// Serializing of `encrypted_key_metadata` is done using base64 encoding.
25+
#[derive(Debug, Clone, PartialEq, Eq, typed_builder::TypedBuilder)]
26+
pub struct EncryptedKey {
27+
/// Unique identifier for the key
28+
#[builder(setter(into))]
29+
key_id: String,
30+
/// Encrypted key metadata as binary data
31+
#[builder(setter(into))]
32+
encrypted_key_metadata: Vec<u8>,
33+
/// Identifier of the entity that encrypted this key
34+
#[builder(setter(into))]
35+
encrypted_by_id: String,
36+
/// Additional properties associated with the key
37+
#[builder(default)]
38+
properties: HashMap<String, String>,
39+
}
40+
41+
impl EncryptedKey {
42+
/// Returns the key ID
43+
pub fn key_id(&self) -> &str {
44+
&self.key_id
45+
}
46+
47+
/// Returns the encrypted key metadata
48+
pub fn encrypted_key_metadata(&self) -> &[u8] {
49+
&self.encrypted_key_metadata
50+
}
51+
52+
/// Returns the ID of the entity that encrypted this key
53+
pub fn encrypted_by_id(&self) -> &str {
54+
&self.encrypted_by_id
55+
}
56+
57+
/// Returns the properties map
58+
pub fn properties(&self) -> &HashMap<String, String> {
59+
&self.properties
60+
}
61+
}
62+
63+
pub(super) mod _serde {
64+
use base64::engine::general_purpose::STANDARD as BASE64;
65+
use base64::Engine as _;
66+
67+
use super::*;
68+
69+
/// Helper struct for serializing/deserializing EncryptedKey
70+
#[derive(Serialize, Deserialize)]
71+
#[serde(rename_all = "kebab-case")]
72+
pub(super) struct EncryptedKeySerde {
73+
pub key_id: String,
74+
pub encrypted_key_metadata: String, // Base64 encoded
75+
pub encrypted_by_id: String,
76+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
77+
pub properties: HashMap<String, String>,
78+
}
79+
80+
impl From<&EncryptedKey> for EncryptedKeySerde {
81+
fn from(key: &EncryptedKey) -> Self {
82+
Self {
83+
key_id: key.key_id.clone(),
84+
encrypted_key_metadata: BASE64.encode(&key.encrypted_key_metadata),
85+
encrypted_by_id: key.encrypted_by_id.clone(),
86+
properties: key.properties.clone(),
87+
}
88+
}
89+
}
90+
91+
impl TryFrom<EncryptedKeySerde> for EncryptedKey {
92+
type Error = base64::DecodeError;
93+
94+
fn try_from(serde_key: EncryptedKeySerde) -> Result<Self, Self::Error> {
95+
let encrypted_key_metadata = BASE64.decode(&serde_key.encrypted_key_metadata)?;
96+
97+
Ok(Self {
98+
key_id: serde_key.key_id,
99+
encrypted_key_metadata,
100+
encrypted_by_id: serde_key.encrypted_by_id,
101+
properties: serde_key.properties,
102+
})
103+
}
104+
}
105+
}
106+
107+
impl Serialize for EncryptedKey {
108+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
109+
where S: serde::Serializer {
110+
let serde_key = _serde::EncryptedKeySerde::from(self);
111+
serde_key.serialize(serializer)
112+
}
113+
}
114+
115+
impl<'de> Deserialize<'de> for EncryptedKey {
116+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
117+
where D: serde::Deserializer<'de> {
118+
let serde_key = _serde::EncryptedKeySerde::deserialize(deserializer)?;
119+
120+
Self::try_from(serde_key).map_err(serde::de::Error::custom)
121+
}
122+
}
123+
124+
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// A fully populated key serializes to kebab-case fields, with the
    /// metadata bytes rendered as base64.
    #[test]
    fn test_encrypted_key_serialization() {
        let properties = HashMap::from([
            ("algo".to_string(), "AES-256".to_string()),
            ("created-at".to_string(), "2023-05-15T10:30:00Z".to_string()),
        ]);

        let key = EncryptedKey::builder()
            .key_id("5f819b")
            .encrypted_key_metadata(b"iceberg".to_vec())
            .encrypted_by_id("user-456")
            .properties(properties)
            .build();

        let serialized = serde_json::to_value(&key).unwrap();

        assert_eq!(
            serialized,
            json!({
                "key-id": "5f819b",
                "encrypted-key-metadata": "aWNlYmVyZw==",
                "encrypted-by-id": "user-456",
                "properties": {
                    "algo": "AES-256",
                    "created-at": "2023-05-15T10:30:00Z"
                }
            })
        );
    }

    /// Serializing to a JSON string and parsing it back yields an equal key,
    /// including metadata bytes that are not valid UTF-8.
    #[test]
    fn test_encrypted_key_round_trip() {
        let raw_bytes: &[u8] = b"binary\0data\xff\xfe with special bytes";

        let original_key = EncryptedKey::builder()
            .key_id("key-abc")
            .encrypted_key_metadata(raw_bytes.to_vec())
            .encrypted_by_id("service-xyz")
            .properties(HashMap::from([(
                "algo".to_string(),
                "AES-256".to_string(),
            )]))
            .build();

        let encoded = serde_json::to_string(&original_key).unwrap();
        let decoded: EncryptedKey = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded, original_key);
        assert_eq!(decoded.encrypted_key_metadata(), raw_bytes);
    }

    /// An empty properties map is omitted on output and restored as empty on
    /// input.
    #[test]
    fn test_encrypted_key_empty_properties() {
        // No `.properties(..)` call: the builder default is used.
        let key = EncryptedKey::builder()
            .key_id("key-123")
            .encrypted_key_metadata(b"data".to_vec())
            .encrypted_by_id("user-456")
            .build();

        let serialized = serde_json::to_value(&key).unwrap();
        let object = serialized.as_object().unwrap();
        assert!(!object.contains_key("properties"));

        let deserialized: EncryptedKey = serde_json::from_value(serialized).unwrap();
        assert!(deserialized.properties().is_empty());
    }

    /// Metadata that is not valid base64 makes deserialization fail.
    #[test]
    fn test_invalid_base64() {
        let malformed = json!({
            "key-id": "key-123",
            "encrypted-key-metadata": "invalid@base64",
            "encrypted-by-id": "user-456"
        });

        assert!(serde_json::from_value::<EncryptedKey>(malformed).is_err());
    }
}

crates/iceberg/src/spec/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! Spec for Iceberg.
1919
2020
mod datatypes;
21+
mod encrypted_key;
2122
mod manifest;
2223
mod manifest_list;
2324
mod name_mapping;
@@ -36,6 +37,7 @@ mod view_metadata_builder;
3637
mod view_version;
3738

3839
pub use datatypes::*;
40+
pub use encrypted_key::*;
3941
pub use manifest::*;
4042
pub use manifest_list::*;
4143
pub use name_mapping::*;

crates/iceberg/src/spec/table_metadata.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ pub struct TableMetadata {
175175
pub(crate) statistics: HashMap<i64, StatisticsFile>,
176176
/// Mapping of snapshot ids to partition statistics files.
177177
pub(crate) partition_statistics: HashMap<i64, PartitionStatisticsFile>,
178+
/// Encryption Keys
179+
pub(crate) encryption_keys: HashMap<String, String>,
178180
}
179181

180182
impl TableMetadata {
@@ -418,6 +420,18 @@ impl TableMetadata {
418420
}
419421
}
420422

423+
/// Iterate over all encryption keys
424+
#[inline]
425+
pub fn encryption_keys_iter(&self) -> impl ExactSizeIterator<Item = (&String, &String)> {
426+
self.encryption_keys.iter()
427+
}
428+
429+
/// Get the encryption key for a given key id
430+
#[inline]
431+
pub fn encryption_key(&self, key_id: &str) -> Option<&String> {
432+
self.encryption_keys.get(key_id)
433+
}
434+
421435
/// Normalize this partition spec.
422436
///
423437
/// This is an internal method
@@ -905,6 +919,7 @@ pub(super) mod _serde {
905919
}),
906920
statistics: index_statistics(value.statistics),
907921
partition_statistics: index_partition_statistics(value.partition_statistics),
922+
encryption_keys: HashMap::new(),
908923
};
909924

910925
metadata.borrow_mut().try_normalize()?;
@@ -1061,6 +1076,7 @@ pub(super) mod _serde {
10611076
},
10621077
statistics: index_statistics(value.statistics),
10631078
partition_statistics: index_partition_statistics(value.partition_statistics),
1079+
encryption_keys: HashMap::new(),
10641080
};
10651081

10661082
metadata.borrow_mut().try_normalize()?;
@@ -1460,6 +1476,7 @@ mod tests {
14601476
refs: HashMap::new(),
14611477
statistics: HashMap::new(),
14621478
partition_statistics: HashMap::new(),
1479+
encryption_keys: HashMap::new(),
14631480
};
14641481

14651482
let expected_json_value = serde_json::to_value(&expected).unwrap();
@@ -1635,6 +1652,7 @@ mod tests {
16351652
refs: HashMap::from_iter(vec![("main".to_string(), SnapshotReference { snapshot_id: 638933773299822130, retention: SnapshotRetention::Branch { min_snapshots_to_keep: None, max_snapshot_age_ms: None, max_ref_age_ms: None } })]),
16361653
statistics: HashMap::new(),
16371654
partition_statistics: HashMap::new(),
1655+
encryption_keys: HashMap::new(),
16381656
};
16391657

16401658
check_table_metadata_serde(data, expected);
@@ -1732,6 +1750,7 @@ mod tests {
17321750
refs: HashMap::new(),
17331751
statistics: HashMap::new(),
17341752
partition_statistics: HashMap::new(),
1753+
encryption_keys: HashMap::new(),
17351754
};
17361755

17371756
let expected_json_value = serde_json::to_value(&expected).unwrap();
@@ -2262,6 +2281,7 @@ mod tests {
22622281
max_ref_age_ms: None,
22632282
},
22642283
})]),
2284+
encryption_keys: HashMap::new(),
22652285
};
22662286

22672287
check_table_metadata_serde(data, expected);
@@ -2396,6 +2416,7 @@ mod tests {
23962416
max_ref_age_ms: None,
23972417
},
23982418
})]),
2419+
encryption_keys: HashMap::new(),
23992420
};
24002421

24012422
check_table_metadata_serde(data, expected);
@@ -2557,6 +2578,7 @@ mod tests {
25572578
})]),
25582579
statistics: HashMap::new(),
25592580
partition_statistics: HashMap::new(),
2581+
encryption_keys: HashMap::new(),
25602582
};
25612583

25622584
check_table_metadata_serde(&metadata, expected);
@@ -2641,6 +2663,7 @@ mod tests {
26412663
refs: HashMap::new(),
26422664
statistics: HashMap::new(),
26432665
partition_statistics: HashMap::new(),
2666+
encryption_keys: HashMap::new(),
26442667
};
26452668

26462669
check_table_metadata_serde(&metadata, expected);
@@ -2709,6 +2732,7 @@ mod tests {
27092732
refs: HashMap::new(),
27102733
statistics: HashMap::new(),
27112734
partition_statistics: HashMap::new(),
2735+
encryption_keys: HashMap::new(),
27122736
};
27132737

27142738
check_table_metadata_serde(&metadata, expected);

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ impl TableMetadataBuilder {
120120
refs: HashMap::default(),
121121
statistics: HashMap::new(),
122122
partition_statistics: HashMap::new(),
123+
encryption_keys: HashMap::new(),
123124
},
124125
last_updated_ms: None,
125126
changes: vec![],

crates/iceberg/src/writer/file_writer/location_generator.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ pub(crate) mod test {
179179
refs: HashMap::new(),
180180
statistics: HashMap::new(),
181181
partition_statistics: HashMap::new(),
182+
encryption_keys: HashMap::new(),
182183
};
183184

184185
let file_name_genertaor = super::DefaultFileNameGenerator::new(

0 commit comments

Comments
 (0)