Skip to content

Commit 80a3ac0

Browse files
chore(Examples): Update simple/compound beacon example scenario (#225)
1 parent 6bcc5e7 commit 80a3ac0

File tree

6 files changed

+203
-162
lines changed

6 files changed

+203
-162
lines changed

Examples/runtimes/java/DynamoDbEncryption/src/main/java/software/amazon/cryptography/examples/searchableencryption/BasicSearchableEncryptionExample.java

Lines changed: 123 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -34,22 +34,21 @@
3434
/*
3535
This example demonstrates how to set up a beacon on an encrypted attribute,
3636
put an item with the beacon, and query against that beacon.
37-
This example follows a use case of a database that stores customer location data.
37+
This example follows a use case of a database that stores unit inspection information.
3838
39-
Running this example requires access to a DDB table with the
40-
following primary key configuration:
41-
- Partition key is named "customer_id" with type (S)
42-
- Sort key is named "create_time" with type (S)
43-
This table must have a Global Secondary Index (GSI) configured named "state-zip-index":
44-
- Partition key is named "aws_dbe_b_state" with type (S)
45-
- Sort key is named "aws_dbe_b_zip" with type (S)
39+
Running this example requires access to a DDB table with the
40+
following key configuration:
41+
- Partition key is named "work_id" with type (S)
42+
- Sort key is named "inspection_date" with type (S)
43+
This table must have a Global Secondary Index (GSI) configured named "last4-unit-index":
44+
- Partition key is named "aws_dbe_b_inspector_id_last4" with type (S)
45+
- Sort key is named "aws_dbe_b_unit" with type (S)
4646
47-
In this example for storing customer location data, this schema is utilized for the data:
48-
- "customer_id" stores a unique customer identifier
49-
- "create_time" stores a Unix timestamp
50-
- "state" stores an encrypted 2-letter US state or territory abbreviation
51-
(https://www.faa.gov/air_traffic/publications/atpubs/cnt_html/appendix_a.html)
52-
- "zip" stores an encrypted 5-digit US zipcode (00000 - 99999)
47+
In this example for storing unit inspection information, this schema is utilized for the data:
48+
- "work_id" stores a unique identifier for a unit inspection work order (v4 UUID)
49+
- "inspection_date" stores an ISO 8601 date for the inspection (YYYY-MM-DD)
50+
- "inspector_id_last4" stores the last 4 digits of the ID of the inspector performing the work
51+
- "unit" stores a 12-digit serial number for the unit being inspected
5352
5453
The example requires the following ordered input command line parameters:
5554
1. DDB table name for table to put/query data from
@@ -61,10 +60,9 @@ This table must have a Global Secondary Index (GSI) configured named "state-zip-
6160
*/
6261

6362
public class BasicSearchableEncryptionExample {
64-
65-
static String GSI_NAME = "state-zip-index";
66-
67-
public static void PutItemQueryItemWithBeacon(String ddbTableName, String branchKeyId, String branchKeyWrappingKmsKeyArn, String branchKeyDdbTableName) {
63+
static String GSI_NAME = "last4-unit-index";
64+
public static void PutItemQueryItemWithBeacon(String ddbTableName, String branchKeyId,
65+
String branchKeyWrappingKmsKeyArn, String branchKeyDdbTableName) {
6866

6967
// 1. Configure Beacons.
7068
// The beacon name must be the name of a table attribute that will be encrypted.
@@ -73,56 +71,90 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
7371
// https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
7472
List<StandardBeacon> standardBeaconList = new ArrayList<>();
7573

76-
// The configured DDB table has a GSI on the `aws_dbe_b_state` AttributeName
77-
// Since this field is assumed to hold a well-distributed US 2-letter state abbreviation
78-
// (56 = 50 states + 6 territories),
79-
// we follow the guidance in the link above to determine acceptable bounds for beacon length:
80-
// - min: log(sqrt(56))/log(2) ~= 2.9, round up to 3
81-
// - max: log((56/2))/log(2) ~= 4.8, round up to 5
82-
// We can safely choose a beacon length between 3 and 5:
83-
// - Closer to 3, the underlying data is better obfuscated, but more "false positives" are returned in
84-
// queries, leading to more decrypt calls and worse performance
85-
// - Closer to 5, fewer "false positives" are returned in queries, leading to fewer decrypt calls and
86-
// better performance, but it is easier to distinguish unique plaintext values
87-
// As an example, we will choose 4.
88-
// Values stored in aws_dbe_b_state will be 4 bits long (0x0 - 0xf)
89-
// There will be 2^4 = 16 possible HMAC values.
90-
// With well-distributed plaintext data (56 values), we expect (56/16) = 3.5 abbrevations sharing the same beacon
91-
// value.
92-
// NOTE: This example assumes that the field values are well-distributed. In practice, this will not be true.
93-
// Some flaws in this assumption:
94-
// - More populous states would be expected to have more records; those beacons will be overused
95-
// - States where a business is not operating would expect no customer records for that state; those
96-
// beacons will be underused
97-
// This is a streamlined example and should not be used as a basis for determining beacon length
98-
// in production. Users should analyze their specific dataset to determine acceptable beacon length bounds.
99-
StandardBeacon stringBeacon = StandardBeacon.builder()
100-
.name("state")
101-
.length(4)
102-
.build();
103-
standardBeaconList.add(stringBeacon);
104-
105-
// The configured DDB table has a GSI on the `aws_dbe_b_zip` AttributeName
106-
// Since this field holds a well-distributed zipcode (100,000 possible values, of which ~42,000 are valid;
107-
// see: https://facts.usps.com/42000-zip-codes/),
108-
// we follow the guidance in the link above to determine acceptable bounds for beacon length:
109-
// - min: log(sqrt(42,000))/log(2) ~= 7.7, round up to 8
110-
// - max: log((42,000/2))/log(2) ~= 14.3, round up to 15
111-
// We can safely choose a beacon length between 8 and 15:
112-
// - Closer to 8, the underlying data is better obfuscated, but more "false positives" are returned in
113-
// queries, leading to more decrypt calls and worse performance
114-
// - Closer to 15, fewer "false positives" are returned in queries, leading to fewer decrypt calls and
115-
// better performance, but it is easier to distinguish unique plaintext values
74+
// The configured DDB table has a GSI on the `aws_dbe_b_inspector_id_last4` AttributeName.
75+
// This field holds the last 4 digits of an inspector ID.
76+
// For our example, this field may range from 0 to 9,999 (10,000 possible values).
77+
// For our example, we assume a full inspector ID is an integer
78+
// ranging from 0 to 99,999,999. We do not assume that the full inspector ID's
79+
// values are uniformly distributed across its range of possible values.
80+
// In many use cases, the prefix of an identifier encodes some information
81+
// about that identifier (e.g. zipcode and SSN prefixes encode geographic
82+
// information), while the suffix does not and is more uniformly distributed.
83+
// We will assume that the inspector ID field matches a similar use case.
84+
// So for this example, we only store and use the last
85+
// 4 digits of the inspector ID, which we assume is uniformly distributed.
86+
// Since the full ID's range is divisible by the range of the last 4 digits,
87+
// the last 4 digits of the inspector ID are uniformly distributed
88+
// over the range from 0 to 9,999.
89+
// See our documentation for why you should avoid creating beacons over non-uniform distributions
90+
// https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/searchable-encryption.html#are-beacons-right-for-me
91+
// A single inspector ID suffix may be assigned to multiple `work_id`s.
92+
//
93+
// This link provides guidance for choosing a beacon length:
94+
// https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
95+
// We follow the guidance in the link above to determine reasonable bounds
96+
// for the length of a beacon on the last 4 digits of an inspector ID:
97+
// - min: log(sqrt(10,000))/log(2) ~= 6.6, round up to 7
98+
// - max: log((10,000/2))/log(2) ~= 12.3, round down to 12
99+
// These bounds are generally not integers, so you must round them to a nearby integer.
100+
// We choose to round to the nearest integer; you might consider a different rounding approach.
101+
// Rounding up will return fewer expected "false positives" in queries,
102+
// leading to fewer decrypt calls and better performance,
103+
// but it is easier to identify which beacon values encode distinct plaintexts.
104+
// Rounding down will return more expected "false positives" in queries,
105+
// leading to more decrypt calls and worse performance,
106+
// but it is harder to identify which beacon values encode distinct plaintexts.
107+
// We can choose a beacon length between 7 and 12:
108+
// - Closer to 7, we expect more "false positives" to be returned,
109+
// making it harder to identify which beacon values encode distinct plaintexts,
110+
// but leading to more decrypt calls and worse performance
111+
// - Closer to 12, we expect fewer "false positives" returned in queries,
112+
// leading to fewer decrypt calls and better performance,
113+
// but it is easier to identify which beacon values encode distinct plaintexts.
116114
// As an example, we will choose 10.
117-
// Values stored in aws_dbe_b_zip will be 10 bits long (0x000 - 0x3ff).
118-
// There will be 2^10 = 1024 possible HMAC values.
119-
// With well-distributed plaintext data (100,000 values), we expect (42,000/1024) ~= 41 zipcodes sharing the same
120-
// beacon value.
121-
StandardBeacon numberBeacon = StandardBeacon.builder()
122-
.name("zip")
115+
//
116+
// Values stored in aws_dbe_b_inspector_id_last4 will be 10 bits long (0x000 - 0x3ff)
117+
// There will be 2^10 = 1,024 possible HMAC values.
118+
// With a sufficiently large number of well-distributed inspector IDs,
119+
// for a particular beacon we expect (10,000/1,024) ~= 9.8 4-digit inspector ID suffixes
120+
// sharing that beacon value.
121+
StandardBeacon last4Beacon = StandardBeacon.builder()
122+
.name("inspector_id_last4")
123123
.length(10)
124124
.build();
125-
standardBeaconList.add(numberBeacon);
125+
standardBeaconList.add(last4Beacon);
126+
127+
// The configured DDB table has a GSI on the `aws_dbe_b_unit` AttributeName.
128+
// This field holds a unit serial number.
129+
// For this example, this is a 12-digit integer from 0 to 999,999,999,999 (10^12 possible values).
130+
// We will assume values for this attribute are uniformly distributed across this range.
131+
// A single unit serial number may be assigned to multiple `work_id`s.
132+
//
133+
// This link provides guidance for choosing a beacon length:
134+
// https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
135+
// We follow the guidance in the link above to determine reasonable bounds
136+
// for the length of a beacon on a unit serial number:
137+
// - min: log(sqrt(10^12))/log(2) ~= 19.9, round up to 20
138+
// - max: log((10^12/2))/log(2) ~= 38.9, round up to 39
139+
// We can choose a beacon length between 20 and 39:
140+
// - Closer to 20, we expect more "false positives" to be returned,
141+
// making it harder to identify which beacon values encode distinct plaintexts,
142+
// but leading to more decrypt calls and worse performance
143+
// - Closer to 39, we expect fewer "false positives" returned in queries,
144+
// leading to fewer decrypt calls and better performance,
145+
// but it is easier to identify which beacon values encode distinct plaintexts.
146+
// As an example, we will choose 30.
147+
//
148+
// Values stored in aws_dbe_b_unit will be 30 bits long (0x00000000 - 0x3fffffff)
149+
// There will be 2^30 = 1,073,741,824 ~= 1.1B possible HMAC values.
150+
// With a sufficiently large number of well-distributed unit serial numbers,
151+
// for a particular beacon we expect (10^12/2^30) ~= 931.3 unit serial numbers
152+
// sharing that beacon value.
153+
StandardBeacon unitBeacon = StandardBeacon.builder()
154+
.name("unit")
155+
.length(30)
156+
.build();
157+
standardBeaconList.add(unitBeacon);
126158

127159
// 2. Configure Keystore.
128160
// The keystore is a separate DDB table where the client stores encryption and decryption materials.
@@ -196,18 +228,18 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
196228
// - DO_NOTHING: The attribute is not encrypted and not included in the signature
197229
// Any attributes that will be used in beacons must be configured as ENCRYPT_AND_SIGN.
198230
final Map<String, CryptoAction> attributeActionsOnEncrypt = new HashMap<>();
199-
attributeActionsOnEncrypt.put("customer_id", CryptoAction.SIGN_ONLY); // Our partition attribute must be SIGN_ONLY
200-
attributeActionsOnEncrypt.put("create_time", CryptoAction.SIGN_ONLY); // Our sort attribute must be SIGN_ONLY
201-
attributeActionsOnEncrypt.put("state", CryptoAction.ENCRYPT_AND_SIGN); // Beaconized attributes must be encrypted
202-
attributeActionsOnEncrypt.put("zip", CryptoAction.ENCRYPT_AND_SIGN); // Beaconized attributes must be encrypted
231+
attributeActionsOnEncrypt.put("work_id", CryptoAction.SIGN_ONLY); // Our partition attribute must be SIGN_ONLY
232+
attributeActionsOnEncrypt.put("inspection_date", CryptoAction.SIGN_ONLY); // Our sort attribute must be SIGN_ONLY
233+
attributeActionsOnEncrypt.put("inspector_id_last4", CryptoAction.ENCRYPT_AND_SIGN); // Beaconized attributes must be encrypted
234+
attributeActionsOnEncrypt.put("unit", CryptoAction.ENCRYPT_AND_SIGN); // Beaconized attributes must be encrypted
203235

204236
// 6. Create the DynamoDb Encryption configuration for the table we will be writing to.
205237
// The beaconVersions are added to the search configuration.
206238
final Map<String, DynamoDbTableEncryptionConfig> tableConfigs = new HashMap<>();
207239
final DynamoDbTableEncryptionConfig config = DynamoDbTableEncryptionConfig.builder()
208240
.logicalTableName(ddbTableName)
209-
.partitionKeyName("customer_id")
210-
.sortKeyName("create_time")
241+
.partitionKeyName("work_id")
242+
.sortKeyName("inspection_date")
211243
.attributeActionsOnEncrypt(attributeActionsOnEncrypt)
212244
.keyring(kmsKeyring)
213245
.search(SearchConfig.builder()
@@ -235,17 +267,17 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
235267
// 9. Put an item into our table using the above client.
236268
// Before the item gets sent to DynamoDb, it will be encrypted
237269
// client-side, according to our configuration.
238-
// Since our configuration includes beacons for `state` and `zip`,
270+
// Since our configuration includes beacons for `inspector_id_last4` and `unit`,
239271
// the client will add two additional attributes to the item. These attributes will have names
240-
// `aws_dbe_b_state` and `aws_dbe_b_zip`. Their values will be HMACs
272+
// `aws_dbe_b_inspector_id_last4` and `aws_dbe_b_unit`. Their values will be HMACs
241273
// truncated to as many bits as the beacon's `length` parameter; e.g.
242-
// aws_dbe_b_state = truncate(HMAC("WA"), 4)
243-
// aws_dbe_b_zip = truncate(HMAC("98101"), 10)
274+
// aws_dbe_b_inspector_id_last4 = truncate(HMAC("4321"), 10)
275+
// aws_dbe_b_unit = truncate(HMAC("123456789012"), 30)
244276
final HashMap<String, AttributeValue> item = new HashMap<>();
245-
item.put("customer_id", AttributeValue.builder().s("ABCD-1234").build());
246-
item.put("create_time", AttributeValue.builder().n("1681495205").build());
247-
item.put("state", AttributeValue.builder().s("WA").build());
248-
item.put("zip", AttributeValue.builder().s("98101").build());
277+
item.put("work_id", AttributeValue.builder().s("1313ba89-5661-41eb-ba6c-cb1b4cb67b2d").build());
278+
item.put("inspection_date", AttributeValue.builder().s("2023-06-13").build());
279+
item.put("inspector_id_last4", AttributeValue.builder().s("4321").build());
280+
item.put("unit", AttributeValue.builder().s("123456789012").build());
249281

250282
final PutItemRequest putRequest = PutItemRequest.builder()
251283
.tableName(ddbTableName)
@@ -262,23 +294,26 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
262294
// and transform the query to use the beaconized name and value.
263295
// Internally, the client will query for and receive all items with a matching HMAC value in the beacon field.
264296
// This may include a number of "false positives" with different plaintext, but the same truncated HMAC.
265-
// e.g. if truncate(HMAC("WA"), 4) == truncate(HMAC("DC"), 4), the query will return both items.
297+
// e.g. if truncate(HMAC("123456789012"), 30)
298+
// == truncate(HMAC("098765432109"), 30),
299+
// the query will return both items.
266300
// The client will decrypt all returned items to determine which ones have the expected attribute values,
267301
// and only surface items with the correct plaintext to the user.
268302
// This procedure is internal to the client and is abstracted away from the user;
269-
// e.g. the user will only see "WA" and never "DC", though the actual query returned both.
303+
// e.g. the user will only see "123456789012" and never
304+
// "098765432109", though the actual query returned both.
270305
Map<String,String> expressionAttributesNames = new HashMap<>();
271-
expressionAttributesNames.put("#s", "state");
272-
expressionAttributesNames.put("#z", "zip");
306+
expressionAttributesNames.put("#last4", "inspector_id_last4");
307+
expressionAttributesNames.put("#unit", "unit");
273308

274309
Map<String,AttributeValue> expressionAttributeValues = new HashMap<>();
275-
expressionAttributeValues.put(":s", AttributeValue.builder().s("WA").build());
276-
expressionAttributeValues.put(":z", AttributeValue.builder().s("98101").build());
310+
expressionAttributeValues.put(":last4", AttributeValue.builder().s("4321").build());
311+
expressionAttributeValues.put(":unit", AttributeValue.builder().s("123456789012").build());
277312

278313
QueryRequest queryRequest = QueryRequest.builder()
279314
.tableName(ddbTableName)
280315
.indexName(GSI_NAME)
281-
.keyConditionExpression("#s = :s and #z = :z")
316+
.keyConditionExpression("#last4 = :last4 and #unit = :unit")
282317
.expressionAttributeNames(expressionAttributesNames)
283318
.expressionAttributeValues(expressionAttributeValues)
284319
.build();
@@ -291,8 +326,8 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
291326
assert attributeValues.size() == 1;
292327
final Map<String, AttributeValue> returnedItem = attributeValues.get(0);
293328
// Validate the item has the expected attributes
294-
assert returnedItem.get("state").s().equals("WA");
295-
assert returnedItem.get("zip").s().equals("98101");
329+
assert returnedItem.get("inspector_id_last4").s().equals("4321");
330+
assert returnedItem.get("unit").s().equals("123456789012");
296331
}
297332

298333
public static void main(final String[] args) {

0 commit comments

Comments
 (0)