34
34
/*
35
35
This example demonstrates how to set up a beacon on an encrypted attribute,
36
36
put an item with the beacon, and query against that beacon.
37
- This example follows a use case of a database that stores customer location data .
37
+ This example follows a use case of a database that stores unit inspection information.
38
38
39
- Running this example requires access to a DDB table with the
40
- following primary key configuration:
41
- - Partition key is named "customer_id " with type (S)
42
- - Sort key is named "create_time " with type (S)
43
- This table must have a Global Secondary Index (GSI) configured named "state-zip -index":
44
- - Partition key is named "aws_dbe_b_state " with type (S)
45
- - Sort key is named "aws_dbe_b_zip " with type (S)
39
+ Running this example requires access to a DDB table with the
40
+ following key configuration:
41
+ - Partition key is named "work_id" with type (S)
42
+ - Sort key is named "inspection_date" with type (S)
43
+ This table must have a Global Secondary Index (GSI) configured named "last4-unit-index":
44
+ - Partition key is named "aws_dbe_b_inspector_id_last4" with type (S)
45
+ - Sort key is named "aws_dbe_b_unit" with type (S)
46
46
47
- In this example for storing customer location data, this schema is utilized for the data:
48
- - "customer_id" stores a unique customer identifier
49
- - "create_time" stores a Unix timestamp
50
- - "state" stores an encrypted 2-letter US state or territory abbreviation
51
- (https://www.faa.gov/air_traffic/publications/atpubs/cnt_html/appendix_a.html)
52
- - "zip" stores an encrypted 5-digit US zipcode (00000 - 99999)
47
+ In this example for storing unit inspection information, this schema is utilized for the data:
48
+ - "work_id" stores a unique identifier for a unit inspection work order (v4 UUID)
49
+ - "inspection_date" stores an ISO 8601 date for the inspection (YYYY-MM-DD)
50
+ - "inspector_id_last4" stores the last 4 digits of the ID of the inspector performing the work
51
+ - "unit" stores a 12-digit serial number for the unit being inspected
53
52
54
53
The example requires the following ordered input command line parameters:
55
54
1. DDB table name for table to put/query data from
@@ -61,10 +60,9 @@ This table must have a Global Secondary Index (GSI) configured named "state-zip-
61
60
*/
62
61
63
62
public class BasicSearchableEncryptionExample {
64
-
65
- static String GSI_NAME = "state-zip-index" ;
66
-
67
- public static void PutItemQueryItemWithBeacon (String ddbTableName , String branchKeyId , String branchKeyWrappingKmsKeyArn , String branchKeyDdbTableName ) {
63
+ static String GSI_NAME = "last4-unit-index" ;
64
+ public static void PutItemQueryItemWithBeacon (String ddbTableName , String branchKeyId ,
65
+ String branchKeyWrappingKmsKeyArn , String branchKeyDdbTableName ) {
68
66
69
67
// 1. Configure Beacons.
70
68
// The beacon name must be the name of a table attribute that will be encrypted.
@@ -73,56 +71,90 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
73
71
// https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
74
72
List <StandardBeacon > standardBeaconList = new ArrayList <>();
75
73
76
- // The configured DDB table has a GSI on the `aws_dbe_b_state ` AttributeName
77
- // Since this field is assumed to hold a well-distributed US 2-letter state abbreviation
78
- // (56 = 50 states + 6 territories),
79
- // we follow the guidance in the link above to determine acceptable bounds for beacon length:
80
- // - min: log(sqrt(56))/log(2) ~= 2.9, round up to 3
81
- // - max: log((56/2))/log(2) ~= 4.8, round up to 5
82
- // We can safely choose a beacon length between 3 and 5:
83
- // - Closer to 3, the underlying data is better obfuscated, but more "false positives" are returned in
84
- // queries, leading to more decrypt calls and worse performance
85
- // - Closer to 5, fewer "false positives" are returned in queries, leading to fewer decrypt calls and
86
- // better performance, but it is easier to distinguish unique plaintext values
87
- // As an example, we will choose 4 .
88
- // Values stored in aws_dbe_b_state will be 4 bits long (0x0 - 0xf)
89
- // There will be 2^4 = 16 possible HMAC values.
90
- // With well-distributed plaintext data (56 values), we expect (56/16) = 3.5 abbrevations sharing the same beacon
91
- // value.
92
- // NOTE: This example assumes that the field values are well-distributed. In practice, this will not be true.
93
- // Some flaws in this assumption:
94
- // - More populous states would be expected to have more records; those beacons will be overused
95
- // - States where a business is not operating would expect no customer records for that state; those
96
- // beacons will be underused
97
- // This is a streamlined example and should not be used as a basis for determining beacon length
98
- // in production. Users should analyze their specific dataset to determine acceptable beacon length bounds.
99
- StandardBeacon stringBeacon = StandardBeacon . builder ()
100
- . name ( "state" )
101
- . length ( 4 )
102
- . build ();
103
- standardBeaconList . add ( stringBeacon );
104
-
105
- // The configured DDB table has a GSI on the `aws_dbe_b_zip` AttributeName
106
- // Since this field holds a well-distributed zipcode (100,000 possible values, of which ~42,000 are valid;
107
- // see: https://facts.usps.com/42000-zip-codes/) ,
108
- // we follow the guidance in the link above to determine acceptable bounds for beacon length:
109
- // - min: log(sqrt(42,000))/log(2) ~= 7.7, round up to 8
110
- // - max: log((42,000/2))/log(2) ~= 14.3, round up to 15
111
- // We can safely choose a beacon length between 8 and 15:
112
- // - Closer to 8, the underlying data is better obfuscated, but more "false positives" are returned in
113
- // queries, leading to more decrypt calls and worse performance
114
- // - Closer to 15, fewer "false positives" are returned in queries, leading to fewer decrypt calls and
115
- // better performance, but it is easier to distinguish unique plaintext values
74
+ // The configured DDB table has a GSI on the `aws_dbe_b_inspector_id_last4` AttributeName.
75
+ // This field holds the last 4 digits of an inspector ID.
76
+ // For our example, this field may range from 0 to 9,999 (10,000 possible values).
77
+ // For our example, we assume a full inspector ID is an integer
78
+ // ranging from 0 to 99,999,999. We do not assume that the full inspector ID's
79
+ // values are uniformly distributed across its range of possible values.
80
+ // In many use cases, the prefix of an identifier encodes some information
81
+ // about that identifier (e.g. zipcode and SSN prefixes encode geographic
82
+ // information), while the suffix does not and is more uniformly distributed.
83
+ // We will assume that the inspector ID field matches a similar use case.
84
+ // So for this example, we only store and use the last
85
+ // 4 digits of the inspector ID, which we assume is uniformly distributed.
86
+ // Since the full ID's range is divisible by the range of the last 4 digits,
87
+ // the last 4 digits of the inspector ID are uniformly distributed
88
+ // over the range from 0 to 9,999.
89
+ // See our documentation for why you should avoid creating beacons over non-uniform distributions
90
+ // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/searchable-encryption.html#are-beacons-right-for-me
91
+ // A single inspector ID suffix may be assigned to multiple `work_id`s.
92
+ //
93
+ // This link provides guidance for choosing a beacon length:
94
+ // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
95
+ // We follow the guidance in the link above to determine reasonable bounds
96
+ // for the length of a beacon on the last 4 digits of an inspector ID:
97
+ // - min: log(sqrt(10,000))/log(2) ~= 6.6, round up to 7
98
+ // - max: log((10,000/2))/log(2) ~= 12.3, round down to 12
99
+ // These bounds are not integers, so each must be rounded to a nearby integer.
100
+ // We choose to round to the nearest integer; you might consider a different rounding approach.
101
+ // Rounding up will return fewer expected "false positives" in queries,
102
+ // leading to fewer decrypt calls and better performance,
103
+ // but it is easier to identify which beacon values encode distinct plaintexts.
104
+ // Rounding down will return more expected "false positives" in queries,
105
+ // leading to more decrypt calls and worse performance,
106
+ // but it is harder to identify which beacon values encode distinct plaintexts.
107
+ // We can choose a beacon length between 7 and 12:
108
+ // - Closer to 7, we expect more "false positives" to be returned,
109
+ // making it harder to identify which beacon values encode distinct plaintexts,
110
+ // but leading to more decrypt calls and worse performance
111
+ // - Closer to 12, we expect fewer "false positives" returned in queries,
112
+ // leading to fewer decrypt calls and better performance,
113
+ // but it is easier to identify which beacon values encode distinct plaintexts.
116
114
// As an example, we will choose 10.
117
- // Values stored in aws_dbe_b_zip will be 10 bits long (0x000 - 0x3ff).
118
- // There will be 2^10 = 1024 possible HMAC values.
119
- // With well-distributed plaintext data (100,000 values), we expect (42,000/1024) ~= 41 zipcodes sharing the same
120
- // beacon value.
121
- StandardBeacon numberBeacon = StandardBeacon .builder ()
122
- .name ("zip" )
115
+ //
116
+ // Values stored in aws_dbe_b_inspector_id_last4 will be 10 bits long (0x000 - 0x3ff)
117
+ // There will be 2^10 = 1,024 possible HMAC values.
118
+ // With a sufficiently large number of well-distributed inspector IDs,
119
+ // for a particular beacon we expect (10,000/1,024) ~= 9.8 4-digit inspector ID suffixes
120
+ // sharing that beacon value.
121
+ StandardBeacon last4Beacon = StandardBeacon .builder ()
122
+ .name ("inspector_id_last4" )
123
123
.length (10 )
124
124
.build ();
125
- standardBeaconList .add (numberBeacon );
125
+ standardBeaconList .add (last4Beacon );
126
+
127
+ // The configured DDB table has a GSI on the `aws_dbe_b_unit` AttributeName.
128
+ // This field holds a unit serial number.
129
+ // For this example, this is a 12-digit integer from 0 to 999,999,999,999 (10^12 possible values).
130
+ // We will assume values for this attribute are uniformly distributed across this range.
131
+ // A single unit serial number may be assigned to multiple `work_id`s.
132
+ //
133
+ // This link provides guidance for choosing a beacon length:
134
+ // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
135
+ // We follow the guidance in the link above to determine reasonable bounds
136
+ // for the length of a beacon on a unit serial number:
137
+ // - min: log(sqrt(1,000,000,000,000))/log(2) ~= 19.9, round up to 20
138
+ // - max: log((1,000,000,000,000/2))/log(2) ~= 38.9, round up to 39
139
+ // We can choose a beacon length between 20 and 39:
140
+ // - Closer to 20, we expect more "false positives" to be returned,
141
+ // making it harder to identify which beacon values encode distinct plaintexts,
142
+ // but leading to more decrypt calls and worse performance
143
+ // - Closer to 39, we expect fewer "false positives" returned in queries,
144
+ // leading to fewer decrypt calls and better performance,
145
+ // but it is easier to identify which beacon values encode distinct plaintexts.
146
+ // As an example, we will choose 30.
147
+ //
148
+ // Values stored in aws_dbe_b_unit will be 30 bits long (0x00000000 - 0x3fffffff)
149
+ // There will be 2^30 = 1,073,741,824 ~= 1.1B possible HMAC values.
150
+ // With a sufficiently large number of well-distributed unit serial numbers,
151
+ // for a particular beacon we expect (10^12/2^30) ~= 931.3 unit serial numbers
152
+ // sharing that beacon value.
153
+ StandardBeacon unitBeacon = StandardBeacon .builder ()
154
+ .name ("unit" )
155
+ .length (30 )
156
+ .build ();
157
+ standardBeaconList .add (unitBeacon );
126
158
127
159
// 2. Configure Keystore.
128
160
// The keystore is a separate DDB table where the client stores encryption and decryption materials.
@@ -196,18 +228,18 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
196
228
// - DO_NOTHING: The attribute is not encrypted and not included in the signature
197
229
// Any attributes that will be used in beacons must be configured as ENCRYPT_AND_SIGN.
198
230
final Map <String , CryptoAction > attributeActionsOnEncrypt = new HashMap <>();
199
- attributeActionsOnEncrypt .put ("customer_id " , CryptoAction .SIGN_ONLY ); // Our partition attribute must be SIGN_ONLY
200
- attributeActionsOnEncrypt .put ("create_time " , CryptoAction .SIGN_ONLY ); // Our sort attribute must be SIGN_ONLY
201
- attributeActionsOnEncrypt .put ("state " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
202
- attributeActionsOnEncrypt .put ("zip " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
231
+ attributeActionsOnEncrypt .put ("work_id " , CryptoAction .SIGN_ONLY ); // Our partition attribute must be SIGN_ONLY
232
+ attributeActionsOnEncrypt .put ("inspection_date " , CryptoAction .SIGN_ONLY ); // Our sort attribute must be SIGN_ONLY
233
+ attributeActionsOnEncrypt .put ("inspector_id_last4 " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
234
+ attributeActionsOnEncrypt .put ("unit " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
203
235
204
236
// 6. Create the DynamoDb Encryption configuration for the table we will be writing to.
205
237
// The beaconVersions are added to the search configuration.
206
238
final Map <String , DynamoDbTableEncryptionConfig > tableConfigs = new HashMap <>();
207
239
final DynamoDbTableEncryptionConfig config = DynamoDbTableEncryptionConfig .builder ()
208
240
.logicalTableName (ddbTableName )
209
- .partitionKeyName ("customer_id " )
210
- .sortKeyName ("create_time " )
241
+ .partitionKeyName ("work_id " )
242
+ .sortKeyName ("inspection_date " )
211
243
.attributeActionsOnEncrypt (attributeActionsOnEncrypt )
212
244
.keyring (kmsKeyring )
213
245
.search (SearchConfig .builder ()
@@ -235,17 +267,17 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
235
267
// 9. Put an item into our table using the above client.
236
268
// Before the item gets sent to DynamoDb, it will be encrypted
237
269
// client-side, according to our configuration.
238
- // Since our configuration includes beacons for `state ` and `zip `,
270
+ // Since our configuration includes beacons for `inspector_id_last4 ` and `unit `,
239
271
// the client will add two additional attributes to the item. These attributes will have names
240
- // `aws_dbe_b_state ` and `aws_dbe_b_zip `. Their values will be HMACs
272
+ // `aws_dbe_b_inspector_id_last4 ` and `aws_dbe_b_unit `. Their values will be HMACs
241
273
// truncated to as many bits as the beacon's `length` parameter; e.g.
242
- // aws_dbe_b_state = truncate(HMAC("WA "), 4 )
243
- // aws_dbe_b_zip = truncate(HMAC("98101 "), 10 )
274
+ // aws_dbe_b_inspector_id_last4 = truncate(HMAC("4321 "), 10 )
275
+ // aws_dbe_b_unit = truncate(HMAC("123456789012 "), 30 )
244
276
final HashMap <String , AttributeValue > item = new HashMap <>();
245
- item .put ("customer_id " , AttributeValue .builder ().s ("ABCD-1234 " ).build ());
246
- item .put ("create_time " , AttributeValue .builder ().n ( "1681495205 " ).build ());
247
- item .put ("state " , AttributeValue .builder ().s ("WA " ).build ());
248
- item .put ("zip " , AttributeValue .builder ().s ("98101 " ).build ());
277
+ item .put ("work_id " , AttributeValue .builder ().s ("1313ba89-5661-41eb-ba6c-cb1b4cb67b2d " ).build ());
278
+ item .put ("inspection_date " , AttributeValue .builder ().s ( "2023-06-13 " ).build ());
279
+ item .put ("inspector_id_last4 " , AttributeValue .builder ().s ("4321 " ).build ());
280
+ item .put ("unit " , AttributeValue .builder ().s ("123456789012 " ).build ());
249
281
250
282
final PutItemRequest putRequest = PutItemRequest .builder ()
251
283
.tableName (ddbTableName )
@@ -262,23 +294,26 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
262
294
// and transform the query to use the beaconized name and value.
263
295
// Internally, the client will query for and receive all items with a matching HMAC value in the beacon field.
264
296
// This may include a number of "false positives" with different ciphertext, but the same truncated HMAC.
265
- // e.g. if truncate(HMAC("WA"), 4) == truncate(HMAC("DC"), 4), the query will return both items.
297
+ // e.g. if truncate(HMAC("123456789012"), 30)
298
+ // == truncate(HMAC("098765432109"), 30),
299
+ // the query will return both items.
266
300
// The client will decrypt all returned items to determine which ones have the expected attribute values,
267
301
// and only surface items with the correct plaintext to the user.
268
302
// This procedure is internal to the client and is abstracted away from the user;
269
- // e.g. the user will only see "WA" and never "DC", though the actual query returned both.
303
+ // e.g. the user will only see "123456789012" and never
304
+ // "098765432109", though the actual query returned both.
270
305
Map <String ,String > expressionAttributesNames = new HashMap <>();
271
- expressionAttributesNames .put ("#s " , "state " );
272
- expressionAttributesNames .put ("#z " , "zip " );
306
+ expressionAttributesNames .put ("#last4 " , "inspector_id_last4 " );
307
+ expressionAttributesNames .put ("#unit " , "unit " );
273
308
274
309
Map <String ,AttributeValue > expressionAttributeValues = new HashMap <>();
275
- expressionAttributeValues .put (":s " , AttributeValue .builder ().s ("WA " ).build ());
276
- expressionAttributeValues .put (":z " , AttributeValue .builder ().s ("98101 " ).build ());
310
+ expressionAttributeValues .put (":last4 " , AttributeValue .builder ().s ("4321 " ).build ());
311
+ expressionAttributeValues .put (":unit " , AttributeValue .builder ().s ("123456789012 " ).build ());
277
312
278
313
QueryRequest queryRequest = QueryRequest .builder ()
279
314
.tableName (ddbTableName )
280
315
.indexName (GSI_NAME )
281
- .keyConditionExpression ("#s = :s and #z = :z " )
316
+ .keyConditionExpression ("#last4 = :last4 and #unit = :unit " )
282
317
.expressionAttributeNames (expressionAttributesNames )
283
318
.expressionAttributeValues (expressionAttributeValues )
284
319
.build ();
@@ -291,8 +326,8 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
291
326
assert attributeValues .size () == 1 ;
292
327
final Map <String , AttributeValue > returnedItem = attributeValues .get (0 );
293
328
// Validate the item has the expected attributes
294
- assert returnedItem .get ("state " ).s ().equals ("WA " );
295
- assert returnedItem .get ("zip " ).s ().equals ("98101 " );
329
+ assert returnedItem .get ("inspector_id_last4 " ).s ().equals ("4321 " );
330
+ assert returnedItem .get ("unit " ).s ().equals ("123456789012 " );
296
331
}
297
332
298
333
public static void main (final String [] args ) {
0 commit comments