-
Notifications
You must be signed in to change notification settings - Fork 25
INTPYTHON-527 Add Queryable Encryption support #329
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 123 commits
bc52c8e
38fb110
65bd15a
e08945b
7b34b44
8e83ada
4da895c
ed54a9b
8a7766c
eab2f2e
10a361e
01d5485
db32487
b2be223
27d4b8e
c4d1c66
2772aff
d2ddf4e
e25357e
6487086
bc76db3
4dbaa8f
9cc5ad2
c751b2d
b13a07f
534da6b
13578ab
3342d7f
9fd21e4
9bbe741
d1eb737
176f016
264b37a
1771f56
819058a
9a3c18e
071192e
b2a0534
81cc887
be3dd16
a2342e2
05a7610
96b3fda
1eb71d5
08209d3
90fe562
8c2b84c
ab680fd
4a267f5
3fdc1f7
d562a76
163758d
b95c343
5205a0b
b07c3e6
e557632
c5f8888
a7bc5c5
09423bc
c756cf8
841797c
2386397
d685d2a
08ea317
3e839d7
75c6936
534452f
bf26a8a
bf078ad
2780e32
31d3feb
b005726
e7290e4
76deec0
02ce21e
c8a5118
39f1cbc
e504fc5
0aa423f
c27be37
13de3bb
7e3cd34
4a9daa7
516642f
a319e8e
5807033
37e7e06
f19c901
528d503
c7c091b
b3a302b
acb0554
67a640d
9e76295
97196ed
1614919
a1bc5f3
75c3cd1
a81d2ae
e562718
3dca177
3432818
cb7f153
e0ef5b3
ba4a6c8
1b9a714
3340ae7
c90406b
8a1f381
edb2fa6
0701160
08f7934
2c4d53b
9919ce9
45ea5b5
3e468e7
8869bc1
8a05af8
3353fd0
948d21c
aae8df9
9c7c82f
cec0289
43df16a
94ecbe1
041336e
e894fe1
a683a6c
3c2bc97
7f6971b
14ad6a8
3fba90c
b88b167
48f26ea
68799fb
38332c4
b303002
97c3f8d
c1d38d5
62df289
23abbbf
61e5919
f6b2a17
e8389cb
98fdbe2
6645459
ea4599f
d8192bc
ab82d7d
2974a4c
938b55a
a2af6cc
e06a96c
fdfecf9
8bdb58c
0fea49c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
# Queryable Encryption helper functions and constants for MongoDB | ||
# | ||
# These helper functions and constants are optional and Queryable | ||
# Encryption can be used in Django without them. They are provided | ||
# to make it easier configure Queryable Encryption in Django. | ||
|
||
import base64 | ||
import os | ||
|
||
KEY_VAULT_COLLECTION_NAME = "__keyVault" | ||
KEY_VAULT_DATABASE_NAME = "keyvault" | ||
KEY_VAULT_NAMESPACE = f"{KEY_VAULT_DATABASE_NAME}.{KEY_VAULT_COLLECTION_NAME}" | ||
KMS_CREDENTIALS = { | ||
"aws": { | ||
"key": os.getenv("AWS_KEY_ARN", ""), | ||
"region": os.getenv("AWS_KEY_REGION", ""), | ||
}, | ||
"azure": { | ||
"keyName": os.getenv("AZURE_KEY_NAME", ""), | ||
"keyVaultEndpoint": os.getenv("AZURE_KEY_VAULT_ENDPOINT", ""), | ||
}, | ||
"gcp": { | ||
"projectId": os.getenv("GCP_PROJECT_ID", ""), | ||
"location": os.getenv("GCP_LOCATION", ""), | ||
"keyRing": os.getenv("GCP_KEY_RING", ""), | ||
"keyName": os.getenv("GCP_KEY_NAME", ""), | ||
}, | ||
"kmip": {}, | ||
"local": {}, | ||
} | ||
KMS_PROVIDERS = { | ||
"aws": { | ||
"accessKeyId": os.getenv("AWS_ACCESS_KEY_ID", "not an access key"), | ||
"secretAccessKey": os.getenv("AWS_SECRET_ACCESS_KEY", "not a secret key"), | ||
}, | ||
"azure": { | ||
"tenantId": os.getenv("AZURE_TENANT_ID", "not a tenant ID"), | ||
"clientId": os.getenv("AZURE_CLIENT_ID", "not a client ID"), | ||
"clientSecret": os.getenv("AZURE_CLIENT_SECRET", "not a client secret"), | ||
}, | ||
"gcp": { | ||
"email": os.getenv("GCP_EMAIL", "not an email"), | ||
"privateKey": os.getenv( | ||
"GCP_PRIVATE_KEY", | ||
base64.b64encode(b"not a private key").decode("ascii"), | ||
), | ||
}, | ||
"kmip": { | ||
"endpoint": os.getenv("KMIP_KMS_ENDPOINT", "not a valid endpoint"), | ||
}, | ||
"local": { | ||
"key": bytes.fromhex( | ||
"000102030405060708090a0b0c0d0e0f" | ||
"101112131415161718191a1b1c1d1e1f" | ||
"202122232425262728292a2b2c2d2e2f" | ||
"303132333435363738393a3b3c3d3e3f" | ||
"404142434445464748494a4b4c4d4e4f" | ||
"505152535455565758595a5b5c5d5e5f" | ||
) | ||
}, | ||
} | ||
|
||
|
||
class EncryptedRouter: | ||
"""A sample database router for Django that routes encrypted | ||
models to an encrypted database with a local KMS provider. | ||
""" | ||
|
||
def allow_migrate(self, db, app_label, model_name=None, model=None, **hints): | ||
if model: | ||
return db == ("encrypted" if getattr(model, "encrypted", False) else "default") | ||
return db == "default" | ||
|
||
def db_for_read(self, model, **hints): | ||
if getattr(model, "encrypted", False): | ||
return "encrypted" | ||
return "default" | ||
|
||
db_for_write = db_for_read | ||
|
||
def kms_provider(self, model): | ||
return "local" | ||
|
||
|
||
class QueryType: | ||
""" | ||
Class that supports building encrypted equality and range queries | ||
for MongoDB's Queryable Encryption. | ||
""" | ||
|
||
@classmethod | ||
def equality(cls, *, contention=None): | ||
query = {"queryType": "equality"} | ||
if contention is not None: | ||
query["contention"] = contention | ||
return query | ||
|
||
@classmethod | ||
def range( | ||
cls, *, contention=None, max=None, min=None, precision=None, sparsity=None, trimFactor=None | ||
): | ||
query = {"queryType": "range"} | ||
options = { | ||
"contention": contention, | ||
"max": max, | ||
"min": min, | ||
"precision": precision, | ||
"sparsity": sparsity, | ||
"trimFactor": trimFactor, | ||
} | ||
query.update({k: v for k, v in options.items() if v is not None}) | ||
return query |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from django.db import models | ||
|
||
|
||
class EncryptedFieldMixin(models.Field): | ||
encrypted = True | ||
|
||
def __init__(self, *args, queries=None, **kwargs): | ||
self.queries = queries | ||
super().__init__(*args, **kwargs) | ||
|
||
def deconstruct(self): | ||
name, path, args, kwargs = super().deconstruct() | ||
|
||
if self.queries is not None: | ||
kwargs["queries"] = self.queries | ||
|
||
if path.startswith("django_mongodb_backend.fields.encryption"): | ||
path = path.replace( | ||
"django_mongodb_backend.fields.encryption", | ||
"django_mongodb_backend.fields", | ||
) | ||
|
||
return name, path, args, kwargs | ||
|
||
|
||
class EncryptedCharField(EncryptedFieldMixin, models.CharField): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't find in the docs if an encrypted collection could have an aggregate query. So my question is: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No aggregation stages are supported and two tests from Django's
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, Are lookups needed? We could try to add something. But it won't be so easy given we cannot use aggregate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know! Probably not for v1. Equality and range queries seem to be working in the tests next up is to declare "done" for char and integer and move on to the rest of the fields. Most of the examples I've seen are for char and integer so we may need to experiment to see which additional fields will work without effort, which will work with some effort, and which will not work at all. |
||
pass | ||
|
||
|
||
class EncryptedIntegerField(EncryptedFieldMixin, models.IntegerField): | ||
pass |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import json | ||
|
||
from django.apps import apps | ||
from django.core.management.base import BaseCommand | ||
from django.db import DEFAULT_DB_ALIAS, connections, router | ||
|
||
|
||
class Command(BaseCommand): | ||
help = "Generate a `schema_map` of encrypted fields for all encrypted" | ||
" models in the database for use with `AutoEncryptionOpts` in" | ||
" production environments." | ||
|
||
def add_arguments(self, parser): | ||
parser.add_argument( | ||
"--database", | ||
default=DEFAULT_DB_ALIAS, | ||
help="Specify the database to use for generating the encrypted" | ||
"fields map. Defaults to the 'default' database.", | ||
) | ||
|
||
def handle(self, *args, **options): | ||
db = options["database"] | ||
connection = connections[db] | ||
schema_map = self.get_encrypted_fields_map(connection) | ||
self.stdout.write(json.dumps(schema_map, indent=2)) | ||
|
||
def get_encrypted_fields_map(self, connection): | ||
schema_map = {} | ||
for app_config in apps.get_app_configs(): | ||
for model in router.get_migratable_models( | ||
app_config, connection.alias, include_auto_created=False | ||
): | ||
if getattr(model, "encrypted", False): | ||
fields = connection.schema_editor()._get_encrypted_fields_map(model) | ||
schema_map[model._meta.db_table] = fields | ||
return schema_map |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,11 @@ def delete(self, *args, **kwargs): | |
|
||
def save(self, *args, **kwargs): | ||
raise NotSupportedError("EmbeddedModels cannot be saved.") | ||
|
||
|
||
class EncryptedModel(models.Model): | ||
encrypted = True | ||
|
||
class Meta: | ||
abstract = True | ||
required_db_features = {"supports_queryable_encryption"} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
from django.apps import apps | ||
|
||
from django_mongodb_backend.models import EmbeddedModel | ||
from django.db.utils import ConnectionRouter | ||
|
||
|
||
class MongoRouter: | ||
|
@@ -9,10 +8,23 @@ def allow_migrate(self, db, app_label, model_name=None, **hints): | |
EmbeddedModels don't have their own collection and must be ignored by | ||
dumpdata. | ||
""" | ||
|
||
if not model_name: | ||
return None | ||
try: | ||
model = apps.get_model(app_label, model_name) | ||
except LookupError: | ||
return None | ||
|
||
# Delay import for `register_routers` patching. | ||
from django_mongodb_backend.models import EmbeddedModel | ||
|
||
return False if issubclass(model, EmbeddedModel) else None | ||
|
||
|
||
def register_routers(): | ||
""" | ||
Patch the ConnectionRouter with methods to get KMS credentials and provider | ||
from the SchemaEditor. | ||
""" | ||
ConnectionRouter.kms_provider = ConnectionRouter._router_func("kms_provider") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately, I think you'll have to write a custom function similar to |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,12 @@ | ||
from django.conf import settings | ||
from django.db import router | ||
from django.db.backends.base.schema import BaseDatabaseSchemaEditor | ||
from django.db.models import Index, UniqueConstraint | ||
from pymongo.encryption import ClientEncryption, CodecOptions | ||
from pymongo.operations import SearchIndexModel | ||
|
||
from django_mongodb_backend.indexes import SearchIndex | ||
|
||
from .fields import EmbeddedModelField | ||
from .indexes import SearchIndex | ||
from .query import wrap_database_errors | ||
from .utils import OperationCollector | ||
|
||
|
@@ -41,7 +43,7 @@ def get_database(self): | |
@wrap_database_errors | ||
@ignore_embedded_models | ||
def create_model(self, model): | ||
self.get_database().create_collection(model._meta.db_table) | ||
self._create_collection(model) | ||
self._create_model_indexes(model) | ||
# Make implicit M2M tables. | ||
for field in model._meta.local_many_to_many: | ||
|
@@ -418,3 +420,61 @@ def _field_should_have_unique(self, field): | |
db_type = field.db_type(self.connection) | ||
# The _id column is automatically unique. | ||
return db_type and field.unique and field.column != "_id" | ||
|
||
def _create_collection(self, model): | ||
""" | ||
If the model is encrypted create an encrypted collection with the | ||
encrypted fields map else create a normal collection. | ||
""" | ||
|
||
def _create_collection(self, model): | ||
""" | ||
If the model is encrypted, create an encrypted collection with the | ||
encrypted fields map; else, create a normal collection. | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. inadvertently repeated definition |
||
db = self.get_database() | ||
if getattr(model, "encrypted", False): | ||
client = self.connection.connection | ||
options = client._options.auto_encryption_opts | ||
key_vault_namespace = options._key_vault_namespace | ||
kms_providers = options._kms_providers | ||
codec_options = CodecOptions() | ||
|
||
ce = ClientEncryption(kms_providers, key_vault_namespace, client, codec_options) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is some example code that uses:
which might be more appropriate (though you wonder why |
||
|
||
# TODO: Validate schema! `create_encrypted_collection` appears to | ||
aclark4life marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# succeed no matter what you give it, as long as it's valid JSON. | ||
# E.g. encrypted_fields_map = [] | ||
encrypted_fields_map = self._get_encrypted_fields_map(model) | ||
provider = router.kms_provider(model) | ||
|
||
# TODO: Remove ternary condition when `master_key` option is not | ||
# inadvertently set to "default" somewhere, which then causes the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the database router issue I pointed out in another comment. However, perhaps the ternary is a useful condition to keep since think the user shouldn't have to define KMS_CREDENTIALS if they're using local, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually it may have just been that I forgot to index the |
||
# `master_key.copy` in libmongocrypt to fail. | ||
credentials = settings.DATABASES[db].KMS_CREDENTIALS if provider != "local" else None | ||
aclark4life marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
ce.create_encrypted_collection( | ||
db, | ||
model._meta.db_table, | ||
encrypted_fields_map, | ||
provider, | ||
credentials, | ||
) | ||
else: | ||
db.create_collection(model._meta.db_table) | ||
|
||
def _get_encrypted_fields_map(self, model): | ||
connection = self.connection | ||
fields = model._meta.fields | ||
|
||
return { | ||
"fields": [ | ||
{ | ||
"bsonType": field.db_type(connection), | ||
"path": field.column, | ||
**({"queries": field.queries} if getattr(field, "queries", None) else {}), | ||
} | ||
for field in fields | ||
if getattr(field, "encrypted", False) | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there some documentation we can link to help users know how to configure credentials, providers, etc? It doesn't feel like Django's job to document and maintain this sort of mapping.
I also read:
so this won't work for that use case (I think).
I'd suggest trying to minimize the amount of "helpers" in this PR. We can always add things later if there are user pain points, but I feel these thing shouldn't be our focus for v1. Really, we should enhance MongoDB/pymongo docs if it's unclear how to construct the providers dictionary. I don't think a solution of "set these environment variables instead" is making things simpler.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's definitely not Django's job but it may be Django MongoDB Backend's job since we are trying to support QE and we may need that mapping or something like it in the schema.
Good catch! Let me test some vendors with what I have now and if we can rely on PyMongo for some of this even better.
Agreed. I definitely don't want to be in the env var business but I do want to be in the "make this feature work with minimal effort" business.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think
KMS_CREDENTIALS
goes inDATABASES
also. Referencingencryption.KMS_CREDENTIALS
in schema editor doesn't look good to me for several reasons. It's "global state" as we discussed with kms providers, where it eliminates the possibility to use different provider credentials for different database aliases.Also, the use of environment couples the list of providers ("aws", "azure", etc.) as well as each providers options ("accessKeyId", "secretAccessKey", etc.) to this package's release cycle. I remain unconvinced that the environment variables solution is useful and a step toward "making this feature work with minimal effort." How are environment variables less effort than something like this:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a helper like
EncryptedRouter
orQueryType
so folks can do either:or
The helper just happens to use env vars.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Feel free try to gather some consensus on the design doc, but I've tried to lay out the reasons I believe
encryption.KMS_CREDENTIALS
adds an unnecessary layer of opaque, hardcoded inflexibility. All of these helpers are extra things to test and document for not much, if any, gain, in my opinion.Jib agreed that the query type helper could be added later, if it's seen as a real pain point. (Incidentally, the current design differs from what I proposed.)
I feel you have some strong opinion that "dictionaries are ugly for configuration" or that using dictionaries is equated with "manual configuration of QE." For me, you've done a good job with the schema map part of this, and I think that's the main configuration thing to be automated, but feel free to get clarity from Jib about exactly what he meant by "manual configuration."