-
-
Notifications
You must be signed in to change notification settings - Fork 34.6k
gh-149807: Fix hash(frozendict): compute (key, value) pair hash #149841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Fix ``hash(frozendict)``: compute the hash of each ``(key, value)`` pair | ||
| correctly. Patch by Victor Stinner. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8228,6 +8228,40 @@ _shuffle_bits(Py_uhash_t h) | |
| return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; | ||
| } | ||
|
|
||
| // Compute hash((key, value)). | ||
| // Code copied from tuple_hash(). | ||
| static Py_hash_t | ||
| frozendict_pair_hash(PyObject *key, PyObject *value) | ||
| { | ||
| Py_ssize_t len = 2; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps make this a |
||
| Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5; | ||
|
|
||
| Py_uhash_t lane = PyObject_Hash(key); | ||
| if (lane == (Py_uhash_t)-1) { | ||
| return -1; | ||
| } | ||
| acc += lane * _PyTuple_HASH_XXPRIME_2; | ||
| acc = _PyTuple_HASH_XXROTATE(acc); | ||
| acc *= _PyTuple_HASH_XXPRIME_1; | ||
|
|
||
| lane = PyObject_Hash(value); | ||
| if (lane == (Py_uhash_t)-1) { | ||
| return -1; | ||
| } | ||
| acc += lane * _PyTuple_HASH_XXPRIME_2; | ||
| acc = _PyTuple_HASH_XXROTATE(acc); | ||
| acc *= _PyTuple_HASH_XXPRIME_1; | ||
|
|
||
| /* Add input length, mangled to keep the historical value of hash(()). */ | ||
| acc += len ^ (_PyTuple_HASH_XXPRIME_5 ^ 3527539UL); | ||
|
|
||
| if (acc == (Py_uhash_t)-1) { | ||
| acc = 1546275796; | ||
| } | ||
| return acc; | ||
| } | ||
|
|
||
|
|
||
| // Code copied from frozenset_hash() | ||
| static Py_hash_t | ||
| frozendict_hash(PyObject *op) | ||
|
|
@@ -8244,17 +8278,11 @@ frozendict_hash(PyObject *op) | |
| PyObject *key, *value; // borrowed refs | ||
| Py_ssize_t pos = 0; | ||
| while (PyDict_Next(op, &pos, &key, &value)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any reason not to do |
||
| Py_hash_t key_hash = PyObject_Hash(key); | ||
| if (key_hash == -1) { | ||
| return -1; | ||
| } | ||
| hash ^= _shuffle_bits(key_hash); | ||
|
|
||
| Py_hash_t value_hash = PyObject_Hash(value); | ||
| if (value_hash == -1) { | ||
| Py_hash_t pair_hash = frozendict_pair_hash(key, value); | ||
| if (pair_hash == -1) { | ||
| return -1; | ||
| } | ||
| hash ^= _shuffle_bits(value_hash); | ||
| hash ^= _shuffle_bits(pair_hash); | ||
| } | ||
|
|
||
| /* Factor in the number of active entries */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps simpler, just verify the PEP 814 guarantee that
`hash(fd) == hash(frozenset(fd.items()))`? There's no strict rule that says none of these hashes can collide, especially with the vagaries of the seeded string hashes, but we can verify that the behavior matches the PEP 814 spec and trust that the `frozenset` and `tuple` hash algorithms are adequate. This also provides a good test to catch if someone does update the `tuple` or `frozenset` hashing in a way that would break compatibility.