gh-111545: Add PyHash_Double() function

vstinner · vstinner · commit 26fc990a1278 · 2023-11-15T13:03:17.000+01:00
* Re-add the _PyHASH_NAN constant.
* _Py_HashDouble(NULL, value) now returns _PyHASH_NAN when value is NaN.
* Add tests: Modules/_testcapi/hash.c and
  Lib/test/test_capi/test_hash.py.
diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst
@@ -5,12 +5,16 @@ PyHash API
 
 See also the :c:member:`PyTypeObject.tp_hash` member.
 
+Types
+^^^^^
+
 .. c:type:: Py_hash_t
 
    Hash value type: signed integer.
 
    .. versionadded:: 3.2
 
+
 .. c:type:: Py_uhash_t
 
    Hash value type: unsigned integer.
@@ -41,8 +45,28 @@ See also the :c:member:`PyTypeObject.tp_hash` member.
    .. versionadded:: 3.4
 
 
+Functions
+^^^^^^^^^
+
+.. c:function:: Py_hash_t PyHash_Double(double value, PyObject *obj)
+
+   Hash a C double number.
+
+   If *value* is not-a-number (NaN):
+
+   * If *obj* is not ``NULL``, return the hash of the *obj* pointer.
+   * Otherwise, return :data:`sys.hash_info.nan <sys.hash_info>` (``0``).
+
+   The function cannot fail: it cannot return ``-1``.
+
+   .. versionadded:: 3.13
+
+
 .. c:function:: PyHash_FuncDef* PyHash_GetFuncDef(void)
 
    Get the hash function definition.
 
+   .. seealso::
+      :pep:`456` "Secure and interchangeable hash algorithm".
+
    .. versionadded:: 3.4
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
@@ -1034,7 +1034,13 @@ always available.
 
    .. attribute:: hash_info.nan
 
-      (This attribute is no longer used)
+      The hash value returned for not-a-number (NaN).
+
+      This hash value is only used by the :c:func:`PyHash_Double` C function
+      when the *obj* argument is ``NULL``.
+
+      .. versionchanged:: 3.10
+         This hash value is no longer used to hash numbers in Python.
 
    .. attribute:: hash_info.imag
 
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
@@ -1181,6 +1181,9 @@ New Features
   :exc:`KeyError` if the key missing.
   (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.)
 
+* Add :c:func:`PyHash_Double` function to hash a C double number.
+  (Contributed by Victor Stinner in :gh:`111545`.)
+
 
 Porting to Python 3.13
 ----------------------
diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h
@@ -11,3 +11,5 @@ typedef struct {
 } PyHash_FuncDef;
 
 PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
+
+PyAPI_FUNC(Py_hash_t) PyHash_Double(double value, PyObject *obj);
diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h
@@ -32,6 +32,7 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
 
 #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
 #define _PyHASH_INF 314159
+#define _PyHASH_NAN 0
 #define _PyHASH_IMAG _PyHASH_MULTIPLIER
 
 /* Hash secret
diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py
@@ -1,9 +1,11 @@
+import math
 import sys
 import unittest
 from test.support import import_helper
 _testcapi = import_helper.import_module('_testcapi')
 
 
+NULL = None
 SIZEOF_PY_HASH_T = _testcapi.SIZEOF_VOID_P
 
 
@@ -31,3 +33,57 @@ def test_hash_getfuncdef(self):
         self.assertEqual(func_def.name, hash_info.algorithm)
         self.assertEqual(func_def.hash_bits, hash_info.hash_bits)
         self.assertEqual(func_def.seed_bits, hash_info.seed_bits)
+
+    def test_hash_double(self):
+        # Test PyHash_Double() through the _testcapi.hash_double() wrapper.
+        # Every finite or infinite value is checked with obj=NULL and with
+        # obj=marker: the obj argument must only matter for NaN.
+        hash_double = _testcapi.hash_double
+        marker = object()
+        marker_hash = hash(marker)
+
+        # test integers: a float with an integral value must hash like the
+        # equal Python int
+        def python_hash_int(x):
+            self.assertIsInstance(x, int)
+            return hash(x)
+
+        integers = [
+            *range(1, 30),
+            2**30 - 1,
+            2**233,
+            int(sys.float_info.max),
+        ]
+        integers.extend([-x for x in integers])
+        integers.append(0)
+
+        for x in integers:
+            for obj in (NULL, marker):
+                with self.subTest(x=x, obj=obj):
+                    self.assertEqual(hash_double(float(x), obj),
+                                     python_hash_int(x))
+
+        # test +inf and -inf: pass obj explicitly so that both the NULL and
+        # the non-NULL cases are really exercised
+        inf = float('inf')
+        for obj in (NULL, marker):
+            with self.subTest(obj=obj):
+                self.assertEqual(hash_double(inf, obj), sys.hash_info.inf)
+                self.assertEqual(hash_double(-inf, obj), -sys.hash_info.inf)
+
+        # test not-a-number (NaN): the only case where obj changes the result
+        self.assertEqual(hash_double(float('nan'), marker), marker_hash)
+        self.assertEqual(hash_double(float('nan'), NULL), sys.hash_info.nan)
+
+        # special float values: compare with Python hash() function
+        def python_hash_double(x):
+            return hash(x)
+
+        special_values = (
+            math.nextafter(0.0, 1.0),  # smallest positive subnormal number
+            sys.float_info.min,        # smallest positive normal number
+            sys.float_info.epsilon,
+            sys.float_info.max,        # largest positive finite number
+        )
+        for x in special_values:
+            for obj in (NULL, marker):
+                with self.subTest(x=x, obj=obj):
+                    self.assertEqual(hash_double(x, obj),
+                                     python_hash_double(x))
+                    self.assertEqual(hash_double(-x, obj),
+                                     python_hash_double(-x))
diff --git a/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst
@@ -0,0 +1,2 @@
+Add :c:func:`PyHash_Double` function to hash a C double number. Patch by
+Victor Stinner.
diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c
@@ -1,6 +1,7 @@
 #include "parts.h"
 #include "util.h"
 
+
 static PyObject *
 hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
 {
@@ -44,8 +45,26 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
     return result;
 }
 
+
+// Test wrapper for PyHash_Double(): hash_double(value[, obj]) -> int.
+// 'value' is parsed as a C double; 'obj' is an optional Python object.
+static PyObject *
+hash_double(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    double value;
+    PyObject *obj = NULL;  // stays NULL when the optional argument is omitted
+    if (!PyArg_ParseTuple(args, "d|O", &value, &obj)) {
+        return NULL;
+    }
+    // NOTE(review): NULLABLE() is defined outside this hunk; presumably it
+    // maps None to NULL so Python tests can reach the obj == NULL code path
+    // (the test suite passes NULL = None) -- confirm.
+    NULLABLE(obj);
+    Py_hash_t hash = PyHash_Double(value, obj);
+    // PyHash_Double() is documented as never returning -1.
+    assert(hash != -1);
+    // The result is converted with PyLong_FromLongLong(): make sure that a
+    // Py_hash_t always fits into a long long.
+    Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
+    return PyLong_FromLongLong(hash);
+}
+
+
+// Method table of the hash tests; the {NULL} entry terminates the array.
 static PyMethodDef test_methods[] = {
     {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
+    {"hash_double", hash_double, METH_VARARGS},  // hash_double(value[, obj])
     {NULL},
 };
 
diff --git a/Python/pyhash.c b/Python/pyhash.c
@@ -86,7 +86,7 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
 Py_hash_t _Py_HashPointer(const void *);
 
 Py_hash_t
-_Py_HashDouble(PyObject *inst, double v)
+PyHash_Double(double v, PyObject *obj)
 {
     int e, sign;
     double m;
@@ -95,8 +95,15 @@ _Py_HashDouble(PyObject *inst, double v)
     if (!Py_IS_FINITE(v)) {
         if (Py_IS_INFINITY(v))
             return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
-        else
-            return _Py_HashPointer(inst);
+        else {
+            assert(Py_IS_NAN(v));
+            if (obj != NULL) {
+                return _Py_HashPointer(obj);
+            }
+            else {
+                return _PyHASH_NAN;
+            }
+        }
     }
 
     m = frexp(v, &e);
@@ -131,6 +138,12 @@ _Py_HashDouble(PyObject *inst, double v)
     return (Py_hash_t)x;
 }
 
+// Wrapper keeping the previous private name and its (obj, v) argument order:
+// it only forwards to PyHash_Double(), which takes (v, obj).
+Py_hash_t
+_Py_HashDouble(PyObject *obj, double v)
+{
+    return PyHash_Double(v, obj);
+}
+
 Py_hash_t
 _Py_HashPointerRaw(const void *p)
 {
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
@@ -1497,7 +1497,7 @@ get_hash_info(PyThreadState *tstate)
     PyStructSequence_SET_ITEM(hash_info, field++,
                               PyLong_FromLong(_PyHASH_INF));
     PyStructSequence_SET_ITEM(hash_info, field++,
-                              PyLong_FromLong(0));  // This is no longer used
+                              PyLong_FromLong(_PyHASH_NAN));
     PyStructSequence_SET_ITEM(hash_info, field++,
                               PyLong_FromLong(_PyHASH_IMAG));
     PyStructSequence_SET_ITEM(hash_info, field++,

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+Add :c:func:`PyHash_Double` function to hash a C double number. Patch by
	`2`	`+Victor Stinner.`