histogrammar · mbaak · Dec 10, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024
diff --git a/.gitignore b/.gitignore
@@ -95,3 +95,6 @@ ENV/
 
 # Rope project settings
 .ropeproject
+
+# tests output files
+histogrammar/notebooks/*.json
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -2,6 +2,24 @@
 Release notes
 =============
 
+Version 1.0.34, Dec 2024
+------------------------
+* Fix typo in build pipeline Python versions config list.
+* Fix error in SparselyBin __eq__ method.
+* Fix test utility corner case error (test_numpy.twosigfigs function).
+* Fix error in test context manager for pandas which prevented execution of tests.
+* Fix error in expected bin count in test_numpy.test_n_bins test.
+* Prevent logging zero execution time in TestNumpy class.
+
+* Remove Python 3.8 environment from build pipeline.
+* Support numpy >= 2.0.0 (np.string_ -> np.bytes_, np.unicode_ -> np.str_).
+* Remove uses of pd.util.testing.makeMixedDataFrame, which is not available in pandas >= 2.0.0.
+* Switch from 'pkg_resources' to 'importlib' module for resolving package files.
+* Switch from 'distutils.spawn' to 'shutil.which' for finding nvcc command.
+
+* Remove unused test_gpu.twosigfigs function.
+* Refactor tests with Numpy() and Pandas() context managers to use single 'with' statement.
+
 Version 1.0.33, Dec 2022
 ------------------------
 * fix of get_sub_hist() when Bin histogram is filled only with nans.

diff --git a/README.rst b/README.rst
@@ -20,8 +20,8 @@ PyCUDA is available, they can also be filled from Numpy arrays by JIT-compiling
 
 This Python implementation of histogrammar been tested to guarantee compatibility with its Scala implementation.
 
-Latest Python release: v1.0.33 (Dec 2022).
-Latest update: Dec 2023.
+Latest Python release: v1.0.34 (Dec 2024).
+Latest update: Dec 2024.
 
 Announcements
 =============

diff --git a/histogrammar/dfinterface/filling_utils.py b/histogrammar/dfinterface/filling_utils.py
@@ -49,7 +49,7 @@ def normalize_dtype(dtype):
             # this converts pandas types, such as pd.Int64, into numpy types
             dtype = type(dtype.type())
         dtype = np.dtype(dtype).type
-        if dtype in {np.str_, np.string_}:
+        if dtype in {np.str_, np.bytes_}:
             dtype = np.dtype(str).type
         # MB 20210404: nb.object_ is kept an object -> uses to_string(). str uses only_str()
     except BaseException:
@@ -116,7 +116,7 @@ def only_str(val):
     elif isinstance(val, pd.Series):
         # at this point, data type of pd.series has already been inferred as *to be* 'string'
         dtype = np.dtype(val.dtype).type
-        return val.values if dtype in [str, np.str_, np.string_] else val.astype(str).values
+        return val.values if dtype in [str, np.str_, np.bytes_] else val.astype(str).values
     elif hasattr(val, "__iter__"):
         return np.asarray([s if isinstance(s, str) else "None" for s in val])
     return "None"

diff --git a/histogrammar/util.py b/histogrammar/util.py
@@ -21,7 +21,7 @@
 
 import histogrammar.pycparser.c_ast
 
-# Definitions for python 2/3 compatability
+# Definitions for python 2/3 compatibility
 if sys.version_info[0] > 2:
     basestring = str
     xrange = range
@@ -312,13 +312,15 @@ def function(datum):
                     if numpy is not None:
                         context["numpy"] = numpy
                         context["np"] = numpy
+                        major = int(numpy.__version__.split('.')[0])
+                        npcore = numpy._core if major > 1 else numpy.core
 
                     # if the datum is a dict, override the namespace with its dict keys
                     if isinstance(datum, dict):                # if it's a dict
                         context.update(datum)                  # use its items as variables
 
                     # if the datum is a Numpy record array, override the namespace with its field names
-                    elif numpy is not None and isinstance(datum, numpy.core.records.recarray):
+                    elif numpy is not None and isinstance(datum, npcore.records.recarray):
                         context.update(dict((n, datum[n]) for n in datum.dtype.names))
 
                     # if the datum is a Pandas DataFrame, override the namespace with its column names
@@ -584,7 +586,7 @@ def get_datatype(hist, itr=0):
             keys = list(hist.bins.keys())
             dt = type(keys[0]) if len(keys) > 0 else str
             dt = np.dtype(dt).type
-            if (dt is np.str_) or (dt is np.string_) or (dt is np.object_):
+            if (dt is np.str_) or (dt is np.bytes_) or (dt is np.object_):
                 dt = str
             datatype = [dt]
         else:

diff --git a/histogrammar/version.py b/histogrammar/version.py
@@ -3,9 +3,9 @@
 import re
 
 name = "histogrammar"
-__version__ = "1.0.33"
-version = "1.0.33"
-full_version = "1.0.33"
+__version__ = "1.0.34"
+version = "1.0.34"
+full_version = "1.0.34"
 release = True
 
 version_info = tuple(re.split(r"[-\.]", __version__))

diff --git a/requirements-test.txt b/requirements-test.txt
@@ -4,4 +4,4 @@ jupyter_client>=5.2.3
 ipykernel>=5.1.3
 pre-commit>=2.9.0
 matplotlib
-pandas<2.0.0
+pandas
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
-numpy<2.0.0
+numpy
 tqdm
 joblib>=0.14.0
diff --git a/setup.py b/setup.py
@@ -22,7 +22,7 @@
 
 MAJOR = 1
 REVISION = 0
-PATCH = 33
+PATCH = 34
 DEV = False
 # NOTE: also update version at: README.rst and update CHANGES.rst
 

diff --git a/tests/test_gpu.py b/tests/test_gpu.py
@@ -109,9 +109,6 @@ def runNumpy(self, aggregator, expected):
             self.assertEqual(aggregator.toImmutable(), duplicate.toImmutable())
             self.assertEqual(aggregator, duplicate)
 
-    def twosigfigs(self, number):
-        return round(number, 1 - int(math.floor(math.log10(number))))
-
     def compare(self, name, hgpu, hpy, pydata, debug=False):
         sys.stderr.write(name + "\n")