Skip to content

Commit

Permalink
Merge pull request #393 from OpenCOMPES/bugfix-numba-binning
Browse files Browse the repository at this point in the history
Exception handling for binning when df cols are of non-numeric types
  • Loading branch information
rettigl authored May 8, 2024
2 parents eca7cd2 + b4e0686 commit 1e47b4c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
9 changes: 8 additions & 1 deletion sed/binning/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,14 @@ def bin_partition(
apply_jitter_on_column(sel_part, amp * binsize, col, mode)
vals = sel_part.values
else:
vals = part.values[:, col_id]
vals = part.iloc[:, col_id].values
if vals.dtype == "object":
raise ValueError(
"Binning requires all binned dataframe columns to be of numeric type. "
"Encountered data types were "
f"{[part.columns[id] + ': ' + str(part.iloc[:, id].dtype) for id in col_id]}. "
"Please make sure all axes data are of numeric type.",
)
if hist_mode == "numba":
hist_partition, edges = numba_histogramdd(
vals,
Expand Down
14 changes: 14 additions & 0 deletions tests/test_binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,20 @@ def test_bin_partition() -> None:
assert np.allclose(cast(np.ndarray, res), res1)


def test_non_numeric_dtype_error() -> None:
    """Check that bin_partition raises ValueError for a non-numeric axis column.

    The "x" column of the sample dataframe is cast to the "string" dtype while
    "y" and "z" are cast to "int32". The raised error message is expected to
    list the name and dtype of each of these columns.
    """
    string_typed_pdf = sample_pdf.astype({"x": "string", "y": "int32", "z": "int32"})
    expected_fragment = "Encountered data types were ['x: string', 'y: int32', 'z: int32']"

    with pytest.raises(ValueError) as exc_info:
        bin_partition(
            part=string_typed_pdf,
            bins=bins,  # type: ignore[arg-type]
            axes=columns,
            ranges=ranges,
            skip_test=False,
        )

    # The dtype listing must appear in the message of the captured exception.
    assert expected_fragment in str(exc_info.value)


def test_bin_dataframe() -> None:
"""Test bin_dataframe function"""
res = bin_dataframe(df=sample_ddf, bins=bins, axes=columns, ranges=ranges)
Expand Down

0 comments on commit 1e47b4c

Please sign in to comment.