Skip to content

Commit

Permalink
Add clearer error messages for datatype mismatch in HDFStore.append. …
Browse files Browse the repository at this point in the history
…Raise ValueError when nan_rep is too large for the existing PyTables column. Add and modify applicable test code.
  • Loading branch information
JakeTT404 committed Feb 3, 2025
1 parent e84a7f7 commit 6dd0216
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 9 deletions.
11 changes: 11 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3524,6 +3524,14 @@ def validate(self, other) -> None:
# Value of type "Optional[Any]" is not indexable [index]
oax = ov[i] # type: ignore[index]
if sax != oax:
## Raise clearer error if mismatching type on values_axes
if c == "values_axes" and sax.kind != oax.kind:
raise TypeError(
f"Cannot serialize the column [{oax.values[0]}] "
f"because its data contents are not [{oax.kind}] "
f"but [{sax.kind}] object dtype"
)
# Fallback if other source of difference
raise ValueError(
f"invalid combination of [{c}] on appending data "
f"[{sax}] vs current table [{oax}]"
Expand Down Expand Up @@ -5136,6 +5144,9 @@ def _maybe_convert_for_string_atom(
data = bvalues.copy()
data[mask] = nan_rep

if existing_col and mask.any() and len(nan_rep) > existing_col.itemsize:
raise ValueError("NaN representation is too large for existing column size")

# see if we have a valid string type
inferred_type = lib.infer_dtype(data, skipna=False)
if inferred_type != "string":
Expand Down
19 changes: 10 additions & 9 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,14 @@ def check_col(key, name, size):
with pytest.raises(ValueError, match=msg):
store.append("df_new", df_new)

# bigger NaN representation on next append
df_new = DataFrame([[124, "a"], [346, "b"]])
store.append("df_new2", df_new)
df_new = DataFrame([[124, None], [346, "b"]])
msg = "NaN representation is too large for existing column size"
with pytest.raises(ValueError, match=msg):
store.append("df_new2", df_new)

# min_itemsize on Series index (GH 11412)
df = DataFrame(
{
Expand Down Expand Up @@ -822,15 +830,8 @@ def test_append_raise(setup_path):
df["foo"] = Timestamp("20130101")
store.append("df", df)
df["foo"] = "bar"
msg = re.escape(
"invalid combination of [values_axes] on appending data "
"[name->values_block_1,cname->values_block_1,"
"dtype->bytes24,kind->string,shape->(1, 30)] "
"vs current table "
"[name->values_block_1,cname->values_block_1,"
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
)
with pytest.raises(ValueError, match=msg):
msg = re.escape("Cannot serialize the column [foo] but [string] object dtype")
with pytest.raises(TypeError, match=msg):
store.append("df", df)


Expand Down

0 comments on commit 6dd0216

Please sign in to comment.