Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
@@ -4220,7 +4220,7 @@ similar to how ``read_csv`` and ``to_csv`` work.
.. ipython:: python

df_tl = pd.DataFrame({"A": list(range(5)), "B": list(range(5))})
df_tl.to_hdf("store_tl.h5", "table", append=True)
df_tl.to_hdf("store_tl.h5", key="table", append=True)
pd.read_hdf("store_tl.h5", "table", where=["index>2"])

.. ipython:: python
@@ -4243,12 +4243,12 @@ HDFStore will by default not drop rows that are all missing. This behavior can b
)
df_with_missing

df_with_missing.to_hdf("file.h5", "df_with_missing", format="table", mode="w")
df_with_missing.to_hdf("file.h5", key="df_with_missing", format="table", mode="w")

pd.read_hdf("file.h5", "df_with_missing")

df_with_missing.to_hdf(
"file.h5", "df_with_missing", format="table", mode="w", dropna=True
"file.h5", key="df_with_missing", format="table", mode="w", dropna=True
)
pd.read_hdf("file.h5", "df_with_missing")

@@ -4278,7 +4278,7 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``for
.. ipython:: python
:okexcept:

pd.DataFrame(np.random.randn(10, 2)).to_hdf("test_fixed.h5", "df")
pd.DataFrame(np.random.randn(10, 2)).to_hdf("test_fixed.h5", key="df")
pd.read_hdf("test_fixed.h5", "df", where="index>5")

.. ipython:: python
@@ -6321,23 +6321,23 @@ The following test functions will be used below to compare the performance of se


def test_hdf_fixed_write(df):
df.to_hdf("test_fixed.hdf", "test", mode="w")
df.to_hdf("test_fixed.hdf", key="test", mode="w")


def test_hdf_fixed_read():
pd.read_hdf("test_fixed.hdf", "test")


def test_hdf_fixed_write_compress(df):
df.to_hdf("test_fixed_compress.hdf", "test", mode="w", complib="blosc")
df.to_hdf("test_fixed_compress.hdf", key="test", mode="w", complib="blosc")


def test_hdf_fixed_read_compress():
pd.read_hdf("test_fixed_compress.hdf", "test")


def test_hdf_table_write(df):
df.to_hdf("test_table.hdf", "test", mode="w", format="table")
df.to_hdf("test_table.hdf", key="test", mode="w", format="table")


def test_hdf_table_read():
@@ -6346,7 +6346,7 @@ The following test functions will be used below to compare the performance of se

def test_hdf_table_write_compress(df):
df.to_hdf(
"test_table_compress.hdf", "test", mode="w", complib="blosc", format="table"
"test_table_compress.hdf", key="test", mode="w", complib="blosc", format="table"
)


2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.11.0.rst
Original file line number Diff line number Diff line change
@@ -347,7 +347,7 @@ Enhancements
.. ipython:: python

df = pd.DataFrame({'A': range(5), 'B': range(5)})
df.to_hdf('store.h5', 'table', append=True)
df.to_hdf('store.h5', key='table', append=True)
pd.read_hdf('store.h5', 'table', where=['index > 2'])

.. ipython:: python
8 changes: 4 additions & 4 deletions doc/source/whatsnew/v0.13.0.rst
Original file line number Diff line number Diff line change
@@ -385,7 +385,7 @@ HDFStore API changes
dfq = pd.DataFrame(np.random.randn(10, 4),
columns=list('ABCD'),
index=pd.date_range('20130101', periods=10))
dfq.to_hdf(path, 'dfq', format='table', data_columns=True)
dfq.to_hdf(path, key='dfq', format='table', data_columns=True)

Use boolean expressions, with in-line function evaluation.

@@ -415,9 +415,9 @@ HDFStore API changes

path = 'test.h5'
df = pd.DataFrame(np.random.randn(10, 2))
df.to_hdf(path, 'df_table', format='table')
df.to_hdf(path, 'df_table2', append=True)
df.to_hdf(path, 'df_fixed')
df.to_hdf(path, key='df_table', format='table')
df.to_hdf(path, key='df_table2', append=True)
df.to_hdf(path, key='df_fixed')
with pd.HDFStore(path) as store:
print(store)

4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.17.0.rst
Original file line number Diff line number Diff line change
@@ -793,7 +793,7 @@ Previous behavior:

In [27]:
df_with_missing.to_hdf('file.h5',
'df_with_missing',
key='df_with_missing',
format='table',
mode='w')

@@ -809,7 +809,7 @@ New behavior:

.. ipython:: python

df_with_missing.to_hdf("file.h5", "df_with_missing", format="table", mode="w")
df_with_missing.to_hdf("file.h5", key="df_with_missing", format="table", mode="w")

pd.read_hdf("file.h5", "df_with_missing")

2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.rst
Original file line number Diff line number Diff line change
@@ -1059,7 +1059,7 @@ usually resulting in an invalid comparison, returning an empty result frame. The
.. ipython:: python

df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']})
df.to_hdf('store.h5', 'key', format='table', data_columns=True)
df.to_hdf('store.h5', key='key', format='table', data_columns=True)
df.dtypes

Previous behavior:
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
@@ -92,6 +92,7 @@ Other API changes

Deprecations
~~~~~~~~~~~~
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_hdf` except ``path_or_buf``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
-

3 changes: 3 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
@@ -2642,6 +2642,9 @@ def to_json(
)

@final
@deprecate_nonkeyword_arguments(
version="3.0", allowed_args=["self", "path_or_buf"], name="to_hdf"
)
def to_hdf(
self,
path_or_buf: FilePath | HDFStore,
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
@@ -643,7 +643,7 @@ def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_d
tm.assert_frame_equal(result, expected)

path = tmp_path / "test.hdf"
df.to_hdf(path, "df", format="table")
df.to_hdf(path, key="df", format="table")
result = read_hdf(path, "df", columns=["A", "B"])
expected = df.reindex(columns=["A", "B"])
tm.assert_frame_equal(result, expected)
8 changes: 4 additions & 4 deletions pandas/tests/io/pytables/test_categorical.py
Original file line number Diff line number Diff line change
@@ -152,7 +152,7 @@ def test_categorical_conversion(tmp_path, setup_path):
# We are expecting an empty DataFrame matching types of df
expected = df.iloc[[], :]
path = tmp_path / setup_path
df.to_hdf(path, "df", format="table", data_columns=True)
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df", where="obsids=B")
tm.assert_frame_equal(result, expected)

@@ -163,7 +163,7 @@ def test_categorical_conversion(tmp_path, setup_path):
# We are expecting an empty DataFrame matching types of df
expected = df.iloc[[], :]
path = tmp_path / setup_path
df.to_hdf(path, "df", format="table", data_columns=True)
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df", where="obsids=B")
tm.assert_frame_equal(result, expected)

@@ -185,7 +185,7 @@ def test_categorical_nan_only_columns(tmp_path, setup_path):
df["d"] = df.b.astype("category")
expected = df
path = tmp_path / setup_path
df.to_hdf(path, "df", format="table", data_columns=True)
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df")
tm.assert_frame_equal(result, expected)

@@ -209,6 +209,6 @@ def test_convert_value(
expected.col = expected.col.cat.set_categories(categorical_values)

path = tmp_path / setup_path
df.to_hdf(path, "df", format="table", min_itemsize=max_widths)
df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
result = read_hdf(path, where=where)
tm.assert_frame_equal(result, expected)
22 changes: 11 additions & 11 deletions pandas/tests/io/pytables/test_complex.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ def test_complex_fixed(tmp_path, setup_path):
)

path = tmp_path / setup_path
df.to_hdf(path, "df")
df.to_hdf(path, key="df")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)

@@ -30,7 +30,7 @@ def test_complex_fixed(tmp_path, setup_path):
columns=list("ABCDE"),
)
path = tmp_path / setup_path
df.to_hdf(path, "df")
df.to_hdf(path, key="df")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)

@@ -43,8 +43,8 @@ def test_complex_table(tmp_path, setup_path):
)

path = tmp_path / setup_path
df.to_hdf(path, "df", format="table")
reread = read_hdf(path, "df")
df.to_hdf(path, key="df", format="table")
reread = read_hdf(path, key="df")
tm.assert_frame_equal(df, reread)

df = DataFrame(
@@ -54,7 +54,7 @@ def test_complex_table(tmp_path, setup_path):
)

path = tmp_path / setup_path
df.to_hdf(path, "df", format="table", mode="w")
df.to_hdf(path, key="df", format="table", mode="w")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)

@@ -77,7 +77,7 @@ def test_complex_mixed_fixed(tmp_path, setup_path):
index=list("abcd"),
)
path = tmp_path / setup_path
df.to_hdf(path, "df")
df.to_hdf(path, key="df")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)

@@ -106,7 +106,7 @@ def test_complex_mixed_table(tmp_path, setup_path):
tm.assert_frame_equal(df.loc[df.A > 2], result)

path = tmp_path / setup_path
df.to_hdf(path, "df", format="table")
df.to_hdf(path, key="df", format="table")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)

@@ -120,7 +120,7 @@ def test_complex_across_dimensions_fixed(tmp_path, setup_path):
comps = [tm.assert_series_equal, tm.assert_frame_equal]
for obj, comp in zip(objs, comps):
path = tmp_path / setup_path
obj.to_hdf(path, "obj", format="fixed")
obj.to_hdf(path, key="obj", format="fixed")
reread = read_hdf(path, "obj")
comp(obj, reread)

@@ -131,7 +131,7 @@ def test_complex_across_dimensions(tmp_path, setup_path):
df = DataFrame({"A": s, "B": s})

path = tmp_path / setup_path
df.to_hdf(path, "obj", format="table")
df.to_hdf(path, key="obj", format="table")
reread = read_hdf(path, "obj")
tm.assert_frame_equal(df, reread)

@@ -172,10 +172,10 @@ def test_complex_series_error(tmp_path, setup_path):

path = tmp_path / setup_path
with pytest.raises(TypeError, match=msg):
s.to_hdf(path, "obj", format="t")
s.to_hdf(path, key="obj", format="t")

path = tmp_path / setup_path
s.to_hdf(path, "obj", format="t", index=False)
s.to_hdf(path, key="obj", format="t", index=False)
reread = read_hdf(path, "obj")
tm.assert_series_equal(s, reread)

10 changes: 5 additions & 5 deletions pandas/tests/io/pytables/test_errors.py
Original file line number Diff line number Diff line change
@@ -115,7 +115,7 @@ def test_invalid_terms(tmp_path, setup_path):
columns=list("ABCD"),
index=date_range("20130101", periods=10),
)
dfq.to_hdf(path, "dfq", format="table", data_columns=True)
dfq.to_hdf(path, key="dfq", format="table", data_columns=True)

# check ok
read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']")
@@ -128,7 +128,7 @@ def test_invalid_terms(tmp_path, setup_path):
columns=list("ABCD"),
index=date_range("20130101", periods=10),
)
dfq.to_hdf(path, "dfq", format="table")
dfq.to_hdf(path, key="dfq", format="table")

msg = (
r"The passed where expression: A>0 or C>0\n\s*"
@@ -169,7 +169,7 @@ def test_invalid_complib(setup_path):
with tm.ensure_clean(setup_path) as path:
msg = r"complib only supports \[.*\] compression."
with pytest.raises(ValueError, match=msg):
df.to_hdf(path, "df", complib="foolib")
df.to_hdf(path, key="df", complib="foolib")


@pytest.mark.parametrize(
@@ -185,7 +185,7 @@ def test_to_hdf_multiindex_extension_dtype(idx, tmp_path, setup_path):
df = DataFrame(0, index=mi, columns=["a"])
path = tmp_path / setup_path
with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
df.to_hdf(path, "df")
df.to_hdf(path, key="df")


def test_unsuppored_hdf_file_error(datapath):
@@ -212,7 +212,7 @@ def test_read_hdf_errors(setup_path, tmp_path):
with pytest.raises(OSError, match=msg):
read_hdf(path, "key")

df.to_hdf(path, "df")
df.to_hdf(path, key="df")
store = HDFStore(path, mode="r")
store.close()

22 changes: 11 additions & 11 deletions pandas/tests/io/pytables/test_file_handling.py
Original file line number Diff line number Diff line change
@@ -64,10 +64,10 @@ def test_mode(setup_path, tmp_path, mode):
# conv write
if mode in ["r", "r+"]:
with pytest.raises(OSError, match=msg):
df.to_hdf(path, "df", mode=mode)
df.to_hdf(path, "df", mode="w")
df.to_hdf(path, key="df", mode=mode)
df.to_hdf(path, key="df", mode="w")
else:
df.to_hdf(path, "df", mode=mode)
df.to_hdf(path, key="df", mode=mode)

# conv read
if mode in ["w"]:
@@ -86,7 +86,7 @@ def test_default_mode(tmp_path, setup_path):
# read_hdf uses default mode
df = tm.makeTimeDataFrame()
path = tmp_path / setup_path
df.to_hdf(path, "df", mode="w")
df.to_hdf(path, key="df", mode="w")
result = read_hdf(path, "df")
tm.assert_frame_equal(result, df)

@@ -177,7 +177,7 @@ def test_complibs_default_settings(tmp_path, setup_path):
# Set complevel and check if complib is automatically set to
# default value
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, "df", complevel=9)
df.to_hdf(tmpfile, key="df", complevel=9)
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)

@@ -188,7 +188,7 @@ def test_complibs_default_settings(tmp_path, setup_path):

# Set complib and check to see if compression is disabled
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, "df", complib="zlib")
df.to_hdf(tmpfile, key="df", complib="zlib")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)

@@ -199,7 +199,7 @@ def test_complibs_default_settings(tmp_path, setup_path):

# Check if not setting complib or complevel results in no compression
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, "df")
df.to_hdf(tmpfile, key="df")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)

@@ -253,7 +253,7 @@ def test_complibs(tmp_path, lvl, lib):
gname = f"{lvl}_{lib}"

# Write and read file to see if data is consistent
df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
df.to_hdf(tmpfile, key=gname, complib=lib, complevel=lvl)
result = read_hdf(tmpfile, gname)
tm.assert_frame_equal(result, df)

@@ -308,7 +308,7 @@ def test_latin_encoding(tmp_path, setup_path, dtype, val):
ser = Series(val, dtype=dtype)

store = tmp_path / setup_path
ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep)
ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep)
retr = read_hdf(store, key)

s_nan = ser.replace(nan_rep, np.nan)
@@ -322,7 +322,7 @@ def test_multiple_open_close(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeDataFrame()
df.to_hdf(path, "df", mode="w", format="table")
df.to_hdf(path, key="df", mode="w", format="table")

# single
store = HDFStore(path)
@@ -399,7 +399,7 @@ def test_multiple_open_close(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeDataFrame()
df.to_hdf(path, "df", mode="w", format="table")
df.to_hdf(path, key="df", mode="w", format="table")

store = HDFStore(path)
store.close()
10 changes: 5 additions & 5 deletions pandas/tests/io/pytables/test_put.py
Original file line number Diff line number Diff line change
@@ -71,17 +71,17 @@ def test_api_default_format(tmp_path, setup_path):
df = tm.makeDataFrame()

with pd.option_context("io.hdf.default_format", "fixed"):
df.to_hdf(path, "df")
df.to_hdf(path, key="df")
with HDFStore(path) as store:
assert not store.get_storer("df").is_table
with pytest.raises(ValueError, match=msg):
df.to_hdf(path, "df2", append=True)
df.to_hdf(path, key="df2", append=True)

with pd.option_context("io.hdf.default_format", "table"):
df.to_hdf(path, "df3")
df.to_hdf(path, key="df3")
with HDFStore(path) as store:
assert store.get_storer("df3").is_table
df.to_hdf(path, "df4", append=True)
df.to_hdf(path, key="df4", append=True)
with HDFStore(path) as store:
assert store.get_storer("df4").is_table

@@ -354,6 +354,6 @@ def test_store_periodindex(tmp_path, setup_path, format):
)

path = tmp_path / setup_path
df.to_hdf(path, "df", mode="w", format=format)
df.to_hdf(path, key="df", mode="w", format=format)
expected = pd.read_hdf(path, "df")
tm.assert_frame_equal(df, expected)
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_pytables_missing.py
Original file line number Diff line number Diff line change
@@ -11,4 +11,4 @@ def test_pytables_raises():
df = pd.DataFrame({"A": [1, 2]})
with pytest.raises(ImportError, match="tables"):
with tm.ensure_clean("foo.h5") as path:
df.to_hdf(path, "df")
df.to_hdf(path, key="df")
32 changes: 16 additions & 16 deletions pandas/tests/io/pytables/test_read.py
Original file line number Diff line number Diff line change
@@ -32,35 +32,35 @@ def test_read_missing_key_close_store(tmp_path, setup_path):
# GH 25766
path = tmp_path / setup_path
df = DataFrame({"a": range(2), "b": range(2)})
df.to_hdf(path, "k1")
df.to_hdf(path, key="k1")

with pytest.raises(KeyError, match="'No object named k2 in the file'"):
read_hdf(path, "k2")

# smoke test to test that file is properly closed after
# read with KeyError before another write
df.to_hdf(path, "k2")
df.to_hdf(path, key="k2")


def test_read_index_error_close_store(tmp_path, setup_path):
# GH 25766
path = tmp_path / setup_path
df = DataFrame({"A": [], "B": []}, index=[])
df.to_hdf(path, "k1")
df.to_hdf(path, key="k1")

with pytest.raises(IndexError, match=r"list index out of range"):
read_hdf(path, "k1", stop=0)

# smoke test to test that file is properly closed after
# read with IndexError before another write
df.to_hdf(path, "k1")
df.to_hdf(path, key="k1")


def test_read_missing_key_opened_store(tmp_path, setup_path):
# GH 28699
path = tmp_path / setup_path
df = DataFrame({"a": range(2), "b": range(2)})
df.to_hdf(path, "k1")
df.to_hdf(path, key="k1")

with HDFStore(path, "r") as store:
with pytest.raises(KeyError, match="'No object named k2 in the file'"):
@@ -222,7 +222,7 @@ def test_read_hdf_open_store(tmp_path, setup_path):
df = df.set_index(keys="E", append=True)

path = tmp_path / setup_path
df.to_hdf(path, "df", mode="w")
df.to_hdf(path, key="df", mode="w")
direct = read_hdf(path, "df")
with HDFStore(path, mode="r") as store:
indirect = read_hdf(store, "df")
@@ -241,7 +241,7 @@ def test_read_hdf_index_not_view(tmp_path, setup_path):
)

path = tmp_path / setup_path
df.to_hdf(path, "df", mode="w", format="table")
df.to_hdf(path, key="df", mode="w", format="table")

df2 = read_hdf(path, "df")
assert df2.index._data.base is None
@@ -258,7 +258,7 @@ def test_read_hdf_iterator(tmp_path, setup_path):
df = df.set_index(keys="E", append=True)

path = tmp_path / setup_path
df.to_hdf(path, "df", mode="w", format="t")
df.to_hdf(path, key="df", mode="w", format="t")
direct = read_hdf(path, "df")
iterator = read_hdf(path, "df", iterator=True)
with closing(iterator.store):
@@ -278,10 +278,10 @@ def test_read_nokey(tmp_path, setup_path):
# Categorical dtype not supported for "fixed" format. So no need
# to test with that dtype in the dataframe here.
path = tmp_path / setup_path
df.to_hdf(path, "df", mode="a")
df.to_hdf(path, key="df", mode="a")
reread = read_hdf(path)
tm.assert_frame_equal(df, reread)
df.to_hdf(path, "df2", mode="a")
df.to_hdf(path, key="df2", mode="a")

msg = "key must be provided when HDF5 file contains multiple datasets."
with pytest.raises(ValueError, match=msg):
@@ -293,10 +293,10 @@ def test_read_nokey_table(tmp_path, setup_path):
df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})

path = tmp_path / setup_path
df.to_hdf(path, "df", mode="a", format="table")
df.to_hdf(path, key="df", mode="a", format="table")
reread = read_hdf(path)
tm.assert_frame_equal(df, reread)
df.to_hdf(path, "df2", mode="a", format="table")
df.to_hdf(path, key="df2", mode="a", format="table")

msg = "key must be provided when HDF5 file contains multiple datasets."
with pytest.raises(ValueError, match=msg):
@@ -325,8 +325,8 @@ def test_read_from_pathlib_path(tmp_path, setup_path):
filename = tmp_path / setup_path
path_obj = Path(filename)

expected.to_hdf(path_obj, "df", mode="a")
actual = read_hdf(path_obj, "df")
expected.to_hdf(path_obj, key="df", mode="a")
actual = read_hdf(path_obj, key="df")

tm.assert_frame_equal(expected, actual)

@@ -344,8 +344,8 @@ def test_read_from_py_localpath(tmp_path, setup_path):
filename = tmp_path / setup_path
path_obj = LocalPath(filename)

expected.to_hdf(path_obj, "df", mode="a")
actual = read_hdf(path_obj, "df")
expected.to_hdf(path_obj, key="df", mode="a")
actual = read_hdf(path_obj, key="df")

tm.assert_frame_equal(expected, actual)

10 changes: 5 additions & 5 deletions pandas/tests/io/pytables/test_retain_attributes.py
Original file line number Diff line number Diff line change
@@ -78,24 +78,24 @@ def test_retain_index_attributes2(tmp_path, setup_path):
df = DataFrame(
{"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))}
)
df.to_hdf(path, "data", mode="w", append=True)
df.to_hdf(path, key="data", mode="w", append=True)
df2 = DataFrame(
{"A": Series(range(3), index=date_range("2002-1-1", periods=3, freq="D"))}
)

df2.to_hdf(path, "data", append=True)
df2.to_hdf(path, key="data", append=True)

idx = date_range("2000-1-1", periods=3, freq="H")
idx.name = "foo"
df = DataFrame({"A": Series(range(3), index=idx)})
df.to_hdf(path, "data", mode="w", append=True)
df.to_hdf(path, key="data", mode="w", append=True)

assert read_hdf(path, "data").index.name == "foo"
assert read_hdf(path, key="data").index.name == "foo"

with tm.assert_produces_warning(errors.AttributeConflictWarning):
idx2 = date_range("2001-1-1", periods=3, freq="H")
idx2.name = "bar"
df2 = DataFrame({"A": Series(range(3), index=idx2)})
df2.to_hdf(path, "data", append=True)
df2.to_hdf(path, key="data", append=True)

assert read_hdf(path, "data").index.name is None
38 changes: 19 additions & 19 deletions pandas/tests/io/pytables/test_round_trip.py
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ def test_conv_read_write():
with tm.ensure_clean() as path:

def roundtrip(key, obj, **kwargs):
obj.to_hdf(path, key, **kwargs)
obj.to_hdf(path, key=key, **kwargs)
return read_hdf(path, key)

o = tm.makeTimeSeries()
@@ -43,7 +43,7 @@ def roundtrip(key, obj, **kwargs):

# table
df = DataFrame({"A": range(5), "B": range(5)})
df.to_hdf(path, "table", append=True)
df.to_hdf(path, key="table", append=True)
result = read_hdf(path, "table", where=["index>2"])
tm.assert_frame_equal(df[df.index > 2], result)

@@ -65,44 +65,44 @@ def test_api(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeDataFrame()
df.iloc[:10].to_hdf(path, "df", append=True, format="table")
df.iloc[10:].to_hdf(path, "df", append=True, format="table")
df.iloc[:10].to_hdf(path, key="df", append=True, format="table")
df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
tm.assert_frame_equal(read_hdf(path, "df"), df)

# append to False
df.iloc[:10].to_hdf(path, "df", append=False, format="table")
df.iloc[10:].to_hdf(path, "df", append=True, format="table")
df.iloc[:10].to_hdf(path, key="df", append=False, format="table")
df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
tm.assert_frame_equal(read_hdf(path, "df"), df)


def test_api_append(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeDataFrame()
df.iloc[:10].to_hdf(path, "df", append=True)
df.iloc[10:].to_hdf(path, "df", append=True, format="table")
df.iloc[:10].to_hdf(path, key="df", append=True)
df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
tm.assert_frame_equal(read_hdf(path, "df"), df)

# append to False
df.iloc[:10].to_hdf(path, "df", append=False, format="table")
df.iloc[10:].to_hdf(path, "df", append=True)
df.iloc[:10].to_hdf(path, key="df", append=False, format="table")
df.iloc[10:].to_hdf(path, key="df", append=True)
tm.assert_frame_equal(read_hdf(path, "df"), df)


def test_api_2(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeDataFrame()
df.to_hdf(path, "df", append=False, format="fixed")
df.to_hdf(path, key="df", append=False, format="fixed")
tm.assert_frame_equal(read_hdf(path, "df"), df)

df.to_hdf(path, "df", append=False, format="f")
df.to_hdf(path, key="df", append=False, format="f")
tm.assert_frame_equal(read_hdf(path, "df"), df)

df.to_hdf(path, "df", append=False)
df.to_hdf(path, key="df", append=False)
tm.assert_frame_equal(read_hdf(path, "df"), df)

df.to_hdf(path, "df")
df.to_hdf(path, key="df")
tm.assert_frame_equal(read_hdf(path, "df"), df)

with ensure_clean_store(setup_path) as store:
@@ -139,18 +139,18 @@ def test_api_invalid(tmp_path, setup_path):
msg = "Can only append to Tables"

with pytest.raises(ValueError, match=msg):
df.to_hdf(path, "df", append=True, format="f")
df.to_hdf(path, key="df", append=True, format="f")

with pytest.raises(ValueError, match=msg):
df.to_hdf(path, "df", append=True, format="fixed")
df.to_hdf(path, key="df", append=True, format="fixed")

msg = r"invalid HDFStore format specified \[foo\]"

with pytest.raises(TypeError, match=msg):
df.to_hdf(path, "df", append=True, format="foo")
df.to_hdf(path, key="df", append=True, format="foo")

with pytest.raises(TypeError, match=msg):
df.to_hdf(path, "df", append=False, format="foo")
df.to_hdf(path, key="df", append=False, format="foo")

# File path doesn't exist
path = ""
@@ -521,7 +521,7 @@ def test_round_trip_equals(tmp_path, setup_path):
df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

path = tmp_path / setup_path
df.to_hdf(path, "df", format="table")
df.to_hdf(path, key="df", format="table")
other = read_hdf(path, "df")
tm.assert_frame_equal(df, other)
assert df.equals(other)
8 changes: 4 additions & 4 deletions pandas/tests/io/pytables/test_select.py
Original file line number Diff line number Diff line change
@@ -349,7 +349,7 @@ def test_select_iterator(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeTimeDataFrame(500)
df.to_hdf(path, "df_non_table")
df.to_hdf(path, key="df_non_table")

msg = "can only use an iterator or chunksize on a table"
with pytest.raises(TypeError, match=msg):
@@ -361,7 +361,7 @@ def test_select_iterator(tmp_path, setup_path):
path = tmp_path / setup_path

df = tm.makeTimeDataFrame(500)
df.to_hdf(path, "df", format="table")
df.to_hdf(path, key="df", format="table")

results = list(read_hdf(path, "df", chunksize=100))
result = concat(results)
@@ -657,7 +657,7 @@ def test_frame_select_complex2(tmp_path):

# use non-trivial selection criteria
params = DataFrame({"A": [1, 1, 2, 2, 3]})
params.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])
params.to_hdf(pp, key="df", mode="w", format="table", data_columns=["A"])

selection = read_hdf(pp, "df", where="A=[2,3]")
hist = DataFrame(
@@ -668,7 +668,7 @@ def test_frame_select_complex2(tmp_path):
),
)

hist.to_hdf(hh, "df", mode="w", format="table")
hist.to_hdf(hh, key="df", mode="w", format="table")

expected = read_hdf(hh, "df", where="l1=[2, 3, 4]")

50 changes: 31 additions & 19 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
@@ -288,35 +288,47 @@ def test_store_dropna(tmp_path, setup_path):
# # Test to make sure defaults are to not drop.
# # Corresponding to Issue 9382
path = tmp_path / setup_path
df_with_missing.to_hdf(path, "df", format="table")
df_with_missing.to_hdf(path, key="df", format="table")
reloaded = read_hdf(path, "df")
tm.assert_frame_equal(df_with_missing, reloaded)

path = tmp_path / setup_path
df_with_missing.to_hdf(path, "df", format="table", dropna=False)
df_with_missing.to_hdf(path, key="df", format="table", dropna=False)
reloaded = read_hdf(path, "df")
tm.assert_frame_equal(df_with_missing, reloaded)

path = tmp_path / setup_path
df_with_missing.to_hdf(path, "df", format="table", dropna=True)
df_with_missing.to_hdf(path, key="df", format="table", dropna=True)
reloaded = read_hdf(path, "df")
tm.assert_frame_equal(df_without_missing, reloaded)


def test_keyword_deprecation():
    """Check that passing ``key`` positionally to ``to_hdf`` emits a FutureWarning.

    The HDF5 file is written to a temporary path provided by
    ``tm.ensure_clean`` so that running the test does not leave a stray file
    behind in the current working directory.
    """
    # GH 54229
    msg = (
        "Starting with pandas version 3.0 all arguments of to_hdf except for the "
        "argument 'path_or_buf' will be keyword-only."
    )
    df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}])

    # Write to a throwaway path instead of a hard-coded relative file name;
    # the context manager takes care of cleaning the file up afterwards.
    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(FutureWarning, match=msg):
            # "key" is deliberately positional: that is what triggers the warning.
            df.to_hdf(path, "key")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test leaves behind an "example" file. Can you do a follow-up that uses tm.ensure_clean to avoid this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



def test_to_hdf_with_min_itemsize(tmp_path, setup_path):
path = tmp_path / setup_path

# min_itemsize in index with to_hdf (GH 10381)
df = tm.makeMixedDataFrame().set_index("C")
df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6})
df.to_hdf(path, key="ss3", format="table", min_itemsize={"index": 6})
# just make sure there is a longer string:
df2 = df.copy().reset_index().assign(C="longer").set_index("C")
df2.to_hdf(path, "ss3", append=True, format="table")
df2.to_hdf(path, key="ss3", append=True, format="table")
tm.assert_frame_equal(read_hdf(path, "ss3"), concat([df, df2]))

# same as above, with a Series
df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6})
df2["B"].to_hdf(path, "ss4", append=True, format="table")
df["B"].to_hdf(path, key="ss4", format="table", min_itemsize={"index": 6})
df2["B"].to_hdf(path, key="ss4", append=True, format="table")
tm.assert_series_equal(read_hdf(path, "ss4"), concat([df["B"], df2["B"]]))


@@ -326,7 +338,7 @@ def test_to_hdf_errors(tmp_path, format, setup_path):
ser = Series(data, index=Index(data))
path = tmp_path / setup_path
# GH 20835
ser.to_hdf(path, "table", format=format, errors="surrogatepass")
ser.to_hdf(path, key="table", format=format, errors="surrogatepass")

result = read_hdf(path, "table", errors="surrogatepass")
tm.assert_series_equal(result, ser)
@@ -542,7 +554,7 @@ def test_store_index_name_numpy_str(tmp_path, table_format, setup_path):

# This used to fail, returning numpy strings instead of python strings.
path = tmp_path / setup_path
df.to_hdf(path, "df", format=table_format)
df.to_hdf(path, key="df", format=table_format)
df2 = read_hdf(path, "df")

tm.assert_frame_equal(df, df2, check_names=True)
@@ -780,7 +792,7 @@ def test_path_pathlib():
df = tm.makeDataFrame()

result = tm.round_trip_pathlib(
lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df")
lambda p: df.to_hdf(p, key="df"), lambda p: read_hdf(p, "df")
)
tm.assert_frame_equal(df, result)

@@ -807,7 +819,7 @@ def test_path_pathlib_hdfstore():

def writer(path):
with HDFStore(path) as store:
df.to_hdf(store, "df")
df.to_hdf(store, key="df")

def reader(path):
with HDFStore(path) as store:
@@ -820,7 +832,7 @@ def reader(path):
def test_pickle_path_localpath():
df = tm.makeDataFrame()
result = tm.round_trip_pathlib(
lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df")
lambda p: df.to_hdf(p, key="df"), lambda p: read_hdf(p, "df")
)
tm.assert_frame_equal(df, result)

@@ -830,7 +842,7 @@ def test_path_localpath_hdfstore():

def writer(path):
with HDFStore(path) as store:
df.to_hdf(store, "df")
df.to_hdf(store, key="df")

def reader(path):
with HDFStore(path) as store:
@@ -876,9 +888,9 @@ def test_duplicate_column_name(tmp_path, setup_path):
path = tmp_path / setup_path
msg = "Columns index has to be unique for fixed format"
with pytest.raises(ValueError, match=msg):
df.to_hdf(path, "df", format="fixed")
df.to_hdf(path, key="df", format="fixed")

df.to_hdf(path, "df", format="table")
df.to_hdf(path, key="df", format="table")
other = read_hdf(path, "df")

tm.assert_frame_equal(df, other)
@@ -911,7 +923,7 @@ def test_columns_multiindex_modified(tmp_path, setup_path):
path = tmp_path / setup_path
df.to_hdf(
path,
"df",
key="df",
mode="a",
append=True,
data_columns=data_columns,
@@ -947,14 +959,14 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path):
path = tmp_path / setup_path
msg = "cannot have non-object label DataIndexableCol"
with pytest.raises(ValueError, match=msg):
df.to_hdf(path, "df", format="table", data_columns=True)
df.to_hdf(path, key="df", format="table", data_columns=True)

for index in types_should_run:
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 2)), columns=index(2)
)
path = tmp_path / setup_path
df.to_hdf(path, "df", format="table", data_columns=True)
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df", where=f"index = [{df.index[0]}]")
assert len(result)

@@ -975,6 +987,6 @@ def test_store_bool_index(tmp_path, setup_path):
# # Test to make sure defaults are to not drop.
# # Corresponding to Issue 9382
path = tmp_path / setup_path
df.to_hdf(path, "a")
df.to_hdf(path, key="a")
result = read_hdf(path, "a")
tm.assert_frame_equal(expected, result)
4 changes: 2 additions & 2 deletions pandas/tests/io/pytables/test_subclass.py
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ def test_supported_for_subclass_dataframe(self, tmp_path):
expected = DataFrame(data, dtype=np.intp)

path = tmp_path / "temp.h5"
sdf.to_hdf(path, "df")
sdf.to_hdf(path, key="df")
result = read_hdf(path, "df")
tm.assert_frame_equal(result, expected)

@@ -41,7 +41,7 @@ def test_supported_for_subclass_series(self, tmp_path):
expected = Series(data, dtype=np.intp)

path = tmp_path / "temp.h5"
sser.to_hdf(path, "ser")
sser.to_hdf(path, key="ser")
result = read_hdf(path, "ser")
tm.assert_series_equal(result, expected)