diff --git a/src/huggingface_hub/_commit_api.py b/src/huggingface_hub/_commit_api.py index cdf861bbdf..763787e68a 100644 --- a/src/huggingface_hub/_commit_api.py +++ b/src/huggingface_hub/_commit_api.py @@ -39,7 +39,7 @@ UploadMode = Literal["lfs", "regular"] -# Max is 1,000 per request on the Hub for HfApi.list_files_info +# Max is 1,000 per request on the Hub for HfApi.get_paths_info # Otherwise we get: # HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters # See https://github.com/huggingface/huggingface_hub/issues/1503 @@ -555,7 +555,7 @@ def _fetch_lfs_files_to_copy( [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) If the Hub API response is improperly formatted. """ - from .hf_api import HfApi + from .hf_api import HfApi, RepoFolder hf_api = HfApi(endpoint=endpoint, token=token) files_to_copy = {} @@ -563,13 +563,15 @@ def _fetch_lfs_files_to_copy( operations = list(operations) # type: ignore paths = [op.src_path_in_repo for op in operations] for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE): - src_repo_files = hf_api.list_files_info( + src_repo_files = hf_api.get_paths_info( repo_id=repo_id, paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE], revision=src_revision or revision, repo_type=repo_type, ) for src_repo_file in src_repo_files: + if isinstance(src_repo_file, RepoFolder): + raise NotImplementedError("Copying a folder is not implemented.") if not src_repo_file.lfs: raise NotImplementedError("Copying a non-LFS file is not implemented") files_to_copy[(src_repo_file.rfilename, src_revision)] = src_repo_file diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 833a303b05..110fd592eb 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -130,6 +130,7 @@ validate_hf_hub_args, ) from .utils import tqdm as hf_tqdm +from .utils._deprecation import _deprecate_method from .utils._typing import CallableT from .utils.endpoint_helpers import ( DatasetFilter, @@ -2376,6 +2377,7 @@ def file_exists( return False @validate_hf_hub_args + @_deprecate_method(version="0.23", message="Use `list_repo_tree` and `get_paths_info` instead.") def list_files_info( self, repo_id: str, @@ -2588,9 +2590,10 @@ def list_repo_files( """ return [ f.rfilename - for f in self.list_files_info( - repo_id=repo_id, paths=None, revision=revision, repo_type=repo_type, token=token + for f in self.list_repo_tree( + repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token ) + if isinstance(f, RepoFile) ] @validate_hf_hub_args diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 32197aa7e2..5be2eab542 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -102,6 +102,7 @@ DUMMY_MODEL_ID, DUMMY_MODEL_ID_REVISION_ONE_SPECIFIC_COMMIT, SAMPLE_DATASET_IDENTIFIER, + expect_deprecation, repo_name, require_git_lfs, rmtree_with_retry, @@ -1003,7 +1004,7 @@ def test_commit_copy_file(self, repo_url: RepoUrl) -> None: self.assertIn("lfs Copy (1).bin", repo_files) # Check same LFS file - repo_file1, repo_file2 = self._api.list_files_info(repo_id=repo_id, paths=["lfs.bin", "lfs Copy.bin"]) + repo_file1, repo_file2 = self._api.get_paths_info(repo_id=repo_id, paths=["lfs.bin", "lfs Copy.bin"]) self.assertEqual(repo_file1.lfs["sha256"], repo_file2.lfs["sha256"]) @use_tmp_repo() @@ -1161,6 +1162,7 @@ def setUpClass(cls): def tearDownClass(cls): cls._api.delete_repo(repo_id=cls.repo_id) + @expect_deprecation("list_files_info") def test_get_regular_file_info(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths="file.md")) self.assertEqual(len(files), 1) @@ -1171,6 +1173,7 @@ def test_get_regular_file_info(self): self.assertEqual(file.size, 4) self.assertEqual(file.blob_id, "6320cd248dd8aeaab759d5871f8781b5c0505172") + @expect_deprecation("list_files_info") def test_get_lfs_file_info(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths="lfs.bin")) self.assertEqual(len(files), 1) @@ -1188,34 +1191,41 @@ def test_get_lfs_file_info(self): self.assertEqual(file.size, 4) self.assertEqual(file.blob_id, "0a828055346279420bd02a4221c177bbcdc045d8") + @expect_deprecation("list_files_info") def test_list_files(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["file.md", "lfs.bin", "2/file_2.md"])) self.assertEqual(len(files), 3) self.assertEqual({f.path for f in files}, {"file.md", "lfs.bin", "2/file_2.md"}) + @expect_deprecation("list_files_info") def test_list_files_and_folder(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["file.md", "lfs.bin", "2"])) self.assertEqual(len(files), 3) self.assertEqual({f.path for f in files}, {"file.md", "lfs.bin", "2/file_2.md"}) + @expect_deprecation("list_files_info") def test_list_unknown_path_among_other(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["file.md", "unknown"])) self.assertEqual(len(files), 1) + @expect_deprecation("list_files_info") def test_list_unknown_path_alone(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths="unknown")) self.assertEqual(len(files), 0) + @expect_deprecation("list_files_info") def test_list_folder_flat(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["2"])) self.assertEqual(len(files), 1) self.assertEqual(files[0].path, "2/file_2.md") + @expect_deprecation("list_files_info") def test_list_folder_recursively(self): files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["1"])) self.assertEqual(len(files), 2) self.assertEqual({f.path for f in files}, {"1/2/file_1_2.md", "1/file_1.md"}) + @expect_deprecation("list_files_info") def test_list_repo_files_manually(self): files = list(self._api.list_files_info(repo_id=self.repo_id)) self.assertEqual(len(files), 7) @@ -1224,22 +1234,26 @@ def test_list_repo_files_manually(self): {".gitattributes", "1/2/file_1_2.md", "1/file_1.md", "2/file_2.md", "3/file_3.md", "file.md", "lfs.bin"}, ) + @expect_deprecation("list_files_info") def test_list_repo_files_alias(self): self.assertEqual( - set(self._api.list_repo_files(repo_id=self.repo_id)), + set(f.path for f in self._api.list_files_info(repo_id=self.repo_id)), {".gitattributes", "1/2/file_1_2.md", "1/file_1.md", "2/file_2.md", "3/file_3.md", "file.md", "lfs.bin"}, ) + @expect_deprecation("list_files_info") def test_list_with_root_path_is_ignored(self): # must use `paths=None` files = list(self._api.list_files_info(repo_id=self.repo_id, paths="/")) self.assertEqual(len(files), 0) + @expect_deprecation("list_files_info") def test_list_with_empty_path_is_invalid(self): # must use `paths=None` with self.assertRaises(BadRequestError): list(self._api.list_files_info(repo_id=self.repo_id, paths="")) + @expect_deprecation("list_files_info") @with_production_testing def test_list_files_with_expand(self): files = list( @@ -1259,6 +1273,7 @@ def test_list_files_with_expand(self): self.assertTrue(vae_model.security["safe"]) self.assertTrue(isinstance(vae_model.security["av_scan"], dict)) # all details in here + @expect_deprecation("list_files_info") @with_production_testing def test_list_files_without_expand(self): files = list(