Skip to content

Commit 40ae287

Browse files
authored
fix get_dataset (#835)
* fix `get_dataset`
* format
1 parent 536339e commit 40ae287

File tree

2 files changed: 19 additions (+19) and 22 deletions (-22)

promptsource/app.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,13 +313,18 @@ def show_text(t, width=WIDTH, with_markdown=False):
313313
except OSError as e:
314314
st.error(
315315
f"Some datasets are not handled automatically by `datasets` and require users to download the "
316-
f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
317-
f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
316+
f"dataset manually. It is possibly the case for {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
317+
f"\n\nIf so, please download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
318318
f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment "
319319
f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`"
320320
f"\n\nOriginal error:\n{str(e)}"
321321
)
322322
st.stop()
323+
except Exception as e:
324+
st.error(
325+
f"An error occured while loading the dataset {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
326+
f"\\n\nOriginal error:\n{str(e)}"
327+
)
323328

324329
splits = list(dataset.keys())
325330
index = 0

promptsource/utils.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -46,33 +46,25 @@ def get_dataset_builder(path, conf=None):
4646

4747
def get_dataset(path, conf=None):
4848
"Get a dataset from name and conf."
49-
builder_instance = get_dataset_builder(path, conf)
50-
if builder_instance.manual_download_instructions is None and builder_instance.info.size_in_bytes is not None:
51-
builder_instance.download_and_prepare()
52-
return builder_instance.as_dataset()
53-
else:
54-
return load_dataset(path, conf)
55-
56-
57-
def load_dataset(dataset_name, subset_name):
5849
try:
59-
return datasets.load_dataset(dataset_name, subset_name)
50+
return datasets.load_dataset(path, conf)
6051
except datasets.builder.ManualDownloadError:
6152
cache_root_dir = (
6253
os.environ["PROMPTSOURCE_MANUAL_DATASET_DIR"]
6354
if "PROMPTSOURCE_MANUAL_DATASET_DIR" in os.environ
6455
else DEFAULT_PROMPTSOURCE_CACHE_HOME
6556
)
66-
data_dir = (
67-
f"{cache_root_dir}/{dataset_name}"
68-
if subset_name is None
69-
else f"{cache_root_dir}/{dataset_name}/{subset_name}"
70-
)
71-
return datasets.load_dataset(
72-
dataset_name,
73-
subset_name,
74-
data_dir=data_dir,
75-
)
57+
data_dir = f"{cache_root_dir}/{path}" if conf is None else f"{cache_root_dir}/{path}/{conf}"
58+
try:
59+
return datasets.load_dataset(
60+
path,
61+
conf,
62+
data_dir=data_dir,
63+
)
64+
except Exception as err:
65+
raise err
66+
except Exception as err:
67+
raise err
7668

7769

7870
def get_dataset_confs(path):

0 commit comments

Comments
 (0)