Skip to content

Commit 16b8315

Browse files
committed
loading pretraining datasets
1 parent 33f8c9e commit 16b8315

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

load_pretraining_datasets.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from datasets import load_dataset
2+
import pandas as pd
3+
import numpy as np
4+
5+
# Load the pre-training tables from the Hugging Face Hub
6+
7+
def parse_table(serialized):
    """Rebuild a ``pd.DataFrame`` from one serialized table string.

    Args:
        serialized: Python expression text that evaluates to a value
            accepted by ``pd.DataFrame.from_dict``. Presumably a dict
            literal in which missing values appear as the bare name
            ``nan`` -- verify against the dataset card.

    Returns:
        The ``pd.DataFrame`` built from the evaluated expression.

    Raises:
        NameError: If the text references any name other than ``nan``
            (including builtins such as ``__import__`` or ``open``).
    """
    # SECURITY: eval() runs whatever expression the downloaded dataset
    # contains. Builtins are emptied so the text cannot directly reach
    # __import__/open/exec, and ``nan`` is the only name bound. This is a
    # mitigation, not a sandbox -- only load datasets you trust.
    columns = eval(serialized, {'__builtins__': {}, 'nan': np.nan})
    return pd.DataFrame.from_dict(columns)


if __name__ == '__main__':
    # Maps each table name to its table as a pd.DataFrame.
    data = {}
    dataset = load_dataset(path='ztphs980/taptap_datasets')
    # Convert the 'train' split into a plain dict of column lists so the
    # name and table columns can be walked in lockstep.
    dataset = dataset['train'].to_dict()
    for table_name, table in zip(dataset['dataset_name'], dataset['table']):
        data[table_name] = parse_table(table)
14+
15+
# Each key-value pair in `data` maps a table name to its table (a pd.DataFrame).

0 commit comments

Comments
 (0)