File tree: 3 files changed, +21 -10 lines changed
3 files changed, +21 -10 lines changed
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@ output_base_dir: /data/output
5
5
dataset_id : mrm8488/fake-news
6
6
dataset_input_field_name : text
7
7
dataset_output_field_name : label
8
+ dataset_output_field_values_to_texts :
9
+ 0 : " Real"
10
+ 1 : " Fake"
8
11
dataset_train_split_seed : 42
9
12
dataset_train_split_test_size : 0.2
10
13
lora_r : 8
@@ -20,16 +23,16 @@ inference_max_new_tokens: 2
20
23
evaluations :
21
24
-
22
25
prompt : " Donald Trump has never been President of the United States."
23
- expected_output : " 1 "
26
+ expected_output : " Fake "
24
27
-
25
28
prompt : " The Earth is flat."
26
- expected_output : " 1 "
29
+ expected_output : " Fake "
27
30
-
28
31
prompt : " Martians visited Japan in 2011."
29
- expected_output : " 1 "
32
+ expected_output : " Fake "
30
33
-
31
34
prompt : " The World Trade Center collapsed when the plane hit it."
32
- expected_output : " 0 "
35
+ expected_output : " Real "
33
36
-
34
- expected_output : " 0"
35
37
prompt : " The United States is a country in North America."
38
+ expected_output : " Real"
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@ output_base_dir: /data/output
5
5
dataset_id : mrm8488/fake-news
6
6
dataset_input_field_name : text
7
7
dataset_output_field_name : label
8
+ dataset_output_field_values_to_texts :
9
+ 0 : " Real"
10
+ 1 : " Fake"
8
11
dataset_train_split_seed : 42
9
12
dataset_train_split_test_size : 0.2
10
13
lora_r : 8
@@ -20,16 +23,16 @@ inference_max_new_tokens: 2
20
23
evaluations :
21
24
-
22
25
prompt : " Donald Trump has never been President of the United States."
23
- expected_output : " 1 "
26
+ expected_output : " Fake "
24
27
-
25
28
prompt : " The Earth is flat."
26
- expected_output : " 1 "
29
+ expected_output : " Fake "
27
30
-
28
31
prompt : " Martians visited Japan in 2011."
29
- expected_output : " 1 "
32
+ expected_output : " Fake "
30
33
-
31
34
prompt : " The World Trade Center collapsed when the plane hit it."
32
- expected_output : " 0 "
35
+ expected_output : " Real "
33
36
-
34
- expected_output : " 0"
35
37
prompt : " The United States is a country in North America."
38
+ expected_output : " Real"
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,11 @@ def prepare_train_data(dataset_id):
125
125
data_df ["text" ] = data_df [input_field_name ].apply (lambda x : simple_template_for_pretrain (x ))
126
126
else :
127
127
output_field_name = train_config ["dataset_output_field_name" ]
128
+ if "dataset_output_field_values_to_texts" in train_config :
129
+ output_field_values_to_texts = train_config ["dataset_output_field_values_to_texts" ]
130
+ data_df [output_field_name ] = data_df [output_field_name ].apply (
131
+ lambda x : output_field_values_to_texts .get (x , x )
132
+ )
128
133
if "dataset_context_field_name" in train_config :
129
134
context_field_name = train_config ["dataset_context_field_name" ]
130
135
if "dataset_context_hint" not in train_config :
You can’t perform that action at this time.
0 commit comments