@@ -111,9 +111,12 @@ def __post_init__(self):
111
111
class MultilingualNanosetDatasetsArgs :
112
112
training_folder : Union [str , dict , List [str ]]
113
113
validation_folder : Union [str , List [str ]]
114
- languages : List [str ] # NOTE(tj.solergibert) Required for 1. Aggregating the result 2. Reporting to WANDB
114
+ domains : Optional [List [str ]] = None # NOTE(tj.solergibert) Required for 1. Aggregating the result 2. Reporting to WANDB
115
+ languages : Optional [List [str ]] = None # NOTE(@paultltc): For back-compatibility
115
116
116
117
def __post_init__ (self ):
118
+ if self .languages is not None and self .domains is None :
119
+ self .domains = self .languages
117
120
if isinstance (self .training_folder , str ): # Case 1: 1 Dataset folder
118
121
self .training_folder = [self .training_folder ]
119
122
self .validation_folder = [self .validation_folder ]
@@ -125,13 +128,13 @@ def __post_init__(self):
125
128
self .training_folder = list (tmp_training_folder .keys ())
126
129
self .dataset_weights = list (tmp_training_folder .values ())
127
130
128
- assert len (self .training_folder ) == len (
129
- self .languages
130
- ), f"The sizes of training_folder and languages mismatch ({ len (self .training_folder )} vs { len (self .languages )} )"
131
+ # assert len(self.training_folder) == len(
132
+ # self.domains
133
+ # ), f"The sizes of training_folder and domains mismatch ({len(self.training_folder)} vs {len(self.domains )})"
131
134
132
- assert len (self .training_folder ) == len (
133
- self .validation_folder
134
- ), f"The sizes of training_folder and validation_folder mismatch ({ len (self .training_folder )} vs { len (self .validation_folder )} )"
135
+ # assert len(self.training_folder) == len(
136
+ # self.validation_folder
137
+ # ), f"The sizes of training_folder and validation_folder mismatch ({len(self.training_folder)} vs {len(self.validation_folder)})"
135
138
136
139
137
140
@dataclass
@@ -189,7 +192,6 @@ class GeneralArgs:
189
192
190
193
Args:
191
194
project: Name of the project (a project gathers several runs in common tensorboard/hub-folders)
192
- entity: Weights and bias entity name (optional)
193
195
run: Name of the run
194
196
step: Global step (updated when we save the checkpoint)
195
197
consumed_train_samples: Number of samples consumed during training (should actually be just step*batch_size)
0 commit comments