Skip to content

Commit 42305ef

Browse files
committed
Added preprocessing
1 parent 182bc82 commit 42305ef

File tree

3 files changed

+7
-3
lines changed

3 files changed

+7
-3
lines changed

.env.example

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
1-
OPENAI_API_KEY=sk-<your-api-key>
1+
OPENAI_API_KEY=sk-<your-api-key>
2+
HF_TOKEN=your-token

inference.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,11 @@
3333
model = AdvancedTokenAnonymizerModel(best_model)
3434
for index, row in df.iterrows():
3535
text = row["source_text"]
36+
mask = row["privacy_mask"]
3637

37-
privacy_mask = ast.literal_eval(row["privacy_mask"]) if str(row['privacy_mask']) != 'nan' else []
38+
if type(mask) == str and str(mask) != 'nan':
39+
mask = ast.literal_eval(row["privacy_mask"])
40+
privacy_mask = mask
3841

3942
trues.append(convert_chars_to_tokens_mapping(text, privacy_mask, model.tokenizer))
4043
preds.append(model.predict(text))

src/anonymizer/AdvancedTokenAnonymizerModel.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from TokenAnonymizerModel import TokenAnonymizerModel
1+
from .TokenAnonymizerModel import TokenAnonymizerModel
22

33
class AdvancedTokenAnonymizerModel(TokenAnonymizerModel):
44
def __init__(self, model_name):

0 commit comments

Comments
 (0)