From 9d78ca9a8fcdd147df6dd0e7c7c68ab6f9b41771 Mon Sep 17 00:00:00 2001 From: Jose Posada Date: Thu, 1 Apr 2021 15:59:45 -0700 Subject: [PATCH] Temporality Lf's Merged regexes, cleaned and created a single class --- .../thyme/doc_time_rel_dominant_class.tsv | 840 ++++++++++++++++++ applications/thyme/temporality.py | 369 ++++++++ applications/thyme/thyme_regexes.tsv | 199 +++++ 3 files changed, 1408 insertions(+) create mode 100644 applications/thyme/doc_time_rel_dominant_class.tsv create mode 100644 applications/thyme/temporality.py create mode 100644 applications/thyme/thyme_regexes.tsv diff --git a/applications/thyme/doc_time_rel_dominant_class.tsv b/applications/thyme/doc_time_rel_dominant_class.tsv new file mode 100644 index 0000000..04efb8e --- /dev/null +++ b/applications/thyme/doc_time_rel_dominant_class.tsv @@ -0,0 +1,840 @@ +impression 1 +plan 1 +diagnosis 1 +involve 1 +mutation 1 +pending 1 +t4 1 +n2 1 +effects 1 +infection 1 +syndrome 1 +consultation 1 +m0 1 +pleasant 1 +old 1 +dysplasia 1 +tumor 1 +identified 1 +mass 1 +invasion 1 +mucus 1 +negative 1 +denies 1 +fever 1 +chills 1 +pain 1 +fatigue 1 +issues 1 +alcohol 1 +temperature 1 +rate 1 +desc 1 +systolic 1 +diastolic 1 +hepatosplenomegaly 1 +tenderness 1 +incision 1 +regular 1 +gallops 1 +murmurs 1 +rubs 1 +normocephalic 1 +clear 1 +auscultation 1 +palpated 1 +normal 1 +distress 1 +erythema 1 +lesions 1 +lymphadenopathy 1 +moist 1 +pallor 1 +icterus 1 +edema 1 +palpable 1 +carcinoma 1 +illness 1 +shortness 1 +supervised 1 +irregular 1 +material 1 +origin 1 +benign 1 +comes 1 +wishes 1 +complaints 1 +height 1 +weight 1 +bmi 1 +bsa 1 +rhythm 1 +full 1 +sitting 1 +cuff 1 +bruits 1 +p&a 1 +masses 1 +sounds 1 +heard 1 +palpation 1 +inspected 1 +type 1 +unsure 1 +unknown 1 +spent 1 +discussing 1 +answering 1 +questions 1 +counseling 1 +grade 1 +position 1 +presents 1 +agreeable 1 +delightful 1 +melena 1 +feeling 1 +fullness 1 +comorbidities 1 +cholelithiasis 1 +nonsmoker 1 +rashes 1 +alert 1 +oriented 1 
+healthy 1 +overweight 1 +anicteric 1 +healed 1 +obese 1 +organomegaly 1 +differentiated 1 +forming 1 +invades 1 +effect 1 +margins 1 +cholecystitis 1 +ranging 1 +stones 1 +node 1 +primary 1 +specimen(s 1 +sections 1 +consisting 1 +description 1 +specimen 1 +extends 1 +submitted 1 +grossed 1 +portion 1 +staples 1 +gross 1 +thickness 1 +limits 1 +shows 1 +has 1 +recommend 1 +presentation 1 +risk 1 +associated 1 +limited 1 +nodule 1 +located 1 +returns 1 +stable 1 +score 1 +dependent 1 +is 1 +married 1 +employed 1 +examined 1 +comfortable 1 +recurrent 1 +agree 1 +willing 1 +cells 1 +nodes 1 +perforation 1 +infiltrates 1 +sect 1 +margin 1 +hold 1 +interpretation 1 +situated 1 +fibrosis 1 +infiltrate 1 +extend 1 +pools 1 +ulceration 1 +absent 1 +from 1 +present 1 +uninvolved 1 +hnpcc 1 +answered 1 +agreed 1 +drinks 1 +activities 1 +alternatives 1 +directives 1 +section 1 +necrosis 1 +demonstrates 1 +steatosis 1 +tissue 1 +cuts 1 +factors 1 +obesity 1 +asymptomatic 1 +symptom 1 +continues 1 +reports 1 +appearing 1 +soft 1 +nontender 1 +nondistended 1 +active 1 +intact 1 +murmur 1 +calcifications 1 +cysts 1 +ascites 1 +diverticula 1 +adenopathy 1 +penetrates 1 +synaptophysin 1 +chromogranin 1 +support 1 +nodules 1 +orientation 1 +features 1 +stone 1 +n0 1 +explained 1 +feel 1 +options 1 +site 1 +assisted 1 +tingling 1 +expressed 1 +wish 1 +abnormality 1 +issue 1 +okay 1 +indication 1 +sinusitis 1 +retired 1 +notes 1 +rash 1 +controlled 1 +good 1 +aware 1 +medications 1 +unremarkable 1 +percussion 1 +code 1 +indications 1 +presence 1 +postoperative 1 +fevers 1 +smoke 1 +deficits 1 +abnormalities 1 +plans 1 +here 1 +health 1 +understanding 1 +uses 1 +emphasized 1 +eager 1 +like 1 +cholangitis 1 +describes 1 +states 1 +feels 1 +candidate 1 +reported 1 +works 1 +case 1 +comments 1 +understands 1 +recovering 1 +free 1 +instability 1 +recuts 1 +pt2n0 1 +pt1n0 1 +confined 1 +tender 1 +rare 1 +motion 1 +pressure 1 +dehydration 1 +addressed 1 +osteoporosis 1 +position/cuff 1 +s1 
1 +s2 1 +wears 1 +palpitations 1 +syncope 1 +orthopnea 1 +pains 1 +myelitis 1 +dysphagia 1 +control 1 +pt3n0 1 +ulcer 1 +ttf-1 1 +surface 1 +cholesterolosis 1 +segment 1 +sectioning 1 +education 1 +arrangements 1 +happy 1 +continuing 1 +signs 1 +back 1 +content 1 +swelling 1 +stated 1 +status 1 +inked 1 +trouble 1 +cough 1 +scar 1 +fine 1 +planning 1 +sore 1 +rhinorrhea 1 +guarding 1 +seeing 1 +seeking 1 +characteristics 1 +stage 1 +dyspnea 1 +movement 1 +sweats 1 +lives 1 +seizures 1 +small 1 +pame 1 +moves 1 +moving 1 +uncomfortable 1 +tolerance 1 +etoh 1 +caffeine 1 +cups 1 +round 1 +reactive 1 +symmetrical 1 +wheezes 1 +rales 1 +rhonchi 1 +atraumatic 1 +thyromegaly 1 +concerned 1 +greater 1 +double 1 +blurred 1 +tinnitus 1 +hemoptysis 1 +production 1 +believes 1 +breathing 1 +heaviness 1 +headache 1 +vertigo 1 +provided 1 +verbalized 1 +collaborated 1 +uncertain 1 +believe 1 +incisions 1 +indicate 1 +complaint 1 +mellitus 1 +iron 1 +arthritis 1 +sutures 1 +ring 1 +defect 1 +accompanied 1 +fit 1 +says 1 +clubbing 1 +cyanosis 1 +recommendation 1 +improving 1 +walk 1 +ambulation 1 +insulin 1 +have 1 +dry 1 +sleep 1 +eating 1 +breath 1 +mucin 1 +groups 1 +involves 1 +concern 1 +achalasia 1 +cirrhosis 1 +related 1 +quality 1 +needs 1 +steatohepatitis 1 +hypertriglyceridemia 1 +insomnia 1 +think 1 +informs 1 +due 1 +been 1 +want 1 +situation 1 +wants 1 +using 1 +lenses 1 +violence 1 +tone 1 +strength 1 +distance 1 +rom 1 +dyspepsia 1 +ready 1 +recommending 1 +markers 1 +urination 1 +rheumatica 1 +collaborating 1 +sores 1 +claudication 1 +tells 1 +enzymes 1 +pms2 1 +mutations 1 +single 1 +interested 1 +adenomyoma 1 +unclear 1 +bring 1 +ctap 1 +fap 1 +largest 1 +tumors 1 +understand 1 +know 1 +alive 1 +scars 1 +acceptable 1 +side 1 +omeprazole 1 +measures 1 +prognosis 1 +cdx2 1 +p 1 +a 1 +dnr 1 +goal 1 +stiffness 1 +tell 1 +alzheimer's 1 +reason 1 +dementia 1 +history 2 +noticing 2 +vaccine 2 +density 2 +dyslipidemia 2 +smoked 2 +fatigued 2 +pmh 2 +psh 2 +recover 2 
+zoster 2 +simvastatin 2 +details 2 +image 2 +hemoglobin 3 +labs 3 +sodium 3 +cea 3 +course 3 +placement 3 +admission 3 +evaluated 3 +showed 3 +suggesting 3 +polyps 3 +asked 3 +resection 3 +diagnosed 3 +colonoscopy 3 +demonstrated 3 +lesion 3 +found 3 +suggestive 3 +adenoma 3 +polyp 3 +noted 3 +reaction 3 +resections 3 +invaded 3 +resected 3 +blood 3 +complaining 3 +scan 3 +test 3 +cleared 3 +surgeries 3 +smoking 3 +mammogram 3 +screening 3 +met 3 +slides 3 +read 3 +ct 3 +referred 3 +hospitalized 3 +egd 3 +removed 3 +extended 3 +pathology 3 +biopsies 3 +esophagitis 3 +metaplasia 3 +mastectomy 3 +chemoradiation 3 +appendectomy 3 +repair 3 +dead 3 +lymphoma 3 +age 3 +tobacco 3 +quit 3 +cigarettes 3 +panel 3 +tetanus 3 +immunizations 3 +influenza 3 +colectomy 3 +clot 3 +seen 3 +placed 3 +eus 3 +described 3 +biopsied 3 +tattooed 3 +dependence 3 +thrombosis 3 +attack 3 +stroke 3 +reactions 3 +disorders 3 +oophorectomy 3 +usage 3 +hemicolectomy 3 +excision 3 +biopsy 3 +cholecystectomy 3 +received 3 +labeled 3 +abscess 3 +treated 3 +antibiotics 3 +presented 3 +revealed 3 +causing 3 +documented 3 +was 3 +taken 3 +staged 3 +radiation 3 +cycles 3 +workup 3 +showing 3 +gastritis 3 +prep 3 +response 3 +thickening 3 +activity 3 +performed 3 +completed 3 +dissection 3 +signed 3 +consent 3 +cytology 3 +thought 3 +mri 3 +accident 3 +scans 3 +nodularity 3 +pet 3 +endoscopy 3 +hysterectomy 3 +hematocrit 3 +albumin 3 +compared 3 +cxr 3 +obstruction 3 +laparotomy 3 +request 3 +images 3 +confirmed 3 +demonstrate 3 +study 3 +suggested 3 +echocardiogram 3 +anemic 3 +recommended 3 +developed 3 +returned 3 +admitted 3 +stopped 3 +difficulties 3 +cabg 3 +hospitalization 3 +onset 3 +pack 3 +wbc 3 +plt 3 +alk-phos 3 +ast 3 +ecgs 3 +some 3 +omentectomy 3 +added 3 +xeloda 3 +complicated 3 +started 3 +avastin 3 +reveal 3 +sign 3 +worrisome 3 +treatments 3 +5-fu 3 +revealing 3 +metastasis 3 + 3 +ordered 3 +saw 3 +inr 3 +imaging 3 +uptake 3 +contrast 3 +enhancement 3 +form 3 +scanned 3 +came 3 
+tonsillectomy 3 +asthma 3 +dvt 3 +infusion 3 +decreased 3 +angiogram 3 +dose 3 +chemistry 3 +invade 3 +offered 3 +pt3 3 +cva 3 +reduced 3 +mentioned 3 +marked 3 +obtained 3 +causes 3 +sought 3 +died 3 +adhesions 3 +fistula 3 +stricture 3 +repaired 3 +sent 3 +passed 3 +tolerated 3 +preparation 3 +ulcerated 3 +hepatectomy 3 +requested 3 +made 3 +regurgitation 3 +enlargement 3 +effusion 3 +hemorrhoids 3 +transferred 3 +drain 3 +diverticulitis 3 +tia 3 +photograph 3 +abuts 3 +measuring 3 +values 3 +guided 3 +consensus 3 +replacement 3 +lysis 3 +bevacizumab 3 +leucovorin 3 +tried 3 +stent 3 +sigmoidoscopy 3 +resolved 3 +enterography 3 +required 3 +discontinued 3 +proctocolectomy 3 +lymphadenectomy 3 +invading 3 +iort 3 +fractions 3 +fraction 3 +noting 3 +stenting 3 +identifying 3 +stripping 3 +malignancies 3 +episode 3 +one 3 +exposure 3 +cholesterol 3 +hyperthermia 3 +intubations 3 +illnesses 3 +wbc's 3 +underwent 3 +neutropenia 3 +referral 3 +strictures 3 +collection 3 +takedown 3 +complained 3 +fluid 3 +all 3 +bph 3 +repeated 3 +began 3 +hyperglycemia 3 +went 3 +cr 3 +cramping 3 +line 3 +bypass 3 +deemed 3 +radiosensitizer 3 +initiated 3 +ligation 3 +mi 3 +labeled 3 +echo 3 +hgb 3 +transfusions 3 +secondary 3 +arthroplasty 3 +resolution 3 +reduction 3 +enema 3 +checked 3 +unable 3 +smaller 3 +left 3 +vincristine 3 +worked 3 +mesh 3 +angioplasty 3 +radiochemotherapy 3 +irinotecan 3 +irregularity 3 +finding 3 +polypectomy 3 +prominent 3 +lumpectomy 3 +fracture 3 +astrocytoma 3 +tract 3 +stents 3 +appeared 3 +thrombus 3 +classified 3 +obstructed 3 +discharge 3 +discharged 3 +arrange 4 +analysis 4 +set 4 +appointment 4 +see 4 +work 4 +anesthesia 4 +contact 4 +schedule 4 +follow 4 +x-ray 4 +assess 4 +meet 4 +reflected 4 +revised 4 +need 4 +procedure 4 +management 4 +decide 4 +available 4 +proceed 4 +operation 4 +rule 4 +completion 4 +intervention 4 +followup 4 +determine 4 +evaluate 4 +ask 4 +cure 4 +needed 4 +consider 4 +restaging 4 +address 4 +looking 4 +return 4 
class TemporalityLabelingFunctions(object):
    """Labeling functions (LFs) for event temporality on the THYME 2014 corpus.

    Every ``LF_*`` method takes a candidate event ``span`` and returns one of
    the integer votes defined in ``self.class_map`` (``ABSTAIN`` = -1,
    ``OVERLAPS`` = 0, ``BEFORE_OVERLAPS`` = 1, ``BEFORE`` = 2, ``AFTER`` = 3).

    Regexes and word lists are read from ``<data_root>/thyme_regexes.tsv``;
    each distinct value of its NAME column becomes an attribute of the
    instance (``regex_*`` -> list of compiled patterns, ``word*`` -> set of
    strings).  ``<data_root>/doc_time_rel_dominant_class.tsv`` supplies the
    term -> dominant-class dictionary.

    Parameters
    ----------
    data_root : str
        Directory that contains the two TSV resource files.
    """

    # Event words for which LF_regex_after_left never votes AFTER.
    _AFTER_LEFT_EXCLUDED = frozenset((
        'arranged', 'agreed', 'consented', 'discussed',
        'plan', 'plans', 'scheduled',
    ))

    # If one of these appears right before the event, LF_regex_after_left
    # abstains.  Compiled once instead of on every call.
    _AFTER_LEFT_STOP_RGXS = tuple(
        re.compile(stop_word, re.I) for stop_word in ('otherwise', 'today')
    )

    _HISTORY_RGX = re.compile(
        r'''((family|surgical|oncologic|medical|patient) history|history of|history:|(his|her) history)''',
        re.I)

    # Cues that mark an event as hypothetical / planned.
    _HYPOTHETICAL_RGXS = tuple(re.compile(pattern, re.I) for pattern in (
        r"\b(if need be)\b",
        r"\b((if|should)\s+(you|she|he|be)|(she|he|you)\s+(might|could|may)\s*(be)*|if)\b",
        r"\b((possibility|potential|chance|need) (for|of)|potentially)\b",
        r"\b(candidate for|pending)\b",
        r"\b(assuming)\s+(you|she|he)\b",
        r"(recommendation)\s*[:]",
        r"(planned procedure)\s*[:]",
        r"\b(upcoming|would benefit from|(undergo|requires) a)\b",
        r'''\b(please call or return (for|if))\b''',
        r"\b(proceed with|consider|to undergo|scheduled for)\b",
    ))

    def __init__(self, data_root):
        self.data_root = data_root
        self.class_map = self.load_class_map()
        self.term_class_map = self.load_term_class_map()

        # Defaults, so that an LF abstains instead of raising AttributeError
        # when the TSV file does not define the corresponding NAME.
        for attr in ('regex_before_left', 'regex_before_overlap_left',
                     'regex_after_left'):
            setattr(self, attr, [])
        for attr in ('word_list_before', 'word_list_overlap',
                     'word_sections_overlap', 'sections_overlap'):
            setattr(self, attr, set())

        self.load_rgx()
        # The word lists are read by LF_word_list_* / LF_sections_overlap,
        # so they have to be loaded as well.
        self.load_word_lists()

    # ------------------------------------------------------------------
    # Resource loading
    # ------------------------------------------------------------------

    def load_class_map(self):
        """Return the mapping from class name to integer vote."""
        return {
            'ABSTAIN': -1,
            'OVERLAPS': 0,
            'BEFORE_OVERLAPS': 1,
            'BEFORE': 2,
            'AFTER': 3,
        }

    @staticmethod
    def _iter_rows(file_lines):
        """Yield the parsed rows of the regex TSV that have a REGEX column.

        The lines are parsed as tab-delimited CSV, so fields wrapped in
        double quotes (e.g. patterns containing a comma) are unquoted.
        Blank or truncated lines are skipped.
        """
        import csv  # local import: only this helper needs it

        for row in csv.reader(file_lines, delimiter='\t'):
            # LABEL, NAME, TIER, IGNORE_CASE, REGEX[, NOTES]
            if len(row) >= 5:
                yield row

    def _get_rgx(self, file_lines: list, name: str):
        """Return the REGEX column of every TSV row whose NAME equals ``name``.

        Parameters
        ----------
        file_lines : list
            Raw lines of ``thyme_regexes.tsv`` (with or without newlines).
        name : str
            Value of the NAME column to select.

        Returns
        -------
        list of str, in file order (duplicates are kept).
        """
        return [row[4] for row in self._iter_rows(file_lines) if row[1] == name]

    def _read_regex_lines(self):
        """Read ``thyme_regexes.tsv`` and return its raw lines."""
        with open(f"{self.data_root}/thyme_regexes.tsv", 'r') as handle:
            return handle.readlines()

    def _names_with_prefix(self, file_lines, prefix):
        """Distinct NAME values starting with ``prefix``, in file order."""
        names = (row[1] for row in self._iter_rows(file_lines))
        return list(dict.fromkeys(n for n in names if n.startswith(prefix)))

    def load_term_class_map(self):
        """Load the dictionary of terms that are dominant in one class.

        Each non-blank line of ``doc_time_rel_dominant_class.tsv`` is
        ``term<TAB>class``; later duplicates override earlier ones.

        Returns
        -------
        dict mapping term (str) to the integer class stored in the file.
        """
        term_class_map = {}
        with open(f'{self.data_root}/doc_time_rel_dominant_class.tsv', 'r') as handle:
            for line in handle.read().splitlines():
                fields = line.split('\t')
                if len(fields) < 2:
                    continue  # blank / malformed line
                term_class_map[fields[0]] = int(fields[1])
        return term_class_map

    def load_word_lists(self):
        """Load every ``word*`` list of the TSV file as a set attribute.

        The attribute gets the same name as in the NAME column.
        ``word_sections_overlap`` is additionally exposed as
        ``sections_overlap``, the name LF_sections_overlap reads.
        """
        all_rgx = self._read_regex_lines()

        for name in self._names_with_prefix(all_rgx, "word"):
            rgx_list_set = set(self._get_rgx(all_rgx, name))
            setattr(self, name, rgx_list_set)
            if name == "word_sections_overlap":
                self.sections_overlap = rgx_list_set

            print(f"{name} Words Loaded")

    def load_rgx(self):
        """Load every ``regex*`` group of the TSV file in the appropriate attribute.

        The attribute gets the same name as in the NAME column and holds the
        patterns compiled case-insensitively, in file order.
        NOTE(review): the IGNORE_CASE column is not consulted; every pattern
        is compiled with ``re.I``.
        """
        all_rgx = self._read_regex_lines()

        for name in self._names_with_prefix(all_rgx, "regex"):
            rgx_list_compiled = [re.compile(rgx, re.I)
                                 for rgx in self._get_rgx(all_rgx, name)]
            setattr(self, name, rgx_list_compiled)

            print(f"{name} Regexes Loaded")

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _vote(self, label, condition):
        """Return the vote for ``label`` if ``condition`` holds, else ABSTAIN."""
        return self.class_map[label] if condition else self.class_map["ABSTAIN"]

    def _vote_on_context(self, span, pattern, label, window, right=True):
        """Vote ``label`` when ``pattern`` occurs in the left (and, if
        ``right`` is true, the right) context window of ``span``.

        Left and right windows are searched separately, never joined.
        """
        contexts = [get_left_span(span, span.sentence, window=window).text]
        if right:
            contexts.append(
                get_right_span(span, span.sentence, window=window).text)
        hit = any(re.search(pattern, text, re.I) for text in contexts)
        return self._vote(label, hit)

    def _tdelta_vote(self, span, accepts, min_dist, max_dist, label):
        """Vote ``label`` from the time expression attached to ``span``.

        Abstains unless ``span.props`` holds both ``tdelta`` and
        ``timex_span``, ``accepts(tdelta)`` is true, and the token distance
        to the time expression satisfies ``min_dist < dist <= max_dist``
        (a bound of ``None`` is not checked).
        """
        props = span.props
        if 'tdelta' not in props or 'timex_span' not in props:
            return self.class_map["ABSTAIN"]
        if not accepts(props['tdelta']):
            return self.class_map["ABSTAIN"]
        dist = token_distance(span, props['timex_span'])
        in_range = ((min_dist is None or dist > min_dist)
                    and (max_dist is None or dist <= max_dist))
        return self._vote(label, in_range)

    # ------------------------------------------------------------------
    # Labeling functions
    # ------------------------------------------------------------------

    def LF_overlap_event(self, span):
        """OVERLAPS for a fixed set of event words."""
        evts = {'hemorrhage', 'edema', 'complications',
                'distress', 'ischemia', 'pneumothorax'}
        return self._vote("OVERLAPS", span.text.lower() in evts)

    def LF_regex_before_left(self, span):
        """BEFORE when a ``regex_before_left`` pattern matches the left window."""
        left_text = get_left_span(span, span.sentence, window=5).text
        hit = any(rgx.search(left_text) for rgx in self.regex_before_left)
        return self._vote("BEFORE", hit)

    def LF_regex_before_overlap_left(self, span):
        """BEFORE_OVERLAPS when a ``regex_before_overlap_left`` pattern matches
        the left window followed by the event text."""
        left = get_left_span(span, span.sentence, window=4)
        context = f"{left.text} {span.text}"
        hit = any(rgx.search(context)
                  for rgx in self.regex_before_overlap_left)
        return self._vote("BEFORE_OVERLAPS", hit)

    def LF_regex_after_left(self, span):
        """AFTER when a ``regex_after_left`` pattern matches the left window
        followed by the event text.

        Abstains when a stop word ('otherwise', 'today') occurs in the two
        positions before the event, or when the event itself is one of the
        excluded words (compared case-insensitively, like the word lists).
        """
        left = get_left_span(span, span.sentence, window=10)
        small_left = get_left_span(span, span.sentence, window=2)
        context = f"{left.text} {span.text}"

        if any(rgx.search(small_left.text)
               for rgx in self._AFTER_LEFT_STOP_RGXS):
            return self.class_map["ABSTAIN"]
        if span.text.lower() in self._AFTER_LEFT_EXCLUDED:
            return self.class_map["ABSTAIN"]

        hit = any(rgx.search(context) for rgx in self.regex_after_left)
        return self._vote("AFTER", hit)

    def LF_word_list_before(self, span):
        """BEFORE when the lower-cased event is in ``word_list_before``."""
        return self._vote("BEFORE", span.text.lower() in self.word_list_before)

    def LF_word_list_overlap(self, span):
        """OVERLAPS when the lower-cased event is in ``word_list_overlap``."""
        return self._vote("OVERLAPS",
                          span.text.lower() in self.word_list_overlap)

    def LF_sections_overlap(self, span):
        """OVERLAPS when the event's section header is in ``sections_overlap``."""
        section = span.props.get('section')
        if section is None:
            return self.class_map["ABSTAIN"]
        return self._vote("OVERLAPS",
                          section.text.lower() in self.sections_overlap)

    def LF_history_of(self, span):
        """BEFORE_OVERLAPS when a 'history of'-style cue surrounds the event."""
        left = get_left_span(span, span.sentence, window=2)
        right = get_right_span(span, span.sentence, window=2)
        context = f"{left.text} {span.text} {right.text}"
        return self._vote("BEFORE_OVERLAPS",
                          self._HISTORY_RGX.search(context))

    @staticmethod
    def _is_hypothetical(span):
        """True when a hypothetical/planning cue occurs in the left window.

        Declared static so that it works both as
        ``self._is_hypothetical(span)`` and as
        ``TemporalityLabelingFunctions._is_hypothetical(span)``.
        """
        text = get_left_span(span, span.sentence, window=20).text
        return any(rgx.search(text) for rgx in
                   TemporalityLabelingFunctions._HYPOTHETICAL_RGXS)

    def LF_hypothetical(self, span):
        """AFTER for events introduced by a hypothetical/planning cue."""
        return self._vote("AFTER", self._is_hypothetical(span))

    def LF_tdelta_after_dist_1(self, span):
        """ requires revision date info

        Not registered in ``lfs()``.
        NOTE(review): despite its name this LF votes OVERLAPS, and it
        subtracts ``doctime`` from ``props['timex_span']`` although the same
        value is passed to ``token_distance`` as a span.  Behaviour is kept
        as found; confirm the intended label and operand types before
        enabling it.
        """
        props = span.props
        doc_props = span.sentence.document.props
        if ('tdelta' not in props or 'timex_span' not in props
                or 'doctime' not in doc_props):
            return self.class_map["ABSTAIN"]
        closest_ts = props['timex_span']
        tdelta_start = props['tdelta']
        tdelta_rev = closest_ts - doc_props['doctime']

        v = tdelta_start > 5 and tdelta_rev > 5
        dist = token_distance(span, props['timex_span'])
        return self._vote("OVERLAPS", v and dist <= 1)

    def LF_tdelta_overlaps_dist_1(self, span):
        """OVERLAPS: tdelta >= 0 and time expression at distance <= 1."""
        return self._tdelta_vote(span, lambda t: t >= 0, None, 1, "OVERLAPS")

    def LF_tdelta_overlaps_dist_5(self, span):
        """OVERLAPS: tdelta >= 0 and 1 < distance <= 5."""
        return self._tdelta_vote(span, lambda t: t >= 0, 1, 5, "OVERLAPS")

    def LF_tdelta_overlaps_dist_10(self, span):
        """OVERLAPS: tdelta >= 0 and 5 < distance <= 10."""
        return self._tdelta_vote(span, lambda t: t >= 0, 5, 10, "OVERLAPS")

    def LF_tdelta_overlaps_dist_long(self, span):
        """OVERLAPS: tdelta >= 0 and distance > 10."""
        return self._tdelta_vote(span, lambda t: t >= 0, 10, None, "OVERLAPS")

    def LF_tdelta_before_dist_1(self, span):
        """BEFORE: tdelta < -1 and time expression at distance <= 1."""
        return self._tdelta_vote(span, lambda t: t < -1, None, 1, "BEFORE")

    def LF_tdelta_before_dist_5(self, span):
        """BEFORE: tdelta < -1 and 1 < distance <= 5."""
        return self._tdelta_vote(span, lambda t: t < -1, 1, 5, "BEFORE")

    def LF_tdelta_before_dist_10(self, span):
        """BEFORE: tdelta < -1 and 5 < distance <= 10."""
        return self._tdelta_vote(span, lambda t: t < -1, 5, 10, "BEFORE")

    def LF_tdelta_before_dist_long(self, span):
        """BEFORE: tdelta < -1 and distance > 10."""
        return self._tdelta_vote(span, lambda t: t < -1, 10, None, "BEFORE")

    def LF_overlaps_now(self, span):
        """OVERLAPS when 'now' is directly next to the event."""
        return self._vote_on_context(span, r'''\b(now)\b''', "OVERLAPS", 1)

    def LF_overlaps_current(self, span):
        """OVERLAPS when 'current(ly)' occurs in the left window."""
        return self._vote_on_context(
            span, r'''\b(current(ly)*)\b''', "OVERLAPS", 4, right=False)

    def LF_before_recent(self, span):
        """BEFORE when 'recent(ly)' occurs in the left window."""
        return self._vote_on_context(
            span, r'''\b(recent(ly)*)\b''', "BEFORE", 2, right=False)

    def LF_before_x_ago(self, span):
        """BEFORE when '<quantity> <time unit>(s) ago' occurs near the event."""
        rgx = r'''\b(([1-9][0-9]|{}|few|a) ((year|month|week|day|hour)[s]*) ago)\b'''.format(rgx_number_full)
        return self._vote_on_context(span, rgx, "BEFORE", 5)

    def LF_after_next_x(self, span):
        """AFTER when 'next/upcoming <period>' or 'later <period>' is near."""
        rgx = r'''\b((next|upcoming) (month|week|monday|tuesday|wednesday|thursday|friday)|(later (today|tonight|date|this (week|month|afternoon|evening))))\b'''
        return self._vote_on_context(span, rgx, "AFTER", 5)

    def LF_after_tomorrow(self, span):
        """AFTER when 'tomorrow' occurs near the event."""
        return self._vote_on_context(span, r'''\b(tomorrow)\b''', "AFTER", 5)

    def LF_before_yesterday(self, span):
        """BEFORE when 'yesterday' occurs near the event."""
        return self._vote_on_context(
            span, r'''\b(yesterday)\b''', "BEFORE", 2)

    def LF_after_will(self, span):
        """AFTER when 'will'/'plan(ning)' occurs in the left window."""
        return self._vote_on_context(
            span, r'''\b(will (try)*|plan(ning)*)\b''', "AFTER", 6,
            right=False)

    def LF_after_should(self, span):
        """AFTER when 'should' followed by a space occurs in the left window."""
        return self._vote_on_context(
            span, r'''\b(should (be)*)\b''', "AFTER", 10, right=False)

    def LF_dominant_temporality_terms(self, span):
        """Vote the dominant class recorded for the lower-cased event term.

        The file stores classes 1..4, while ``class_map`` votes are 0..3,
        hence the ``- 1``.
        """
        t = span.text.lower()
        if t not in self.term_class_map:
            return self.class_map["ABSTAIN"]
        return self.term_class_map[t] - 1

    def lfs(self):
        """Return the list of registered labeling functions (bound methods).

        The order of the list is fixed.  ``LF_tdelta_after_dist_1`` is
        deliberately not part of it.
        """
        lfs = [
            self.LF_tdelta_overlaps_dist_1,
            self.LF_tdelta_overlaps_dist_5,
            self.LF_tdelta_overlaps_dist_10,
            self.LF_tdelta_overlaps_dist_long,
            self.LF_overlaps_now,
            self.LF_overlap_event,
            self.LF_overlaps_current,
            self.LF_word_list_overlap,
            self.LF_sections_overlap,
            self.LF_tdelta_before_dist_1,
            self.LF_tdelta_before_dist_5,
            self.LF_tdelta_before_dist_10,
            self.LF_tdelta_before_dist_long,
            self.LF_before_x_ago,
            self.LF_before_recent,
            self.LF_before_yesterday,
            self.LF_regex_before_left,
            self.LF_word_list_before,
            self.LF_history_of,
            self.LF_regex_before_overlap_left,
            self.LF_hypothetical,
            self.LF_after_next_x,
            self.LF_after_tomorrow,
            self.LF_after_will,
            self.LF_after_should,
            self.LF_regex_after_left,
            self.LF_dominant_temporality_terms
        ]

        print(f'Labeling Functions n={len(lfs)}')
        return lfs
diagnosed +2 regex_before_left 1 1 has recovered from +1 regex_before_overlap_left 1 1 has been \b.+ing\b +1 regex_before_overlap_left 1 1 has continued +1 regex_before_overlap_left 1 1 has developed +1 regex_before_overlap_left 1 1 have developed +1 regex_before_overlap_left 1 1 has experienced +1 regex_before_overlap_left 1 1 has maintained +1 regex_before_overlap_left 1 1 has remained +1 regex_before_overlap_left 1 1 has tolerated +1 regex_before_overlap_left 1 1 no change +1 regex_before_overlap_left 1 1 no major change +1 regex_before_overlap_left 1 1 no known cancer(s)? +1 regex_before_overlap_left 1 1 \b(she|he)\b does note +1 regex_before_overlap_left 1 1 \b(she|he)\b continues to (have|be)? +1 regex_before_overlap_left 1 1 \b(she|he)\b has continued +1 regex_before_overlap_left 1 1 \b(she|he)\b.+recovering +1 regex_before_overlap_left 1 1 is continuing to recover +1 regex_before_overlap_left 1 1 \bis\b.+smoker +1 regex_before_overlap_left 1 1 recurrent episodes of +3 regex_after_left 1 1 \b(is scheduled)\b +3 regex_after_left 1 1 \b(will be)\b +3 regex_after_left 1 1 \b(will proceed)\b +3 regex_after_left 1 1 \b(will.+meet)\b +3 regex_after_left 1 1 \b(will contact)\b +3 regex_after_left 1 1 \b(going to)\b +3 regex_after_left 1 1 \b(once.+obtained)\b +3 regex_after_left 1 1 \b(once these are obtained)\b +3 regex_after_left 1 1 \b(evaluated for|upcoming|would benefit from|(undergo|requires) a)\b +3 regex_after_left 1 1 \b(if possible)\b +3 regex_after_left 1 1 \b(an approach would be)\b +3 regex_after_left 1 1 \b((she|he) would require)\b +3 regex_after_left 1 1 \b(i would discuss)\b +3 regex_after_left 1 1 \b(we would (be|arrange|set))\b +3 regex_after_left 1 1 \b(it would be)\b +3 regex_after_left 1 1 \b(suggestion for)\b +3 regex_after_left 1 1 "patient should (\b[a-z]+\b\s){1,2}follow" +3 regex_after_left 1 1 \b(we will proceed)\b +3 regex_after_left 1 1 \b(assuming)\s+(you|she|he)\b +3 regex_after_left 1 1 \b(candidate for|pending)\b +3 
regex_after_left 1 1 \b(please call or return (for|if))\b +2 word_list_before 1 1 demonstrated +0 word_list_overlap 1 1 arranged +0 word_list_overlap 1 1 consented +0 word_list_overlap 1 1 plan +0 word_list_overlap 1 1 requested +0 word_list_overlap 1 1 scheduled +0 word_sections_overlap 1 1 # basilic vein thrombosis: +0 word_sections_overlap 1 1 -will discuss with dr. ballard re: +0 word_sections_overlap 1 1 "a. colon, ascending total abdominal colectomy:" +0 word_sections_overlap 1 1 "a. colon, total abdominal colectomy:" +0 word_sections_overlap 1 1 "a. serosa, small bowel implant, biopsy:" +0 word_sections_overlap 1 1 abdm: +0 word_sections_overlap 1 1 abdomen and pelvis: +0 word_sections_overlap 1 1 "abdominal colon, right hemicolectomy:" +0 word_sections_overlap 1 1 ab: +0 word_sections_overlap 1 1 abd: +0 word_sections_overlap 1 1 abd: +0 word_sections_overlap 1 1 abdomen: +0 word_sections_overlap 1 1 abdominal x-ray: +0 word_sections_overlap 1 1 acute renal failure issues: +0 word_sections_overlap 1 1 aorta: +0 word_sections_overlap 1 1 aortic valve: +0 word_sections_overlap 1 1 arterial bp-diastolic: +0 word_sections_overlap 1 1 arterial bp-systolic: +0 word_sections_overlap 1 1 ap and lateral chest: +0 word_sections_overlap 1 1 ap chest radiograph: +0 word_sections_overlap 1 1 ap chest: +0 word_sections_overlap 1 1 "b. rectosigmoid proximal anastomotic ring, excision:" +0 word_sections_overlap 1 1 "b. rectum, anus, excision:" +0 word_sections_overlap 1 1 "b. small bowel, excision:" +0 word_sections_overlap 1 1 "b. gallbladder, cholecystectomy:" +0 word_sections_overlap 1 1 back: +0 word_sections_overlap 1 1 billing/primary oncologist: +0 word_sections_overlap 1 1 bone windows: +0 word_sections_overlap 1 1 c. congestive heart failure: +0 word_sections_overlap 1 1 c. 
rhythm: +0 word_sections_overlap 1 1 calculated ejection fraction: +0 word_sections_overlap 1 1 card: +0 word_sections_overlap 1 1 cardiac: +0 word_sections_overlap 1 1 cardiac: +0 word_sections_overlap 1 1 cardiovascular status: +0 word_sections_overlap 1 1 cardiovascular: +0 word_sections_overlap 1 1 cat scan abdomen: +0 word_sections_overlap 1 1 chest x-ray: +0 word_sections_overlap 1 1 chest x-ray: +0 word_sections_overlap 1 1 clinical implications: +0 word_sections_overlap 1 1 cn: +0 word_sections_overlap 1 1 condition at discharge: +0 word_sections_overlap 1 1 condition on discharge: +0 word_sections_overlap 1 1 contents: +0 word_sections_overlap 1 1 contraindications for iv contrast: +0 word_sections_overlap 1 1 coordination: +0 word_sections_overlap 1 1 coronary artery disease: +0 word_sections_overlap 1 1 cranial nerves: +0 word_sections_overlap 1 1 ct abd: +0 word_sections_overlap 1 1 ct of the abdomen: +0 word_sections_overlap 1 1 ct of the pelvis: +0 word_sections_overlap 1 1 cv: +0 word_sections_overlap 1 1 cxr: +0 word_sections_overlap 1 1 dermatologic: +0 word_sections_overlap 1 1 discharge condition: +0 word_sections_overlap 1 1 discharge diagnosis: +0 word_sections_overlap 1 1 drips: +0 word_sections_overlap 1 1 electrocardiogram: +0 word_sections_overlap 1 1 ext: +0 word_sections_overlap 1 1 ext: +0 word_sections_overlap 1 1 ext: +0 word_sections_overlap 1 1 extrem: +0 word_sections_overlap 1 1 extrem: +0 word_sections_overlap 1 1 "f. liver, right lobe, partial hepatectomy:" +0 word_sections_overlap 1 1 "f. 
liver, right lobe, wedge resection:" +0 word_sections_overlap 1 1 final diagnosis: +0 word_sections_overlap 1 1 final diagnosis +0 word_sections_overlap 1 1 foot pain: +0 word_sections_overlap 1 1 fundoscopic exam: +0 word_sections_overlap 1 1 gastrointestinal: +0 word_sections_overlap 1 1 gen: +0 word_sections_overlap 1 1 general: +0 word_sections_overlap 1 1 general comments: +0 word_sections_overlap 1 1 genitalia: +0 word_sections_overlap 1 1 genitourinary: +0 word_sections_overlap 1 1 gi/abd: +0 word_sections_overlap 1 1 gram stain: +0 word_sections_overlap 1 1 groin: +0 word_sections_overlap 1 1 groin: +0 word_sections_overlap 1 1 head and neck: +0 word_sections_overlap 1 1 head ct: +0 word_sections_overlap 1 1 "head, eyes, ears, nose and throat:" +0 word_sections_overlap 1 1 "head, eyes, ears, nose, and throat:" +0 word_sections_overlap 1 1 heent: +0 word_sections_overlap 1 1 hematology: +0 word_sections_overlap 1 1 hepatic encephalopathy: +0 word_sections_overlap 1 1 "iii, iv, vi:" +0 word_sections_overlap 1 1 imperssion: +0 word_sections_overlap 1 1 initial physical examination: +0 word_sections_overlap 1 1 "ix, x:" +0 word_sections_overlap 1 1 kub: +0 word_sections_overlap 1 1 laboratory studies: +0 word_sections_overlap 1 1 language: +0 word_sections_overlap 1 1 left atrium: +0 word_sections_overlap 1 1 left foot x-ray: +0 word_sections_overlap 1 1 "left knee, two views:" +0 word_sections_overlap 1 1 left ventricle: +0 word_sections_overlap 1 1 lv wall motion: +0 word_sections_overlap 1 1 medications at home: +0 word_sections_overlap 1 1 medications: +0 word_sections_overlap 1 1 mitral valve: +0 word_sections_overlap 1 1 neuro: +0 word_sections_overlap 1 1 neurologic exam: +0 word_sections_overlap 1 1 note added at attending review: +0 word_sections_overlap 1 1 on cranial nerve exam: +0 word_sections_overlap 1 1 pain: +0 word_sections_overlap 1 1 pericardium: +0 word_sections_overlap 1 1 pernicious anemia issues: +0 word_sections_overlap 1 1 pertinent 
laboratory data on admission: +0 word_sections_overlap 1 1 pertinent radiology/imaging: +0 word_sections_overlap 1 1 physical exam on admission: +0 word_sections_overlap 1 1 physical examination on admission: +0 word_sections_overlap 1 1 physical examination on presentation: +0 word_sections_overlap 1 1 physical examination today is as follows: +0 word_sections_overlap 1 1 portable chest: +0 word_sections_overlap 1 1 post bypass: +0 word_sections_overlap 1 1 post-cpb: +0 word_sections_overlap 1 1 pre-cpb: +0 word_sections_overlap 1 1 preoperative medications: +0 word_sections_overlap 1 1 pulmonary exam: +0 word_sections_overlap 1 1 pulmonic valve/pulmonary artery: +0 word_sections_overlap 1 1 pump: +0 word_sections_overlap 1 1 radiology: +0 word_sections_overlap 1 1 radiology/imaging: +0 word_sections_overlap 1 1 re-evaluate lv function height: +0 word_sections_overlap 1 1 rectal exam: +0 word_sections_overlap 1 1 rectal: +0 word_sections_overlap 1 1 rectal: +0 word_sections_overlap 1 1 resp: +0 word_sections_overlap 1 1 respiratory failure: +0 word_sections_overlap 1 1 rhythm: +0 word_sections_overlap 1 1 right atrium/interatrial septum: +0 word_sections_overlap 1 1 right ventricle: +0 word_sections_overlap 1 1 sensation: +0 word_sections_overlap 1 1 sensory: +0 word_sections_overlap 1 1 sternal incision: +0 word_sections_overlap 1 1 summary of hospital course by issue/system: +0 word_sections_overlap 1 1 summary of hospital course: +0 word_sections_overlap 1 1 tricuspid valve: +0 word_sections_overlap 1 1 upright ap chest: +0 word_sections_overlap 1 1 urinary tract infection: +0 word_sections_overlap 1 1 "v, vii:" +0 word_sections_overlap 1 1 vent: +0 word_sections_overlap 1 1 vii: +0 word_sections_overlap 1 1 xii: \ No newline at end of file