
Commit

Minor fixes from pylint run
jason-fries committed Nov 27, 2020
1 parent dc1094f commit acd4cb5
Showing 5 changed files with 9 additions and 13 deletions.
13 changes: 4 additions & 9 deletions applications/bc5cdr/chemicals.py
@@ -1,9 +1,3 @@
-import re
-import functools
-import itertools
-import collections
-import pandas as pd
-
 import collections
 from trove.labelers.tools import *
 from trove.labelers.labeling import *
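This first hunk is the classic pylint import cleanup: `re`, `functools`, `itertools`, and `pandas` are dropped (presumably unused, W0611) and the duplicate `import collections` goes away (W0404, reimported). A minimal sketch of the pattern pylint flags, with a hypothetical module context:

```python
# sketch of the original import block; surrounding module is hypothetical
import re           # W0611 (unused-import) if `re` is never referenced
import collections  # first import

import collections  # W0404 (reimported) -- duplicate of the line above
```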
@@ -213,7 +207,7 @@ def lfs(self, train_sentences, top_k=10, active_tiers=None):
         target_concepts = [sty for sty in self.class_map if
                            self.class_map[sty] == 1]
         specialist_1 = load_specialist_abbrvs(fpath,
-                                              umls,3
+                                              umls,
                                               target_concepts=target_concepts,
                                               filter_ambiguous=True)
         target_concepts = [sty for sty in self.class_map if
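The `umls,3` fix deletes a stray `3` that had slipped in as an extra positional argument. Assuming `load_specialist_abbrvs` declares `target_concepts` as its third parameter (a hypothetical signature, the real one is not shown in this diff), the stray value would bind to that slot positionally and then collide with the explicit keyword:

```python
def load_specialist_abbrvs(fpath, umls, target_concepts=None,
                           filter_ambiguous=False):
    """Hypothetical signature; the real one is not visible in the diff."""

try:
    # the old call site: the stray 3 fills target_concepts positionally,
    # then the keyword argument supplies it again
    load_specialist_abbrvs('specialist.txt', {}, 3,
                           target_concepts=['T103'],
                           filter_ambiguous=True)
except TypeError as e:
    print(e)  # got multiple values for argument 'target_concepts'
```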
@@ -451,7 +445,7 @@ def lfs(self, train_sentences, top_k=10, active_tiers=None):
             r'''((alpha|beta|gamma)[-][T])''',
             re.compile(r'''(PG[-]9|U[-]II)'''),
             re.compile(r'''(BPO|GSH|DFU|CsA|Srl|HOE|GVG|PAN|NMDA)'''),
-            re.compile(r'''(TCR|MZ|HBsAg|AraG|LR132|SSRI[s]*|HBeAg|LR132|BD10[0-9]{2}|GNC92H2|SSR103800|CGRP)'''),
+            re.compile(r'''(TCR|MZ|HBsAg|AraG|LR132|SSRI[s]*|HBeAg|BD10[0-9]{2}|GNC92H2|SSR103800|CGRP)'''),
             # peptides and proteins with less than 15 amino acids ARE annotated
             r'''(angiotensin([- ]ii)*)''',
             r'''(u[- ]ii|urotensin[- ]ii)''',
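The only change here removes a duplicated `LR132` branch. Alternation tries branches left to right, so a repeated literal branch is unreachable and deleting it leaves the accepted language unchanged; a quick check on a trimmed-down version of the pattern:

```python
import re

dup   = re.compile(r'''(TCR|LR132|SSRI[s]*|LR132|CGRP)''')
dedup = re.compile(r'''(TCR|LR132|SSRI[s]*|CGRP)''')

# the second LR132 branch can never fire, so both patterns
# accept exactly the same strings
for s in ['TCR', 'LR132', 'SSRIs', 'CGRP', 'BD1047']:
    assert bool(dup.fullmatch(s)) == bool(dedup.fullmatch(s))
```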
@@ -493,7 +487,8 @@ def lfs(self, train_sentences, top_k=10, active_tiers=None):
 # ----------------------------------------------------------------------
 # Hyphen token
 # ----------------------------------------------------------------------
-def get_subtokens(dictionary, split_chars=['-'], min_occur=20):
+def get_subtokens(dictionary, split_chars=None, min_occur=20):
+    split_chars = ['-'] if not split_chars else split_chars
     freq = collections.Counter()
     for term in dictionary:
         for ch in split_chars:
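The `get_subtokens` change is the standard fix for pylint's W0102 (dangerous-default-value): a mutable default such as `['-']` is built once at definition time and shared by every call, so mutations leak across invocations. The `None`-sentinel idiom avoids that:

```python
def broken(term, seen=[]):           # W0102: one shared list for all calls
    seen.append(term)
    return seen

print(broken('aspirin'))    # ['aspirin']
print(broken('ibuprofen'))  # ['aspirin', 'ibuprofen'] -- state leaked

def fixed(term, seen=None):
    seen = [] if seen is None else seen   # fresh list on every call
    seen.append(term)
    return seen

print(fixed('aspirin'))     # ['aspirin']
print(fixed('ibuprofen'))   # ['ibuprofen']
```

Note the commit tests `if not split_chars` rather than `if split_chars is None`, so an explicitly passed empty list is also replaced by `['-']`; for this helper the difference is unlikely to matter.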
2 changes: 1 addition & 1 deletion preprocessing/pipes/tokenizers.py
@@ -313,7 +313,7 @@ def ct_tokenizer(nlp):
     :param nlp:
     :return:
     """
-    prefix_re = re.compile(r'''^([\["'()*+-?/\<\>#%]+|[><][=])+''')
+    prefix_re = re.compile(r'''^([\["'()*+-?/<>#%]+|[><][=])+''')
     suffix_re = re.compile(r'''([\]"'),-.:;*]|'s)$''')
     infix_re = re.compile(r'''[%(),-./;=?]+''') # spaCy SBD break w/o [.]
 
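The tokenizer change only drops the redundant backslashes on `\<` and `\>`: inside a character class, `<` and `>` are already literal, so escaping them changes nothing. A quick equivalence check:

```python
import re

old = re.compile(r'''^([\["'()*+-?/\<\>#%]+|[><][=])+''')
new = re.compile(r'''^([\["'()*+-?/<>#%]+|[><][=])+''')

# both prefix patterns accept exactly the same strings
for s in ['<=10 mg', '>=2 doses', '"quoted', '(n=42)', 'plain']:
    assert bool(old.match(s)) == bool(new.match(s))
```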
2 changes: 1 addition & 1 deletion trove/contrib/labelers/clinical/family.py
@@ -17,7 +17,7 @@
 OTHER = 2
 ABSTAIN = 0
 
-rgx_relatives = re.compile(r'''\b(((grand)*(mother|father)|grand(m|p)a)([']*s)*|((parent|(daught|sist|broth)er|son|cousin)([']*s)*))\b''', re.I)
+rgx_relatives = re.compile(r'''\b(((grand)*(mother|father)|grand([mp])a)([']*s)*|((parent|(daught|sist|broth)er|son|cousin)([']*s)*))\b''', re.I)
 
 
 def LF_relative(span):
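Here the single-character alternation `(m|p)` becomes the character class `[mp]`. Both forms match the same strings; the class is simply the idiomatic spelling for a one-character choice. A cut-down check of the `grandma`/`grandpa` branch:

```python
import re

old = re.compile(r'''\bgrand(m|p)a([']*s)*\b''', re.I)
new = re.compile(r'''\bgrand([mp])a([']*s)*\b''', re.I)

for s in ["grandma", "Grandpa's", "grandmas", "grandmother"]:
    assert bool(old.search(s)) == bool(new.search(s))
```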
4 changes: 2 additions & 2 deletions trove/contrib/labelers/clinical/taggers.py
@@ -1,7 +1,7 @@
 import re
 from itertools import product
-from inkfish.data.dataloaders.contexts import Span, Relation
 from collections import defaultdict, namedtuple
+from trove.dataloaders.contexts import Span, Relation
 
 
 def get_text(words, offsets):
@@ -111,7 +111,7 @@ def dict_matcher(sentence,
         # ignore whitespace when matching dictionary terms
         text = span.text
         if ignore_whitespace:
-            text = re.sub(r'''\s{2,}|\n{1,}''', ' ', span.text).strip()
+            text = re.sub(r'''\s{2,}|\n+''', ' ', span.text).strip()
 
         # search for matches in all dictionaries
         for name in dictionaries:
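`\n{1,}` and `\n+` are the same quantifier written two ways, so the whitespace normalization in `dict_matcher` behaves identically before and after:

```python
import re

text = 'myocardial\n\ninfarction  of   the\nheart'

before = re.sub(r'''\s{2,}|\n{1,}''', ' ', text).strip()
after = re.sub(r'''\s{2,}|\n+''', ' ', text).strip()
assert before == after == 'myocardial infarction of the heart'
```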
1 change: 1 addition & 0 deletions trove/metrics/analysis.py
@@ -1,3 +1,4 @@
+import torch
 import numpy as np
 import scipy.sparse as sparse
 from scipy.sparse import issparse
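The last change simply adds the missing `import torch` to `trove/metrics/analysis.py`. Without it, any use of `torch` in the module is reported by pylint as E0602 (undefined-variable); the helper below is a hypothetical stand-in, not code from the file:

```python
import numpy as np
import torch  # without this line, pylint flags torch.from_numpy as E0602

def to_tensor(arr: np.ndarray) -> torch.Tensor:
    # hypothetical helper illustrating why the module needs torch
    return torch.from_numpy(np.ascontiguousarray(arr))
```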
