
Commit

Minor fixes from pylint run
jason-fries committed Nov 27, 2020
1 parent dc1094f commit acd4cb5
Showing 5 changed files with 9 additions and 13 deletions.
13 changes: 4 additions & 9 deletions applications/bc5cdr/chemicals.py
@@ -1,9 +1,3 @@
-import re
-import functools
-import itertools
-import collections
-import pandas as pd
-
 import collections
 from trove.labelers.tools import *
 from trove.labelers.labeling import *
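This first hunk is the classic pylint import cleanup: `re`, `functools`, `itertools`, and `pandas` are dropped (presumably unused, W0611) and the duplicate `import collections` goes away (W0404, reimported). A minimal sketch of the pattern pylint flags, with a hypothetical module context:

```python
# sketch of the original import block; surrounding module is hypothetical
import re           # W0611 (unused-import) if `re` is never referenced
import collections  # first import

import collections  # W0404 (reimported) -- duplicate of the line above
```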
@@ -213,7 +207,7 @@ def lfs(self, train_sentences, top_k=10, active_tiers=None):
         target_concepts = [sty for sty in self.class_map if
                            self.class_map[sty] == 1]
         specialist_1 = load_specialist_abbrvs(fpath,
-                                              umls,3
+                                              umls,
                                               target_concepts=target_concepts,
                                               filter_ambiguous=True)
         target_concepts = [sty for sty in self.class_map if
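The `umls,3` fix deletes a stray `3` that had slipped in as an extra positional argument. Assuming `load_specialist_abbrvs` declares `target_concepts` as its third parameter (a hypothetical signature, the real one is not shown in this diff), the stray value would bind to that slot positionally and then collide with the explicit keyword:

```python
def load_specialist_abbrvs(fpath, umls, target_concepts=None,
                           filter_ambiguous=False):
    """Hypothetical signature; the real one is not visible in the diff."""

try:
    # the old call site: the stray 3 fills target_concepts positionally,
    # then the keyword argument supplies it again
    load_specialist_abbrvs('specialist.txt', {}, 3,
                           target_concepts=['T103'],
                           filter_ambiguous=True)
except TypeError as e:
    print(e)  # got multiple values for argument 'target_concepts'
```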
@@ -451,7 +445,7 @@ def lfs(self, train_sentences, top_k=10, active_tiers=None):
             r'''((alpha|beta|gamma)[-][T])''',
             re.compile(r'''(PG[-]9|U[-]II)'''),
             re.compile(r'''(BPO|GSH|DFU|CsA|Srl|HOE|GVG|PAN|NMDA)'''),
-            re.compile(r'''(TCR|MZ|HBsAg|AraG|LR132|SSRI[s]*|HBeAg|LR132|BD10[0-9]{2}|GNC92H2|SSR103800|CGRP)'''),
+            re.compile(r'''(TCR|MZ|HBsAg|AraG|LR132|SSRI[s]*|HBeAg|BD10[0-9]{2}|GNC92H2|SSR103800|CGRP)'''),
             # peptides and proteins with less than 15 amino acids ARE annotated
             r'''(angiotensin([- ]ii)*)''',
             r'''(u[- ]ii|urotensin[- ]ii)''',
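The only change here removes a duplicated `LR132` branch. Alternation tries branches left to right, so a repeated literal branch is unreachable and deleting it leaves the accepted language unchanged; a quick check on a trimmed-down version of the pattern:

```python
import re

dup   = re.compile(r'''(TCR|LR132|SSRI[s]*|LR132|CGRP)''')
dedup = re.compile(r'''(TCR|LR132|SSRI[s]*|CGRP)''')

# the second LR132 branch can never fire, so both patterns
# accept exactly the same strings
for s in ['TCR', 'LR132', 'SSRIs', 'CGRP', 'BD1047']:
    assert bool(dup.fullmatch(s)) == bool(dedup.fullmatch(s))
```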
@@ -493,7 +487,8 @@ def lfs(self, train_sentences, top_k=10, active_tiers=None):
 # ----------------------------------------------------------------------
 # Hyphen token
 # ----------------------------------------------------------------------
-def get_subtokens(dictionary, split_chars=['-'], min_occur=20):
+def get_subtokens(dictionary, split_chars=None, min_occur=20):
+    split_chars = ['-'] if not split_chars else split_chars
     freq = collections.Counter()
     for term in dictionary:
         for ch in split_chars:
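The `get_subtokens` change is the standard fix for pylint's W0102 (dangerous-default-value): a mutable default such as `['-']` is built once at definition time and shared by every call, so mutations leak across invocations. The `None`-sentinel idiom avoids that:

```python
def broken(term, seen=[]):           # W0102: one shared list for all calls
    seen.append(term)
    return seen

print(broken('aspirin'))    # ['aspirin']
print(broken('ibuprofen'))  # ['aspirin', 'ibuprofen'] -- state leaked

def fixed(term, seen=None):
    seen = [] if seen is None else seen   # fresh list on every call
    seen.append(term)
    return seen

print(fixed('aspirin'))     # ['aspirin']
print(fixed('ibuprofen'))   # ['ibuprofen']
```

Note the commit tests `if not split_chars` rather than `if split_chars is None`, so an explicitly passed empty list is also replaced by `['-']`; for this helper the difference is unlikely to matter.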
2 changes: 1 addition & 1 deletion preprocessing/pipes/tokenizers.py
@@ -313,7 +313,7 @@ def ct_tokenizer(nlp):
     :param nlp:
     :return:
     """
-    prefix_re = re.compile(r'''^([\["'()*+-?/\<\>#%]+|[><][=])+''')
+    prefix_re = re.compile(r'''^([\["'()*+-?/<>#%]+|[><][=])+''')
     suffix_re = re.compile(r'''([\]"'),-.:;*]|'s)$''')
     infix_re = re.compile(r'''[%(),-./;=?]+''') # spaCy SBD break w/o [.]
 
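The tokenizer change only drops the redundant backslashes on `\<` and `\>`: inside a character class, `<` and `>` are already literal, so escaping them changes nothing. A quick equivalence check:

```python
import re

old = re.compile(r'''^([\["'()*+-?/\<\>#%]+|[><][=])+''')
new = re.compile(r'''^([\["'()*+-?/<>#%]+|[><][=])+''')

# both prefix patterns accept exactly the same strings
for s in ['<=10 mg', '>=2 doses', '"quoted', '(n=42)', 'plain']:
    assert bool(old.match(s)) == bool(new.match(s))
```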
2 changes: 1 addition & 1 deletion trove/contrib/labelers/clinical/family.py
@@ -17,7 +17,7 @@
 OTHER = 2
 ABSTAIN = 0
 
-rgx_relatives = re.compile(r'''\b(((grand)*(mother|father)|grand(m|p)a)([']*s)*|((parent|(daught|sist|broth)er|son|cousin)([']*s)*))\b''', re.I)
+rgx_relatives = re.compile(r'''\b(((grand)*(mother|father)|grand([mp])a)([']*s)*|((parent|(daught|sist|broth)er|son|cousin)([']*s)*))\b''', re.I)
 
 
 def LF_relative(span):
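Here the single-character alternation `(m|p)` becomes the character class `[mp]`. Both forms match the same strings; the class is simply the idiomatic spelling for a one-character choice. A cut-down check of the `grandma`/`grandpa` branch:

```python
import re

old = re.compile(r'''\bgrand(m|p)a([']*s)*\b''', re.I)
new = re.compile(r'''\bgrand([mp])a([']*s)*\b''', re.I)

for s in ["grandma", "Grandpa's", "grandmas", "grandmother"]:
    assert bool(old.search(s)) == bool(new.search(s))
```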
4 changes: 2 additions & 2 deletions trove/contrib/labelers/clinical/taggers.py
@@ -1,7 +1,7 @@
 import re
 from itertools import product
-from inkfish.data.dataloaders.contexts import Span, Relation
 from collections import defaultdict, namedtuple
+from trove.dataloaders.contexts import Span, Relation
 
 
 def get_text(words, offsets):
@@ -111,7 +111,7 @@ def dict_matcher(sentence,
         # ignore whitespace when matching dictionary terms
         text = span.text
         if ignore_whitespace:
-            text = re.sub(r'''\s{2,}|\n{1,}''', ' ', span.text).strip()
+            text = re.sub(r'''\s{2,}|\n+''', ' ', span.text).strip()
 
         # search for matches in all dictionaries
         for name in dictionaries:
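`\n{1,}` and `\n+` are the same quantifier written two ways, so the whitespace normalization in `dict_matcher` behaves identically before and after:

```python
import re

text = 'myocardial\n\ninfarction  of   the\nheart'

before = re.sub(r'''\s{2,}|\n{1,}''', ' ', text).strip()
after = re.sub(r'''\s{2,}|\n+''', ' ', text).strip()
assert before == after == 'myocardial infarction of the heart'
```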
1 change: 1 addition & 0 deletions trove/metrics/analysis.py
@@ -1,3 +1,4 @@
+import torch
 import numpy as np
 import scipy.sparse as sparse
 from scipy.sparse import issparse
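The last change simply adds the missing `import torch` to `trove/metrics/analysis.py`. Without it, any use of `torch` in the module is reported by pylint as E0602 (undefined-variable); the helper below is a hypothetical stand-in, not code from the file:

```python
import numpy as np
import torch  # without this line, pylint flags torch.from_numpy as E0602

def to_tensor(arr: np.ndarray) -> torch.Tensor:
    # hypothetical helper illustrating why the module needs torch
    return torch.from_numpy(np.ascontiguousarray(arr))
```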
