Skip to content

Commit a0696bb

Browse files
committed
Update RegexpTagger to be able to specify matching groups
1 parent ba41223 commit a0696bb

File tree

2 files changed

+21
-11
lines changed

2 files changed

+21
-11
lines changed

flair/models/regexp_tagger.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ def get_token_span(self, span: tuple[int, int]) -> Span:
45 45

46 46

47 47
class RegexpTagger:
48-
def __init__(self, mapping: Union[list[tuple[str, str]], tuple[str, str]]) -> None:
48+
def __init__(
49+
self, mapping: Union[list[Union[tuple[str, str], tuple[str, str, int]]], tuple[str, str], tuple[str, str, int]]
50+
) -> None:
49 51
r"""This tagger is capable of tagging sentence objects with given regexp -> label mappings.
50 52
51 53
I.e: The tuple (r'(["\'])(?:(?=(\\?))\2.)*?\1', 'QUOTE') maps every match of the regexp to
@@ -58,24 +60,33 @@ def __init__(self, mapping: Union[list[tuple[str, str]], tuple[str, str]]) -> No
58 60
Args:
59 61
mapping: A list of tuples or a single tuple representing a mapping as regexp -> label
60 62
"""
61-
self._regexp_mapping: dict[str, typing.Pattern] = {}
63+
self._regexp_mapping: list[str, typing.Pattern, int] = []
62 64
self.register_labels(mapping=mapping)
63 65

66+
def label_type(self):
67+
for regexp, label, group in self._regexp_mapping:
68+
return label
69+
64 70
@property
65 71
def registered_labels(self):
66 72
return self._regexp_mapping
67 73

68-
def register_labels(self, mapping: Union[list[tuple[str, str]], tuple[str, str]]):
74+
def register_labels(self, mapping: Union[list[tuple[str, str, int]], tuple[str, str, int]]):
69 75
"""Register a regexp -> label mapping.
70 76
71 77
Args:
72 78
mapping: A list of tuples or a single tuple representing a mapping as regexp -> label
73 79
"""
74 80
mapping = self._listify(mapping)
75 81

76-
for regexp, label in mapping:
82+
for entry in mapping:
83+
regexp = entry[0]
84+
label = entry[1]
85+
group = entry[2] if len(entry) > 2 else 0
77 86
try:
78-
self._regexp_mapping[label] = re.compile(regexp)
87+
pattern = re.compile(regexp)
88+
self._regexp_mapping.append((pattern, label, group))
89+
79 90
except re.error as err:
80 91
raise re.error(
81 92
f"Couldn't compile regexp '{regexp}' for label '{label}'. Aborted with error: '{err.msg}'"
@@ -89,10 +100,7 @@ def remove_labels(self, labels: Union[list[str], str]):
89 100
"""
90 101
labels = self._listify(labels)
91 102

92-
for label in labels:
93-
if not self._regexp_mapping.get(label):
94-
continue
95-
self._regexp_mapping.pop(label)
103+
self._regexp_mapping = [mapping for mapping in self._regexp_mapping if mapping[1] not in labels]
96 104

97 105
@staticmethod
98 106
def _listify(element: object) -> list:
@@ -120,9 +128,11 @@ def _label(self, sentence: Sentence):
120 128
"""
121 129
collection = TokenCollection(sentence)
122 130

123-
for label, pattern in self._regexp_mapping.items():
131+
for pattern, label, group in self._regexp_mapping:
124 132
for match in pattern.finditer(sentence.to_original_text()):
125-
span: tuple[int, int] = match.span()
133+
# print(match)
134+
span: tuple[int, int] = match.span(group)
135+
# print(span)
126 136
try:
127 137
token_span = collection.get_token_span(span)
128 138
except ValueError:

tests/models/test_regexp_tagger.py

Whitespace-only changes.

0 commit comments

Comments
 (0)