Skip to content

Commit 3691a42

Browse files
committed
Adding properties param for tokensregex, semgrex and regex
1 parent 4c9bd82 commit 3691a42

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed

example.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,8 @@
1414
print(output)
1515
# Semgrex query issued without explicit properties.
output = nlp.semgrex(
    text,
    pattern='{tag: VBD}',
    filter=False,
)
print(output)

# Semgrex query that supplies its own properties for this request.
# NOTE(review): the pattern matches the NER label "PERS"; confirm this is
# the label produced for pipelineLanguage 'en' (PERSON may be intended).
output = nlp.semgrex(
    text,
    pattern='{ner: PERS}',
    filter=False,
    properties={
        'annotators': 'tokenize,ssplit,ner,depparse',
        'pipelineLanguage': 'en',
    },
)
print(output)

pycorenlp/corenlp.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,25 @@ def annotate(self, text, properties=None):
3636
pass
3737
return output
3838

39-
def tokensregex(self, text, pattern, filter, properties=None):
    """Run a query against the ``/tokensregex`` endpoint.

    All arguments are forwarded unchanged to ``self.regex`` together with
    the fixed endpoint path, and its result is returned as-is.

    Args:
        text: Text forwarded to ``self.regex``.
        pattern: Pattern forwarded to ``self.regex``.
        filter: Filter flag forwarded to ``self.regex``.
        properties: Optional properties forwarded to ``self.regex``;
            defaults to ``None``.

    Returns:
        Whatever ``self.regex`` returns for the request.
    """
    endpoint = '/tokensregex'
    return self.regex(endpoint, text, pattern, filter, properties)
4141

42-
def semgrex(self, text, pattern, filter, properties=None):
    """Run a query against the ``/semgrex`` endpoint.

    All arguments are forwarded unchanged to ``self.regex`` together with
    the fixed endpoint path, and its result is returned as-is.

    Args:
        text: Text forwarded to ``self.regex``.
        pattern: Pattern forwarded to ``self.regex``.
        filter: Filter flag forwarded to ``self.regex``.
        properties: Optional properties forwarded to ``self.regex``;
            defaults to ``None``.

    Returns:
        Whatever ``self.regex`` returns for the request.
    """
    endpoint = '/semgrex'
    return self.regex(endpoint, text, pattern, filter, properties)
4444

45-
def regex(self, endpoint, text, pattern, filter, properties=None):
    """Send *text* and *pattern* to a pattern-matching server endpoint.

    Issues a GET request to ``self.server_url + endpoint`` with ``pattern``,
    ``properties`` and ``filter`` as query parameters and the UTF-8 encoded
    *text* as the request body.

    Args:
        endpoint: Path appended to ``self.server_url``, e.g.
            ``'/tokensregex'`` or ``'/semgrex'``.
        text: Text to send as the request body; must be a ``str``.
        pattern: Pattern string sent as the ``pattern`` parameter.
        filter: Value sent as the ``filter`` parameter.
        properties: Optional dict of properties; ``None`` (or any falsy
            value) is sent as an empty dict.

    Returns:
        The parsed JSON response, or the raw response text when the body
        is not valid JSON.

    Raises:
        TypeError: If *text* is not a ``str``.
    """
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if not isinstance(text, str):
        raise TypeError(
            'text must be str, not {}'.format(type(text).__name__))

    response = requests.get(
        self.server_url + endpoint,
        params={
            'pattern': pattern,
            # NOTE(review): str() sends the dict's Python repr rather than
            # strict JSON; confirm the server accepts this form.
            'properties': str(properties or {}),
            'filter': filter,
        },
        data=text.encode('utf-8'),
    )
    # Force UTF-8 decoding of the body regardless of the response headers.
    response.encoding = 'utf-8'
    body = response.text
    try:
        # No ``encoding`` keyword: it was removed from json.loads in
        # Python 3.9, and passing it made every call fall back to raw text.
        return json.loads(body)
    except ValueError:
        # Best effort: a non-JSON body is handed back verbatim.
        return body

0 commit comments

Comments
 (0)