-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtalismane.py
executable file
·171 lines (151 loc) · 5.61 KB
/
talismane.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python
#-*- coding:utf-8 -*-
""" This python module propose a simple wrapper to Talismane syntax analyzer.
Talismane is run in a separate process and the communication with Python is
"""
__author__ = "Emmanuel Navarro <[email protected]>"
__copyright__ = "Copyright (c) 2012 Emmanuel Navarro"
__license__ = "GNU Affero General Public License, version >= 3"
__version__ = "0.1"
import time
import sys, os
import logging
import subprocess
import threading
from Queue import Queue, Empty
def pipe_writer(pipe, text):
    """Write *text* to *pipe*, then a line made of two form feeds, and flush.

    A newline is inserted after *text* when it does not already end with
    one, so the form-feed line always starts on a line of its own.  The
    form-feed line is presumably what tells Talismane that the input block
    is complete (NOTE(review): confirm against the Talismane documentation).

    An empty *text* is accepted: only the newline and the form-feed line
    are written.

    :param pipe: writable file-like object (the child process stdin)
    :param text: text to send
    """
    pipe.write(text)
    # endswith() is safe on an empty string, unlike indexing text[-1].
    if not text.endswith("\n"):
        pipe.write("\n")
    pipe.write("\f\f\n")
    pipe.flush()
def pipe_reader(pipe, queue):
    """Put every line read from *pipe* on *queue* until end of file.

    Lines are forwarded unchanged, including their line terminator.  The
    loop stops at the first empty read, which is how ``readline`` reports
    end of file for both byte streams and text streams.

    :param pipe: readable file-like object providing ``readline()``
    :param queue: object providing ``put(item)``
    """
    while True:
        line = pipe.readline()
        if not line:
            # Empty bytes *or* empty text: the stream is exhausted.
            break
        queue.put(line)
class Talismane(object):
    """Small Talismane wrapper.

    Creating an instance launches Talismane with ``java -jar`` as a child
    process.  Text is written to the child's standard input and the output
    lines are collected from its standard output by a background reader
    thread that feeds an internal queue.

    For more information about Talismane see:
     * https://github.com/urieli/talismane
     * http://redac.univ-tlse2.fr/applications/talismane.html
    """

    # Seconds slept right after launching the JVM, before the reader starts.
    _STARTUP_DELAY = 2
    # Longest wait (seconds) for one output line before re-checking the reader.
    _POLL_INTERVAL = 0.2

    def __init__(self, talismane_jar, max_memory="1G"):
        """Check the jar path and start the Talismane process.

        :param talismane_jar: path to the Talismane jar file
        :param max_memory: maximum JVM heap size, given to java as
            ``-Xmx<max_memory>`` (default ``"1G"``)
        :raises ValueError: if *talismane_jar* is not an existing file
        """
        self.talismane_jar = talismane_jar
        self.max_memory = max_memory
        self._logger = logging.getLogger("Talismane")
        # internal variables used to communicate with talismane
        self._tm_process = None
        self._tm_reader = None
        self._tm_output = None
        if not os.path.isfile(talismane_jar):
            raise ValueError("The indicated talismane jar file doesn't exist !")
        self._start_process()

    def _build_command(self):
        """Return the argument list used to launch Talismane.

        Internal use.
        """
        # cmd: $ java -Xmx1G -jar talismane.jar command=analyse
        return [
            "java",
            "-Xmx%s" % self.max_memory,
            "-jar",
            self.talismane_jar,
            "command=analyse",
        ]

    def _start_process(self):
        """Start the Talismane process and the thread reading its output.

        Internal use.  Any failure is logged with its traceback and
        re-raised.
        """
        talismane_cmd = self._build_command()
        try:
            self._tm_process = subprocess.Popen(
                talismane_cmd,           # a list of params, not a shell string
                bufsize=0,               # unbuffered: get Talismane data asap
                stdin=subprocess.PIPE,   # pipe used to write input data
                stdout=subprocess.PIPE,  # pipe used to read processing results
            )
            time.sleep(self._STARTUP_DELAY)
            self._logger.info("Started Talismane from command: %r", " ".join(talismane_cmd))
            self._tm_output = Queue()
            # starts the reader of Talismane output
            self._tm_reader = threading.Thread(
                target=pipe_reader,
                args=(self._tm_process.stdout, self._tm_output)
            )
            self._tm_reader.daemon = True  # thread dies with the program
            self._tm_reader.start()
        except Exception:
            self._logger.error("Failure to start Talismane with: %r",
                               " ".join(talismane_cmd), exc_info=True)
            raise

    def __del__(self):
        """Terminate the Talismane process, if one is still running."""
        # getattr: the attribute may be missing on a partially built instance.
        process = getattr(self, "_tm_process", None)
        if process is None or process.poll() is not None:
            return
        try:
            process.terminate()
        except OSError:
            # The process exited between poll() and terminate(): nothing to do.
            pass

    def analyse(self, texte):
        """Send *texte* to Talismane and return the parsed output.

        The text is written from a helper thread while this method consumes
        the output queue.  Reading stops when, after at least one non-empty
        line has been received, two consecutive blank lines are seen.

        :param texte: text to analyse
        :returns: a list with one entry per non-empty output line, each
            entry being the list of tab-separated fields of that line
        :raises RuntimeError: if the output reader thread has stopped and
            no output is left before the end of the result was seen
        """
        writer = threading.Thread(
            target=pipe_writer,
            args=(self._tm_process.stdin, texte)
        )
        # The writer is joined on the normal path; the daemon flag only keeps
        # a stuck writer from blocking interpreter exit on the error path.
        writer.daemon = True
        writer.start()

        res = []
        get_results = False     # True once a non-empty line has been read
        previous_blank = False  # True when the previous line was blank
        while True:
            try:
                line = self._tm_output.get(timeout=self._POLL_INTERVAL)
            except Empty:
                # Once the reader has stopped nothing more can arrive, so an
                # empty queue means the result will never be completed.
                if not self._tm_reader.is_alive() and self._tm_output.empty():
                    raise RuntimeError(
                        "Talismane output ended before the end of the analysis")
                continue
            # A blank line is a bare line terminator ("\n" or "\r\n").
            blank = not line.rstrip("\r\n")
            if get_results and blank and previous_blank:
                break
            previous_blank = blank
            fields = line.strip()
            if fields:
                get_results = True
                res.append(fields.split("\t"))
        # Synchronize to avoid possible problems.
        writer.join()
        return res
def main():
    """Command-line demo: start Talismane and print the analysis of two texts.

    Accepts the ``-t/--talismane-jar`` option and no positional argument.
    Returns 0 after both sample texts were analysed, or 1 when creating
    the wrapper raises ValueError.
    """
    import optparse

    cli = optparse.OptionParser(usage="""usage: %prog [options]""")
    cli.add_option("-t", "--talismane-jar", action="store", type=str,
                   default='talismane.jar', dest="talismane_jar",
                   help="path to the Talismane jar")
    options, positional = cli.parse_args()
    if positional:
        cli.error("You should provide no arguments")

    # Route the 'Talismane' logger to a stream handler, both at DEBUG level.
    verbosity = logging.DEBUG
    console = logging.StreamHandler()
    console.setLevel(verbosity)
    console.setFormatter(
        logging.Formatter("%(asctime)s:%(levelname)s:%(name)s:%(message)s"))
    talismane_log = logging.getLogger('Talismane')
    talismane_log.setLevel(verbosity)
    talismane_log.addHandler(console)

    # Run examples
    try:
        tm = Talismane(options.talismane_jar)
    except ValueError as e:
        print("Erreur: %s" % e)
        return 1

    samples = (
        ("--1--", "Je déguste du python."),
        ("--2--", "Il danse la Java, mais le python ne vas pas lui offrir des perles ou des rubis pour noel. En voila une autre phrase !"),
    )
    for banner, sentence in samples:
        print(banner)
        for token in tm.analyse(sentence):
            print("%s" % token)
    return 0
# Script entry point: run the demo and exit with the status it returns.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)