Skip to content

Commit

Permalink
Merge pull request #1322 from ankan2013/heavy_refactor
Browse files Browse the repository at this point in the history
Kazakh and Tatar parsers added
  • Loading branch information
myrix authored Oct 5, 2021
2 parents 02cfc12 + 6524910 commit a74bf75
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 108 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ RUN \
pip3 install --upgrade setuptools==44.0 && \
pip3 install -r server-requirements.txt && \
pip3 install alembic gunicorn==19.7.1
# Generate the en_US.UTF-8 locale and install the Apertium / HFST / CG3
# toolchain. The package index is refreshed in the same layer as the install
# so a stale cached index from an earlier layer cannot break it, and `apt-get`
# is used instead of `apt`, whose command-line interface is not meant for
# scripted use.
RUN \
    locale-gen en_US.UTF-8 && update-locale && \
    apt-get update && \
    apt-get install -y lttoolbox apertium-dev apertium-lex-tools hfst libhfst-dev cg3-dev
31 changes: 31 additions & 0 deletions alembic/versions/71a35496d931_kaz_tat_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Kazakh and Tatar parsers
Revision ID: 71a35496d931
Revises: d15043d2cbd9
Create Date: 2021-10-05 04:34:20.845470
"""

# revision identifiers, used by Alembic.
revision = '71a35496d931'
down_revision = 'd15043d2cbd9'
branch_labels = None
depends_on = None

from alembic import op

def upgrade():
    """Register the Apertium Kazakh and Tatar parsers.

    Inserts two rows into ``public.parser``, both owned by ``client_id`` 1
    and with an empty ``'[]'`` parameter list:

    * ``object_id`` 6, method ``apertium_kaz_rus`` (Kazakh parser);
    * ``object_id`` 7, method ``apertium_tat_rus`` (Tatar parser).

    Both statements are sent to the database in a single ``op.execute`` call.
    """
    # The SQL text is kept flush-left so the string handed to the database
    # carries no extra leading whitespace.
    insert_parsers_sql = '''
INSERT INTO public.parser(additional_metadata, created_at, object_id, client_id, name, parameters, method)
VALUES(null, '2021-10-05 20:22:00.000000', 6, 1, 'Парсер казахского языка Apertium', '[]',
'apertium_kaz_rus');
INSERT INTO public.parser(additional_metadata, created_at, object_id, client_id, name, parameters, method)
VALUES(null, '2021-10-05 20:23:00.000000', 7, 1, 'Парсер татарского языка Apertium', '[]',
'apertium_tat_rus');
'''
    op.execute(insert_parsers_sql)

def downgrade():
    """Remove the Apertium Kazakh and Tatar parser rows.

    Deletes every row of ``public.parser`` whose ``method`` is
    ``apertium_kaz_rus`` or ``apertium_tat_rus``. The table name is
    schema-qualified, matching ``upgrade``, so both directions of the
    migration act on the same table regardless of the session's
    ``search_path``.
    """
    op.execute('''
DELETE FROM public.parser
WHERE method IN ('apertium_kaz_rus', 'apertium_tat_rus');
''')
4 changes: 2 additions & 2 deletions docker/docker.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ pyramid.includes =

sqlalchemy.url = postgresql+psycopg2://postgres:password@pg:5432/lingvodoc

# This parameter should be specified manually
# These parameters should be specified manually
dedoc_url = dedoc_url

apertium_path = /absolute/path
# By default, the toolbar only appears for clients from IP addresses
# '127.0.0.1' and '::1'.
# debugtoolbar.hosts = 127.0.0.1 ::1
Expand Down
10 changes: 10 additions & 0 deletions lingvodoc/schema/gql_parserresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,16 @@ def mutate(root, info, **args):
cur_args["dedoc_url"] = request.registry.settings["dedoc_url"]
except KeyError:
raise ResponseError(message="Dedoc server url was not provided in configuration")
if parser.method.find("apertium") == -1:
cur_args["apertium_path"] = ""
else:
msg = "The path to the folder with Apertium parsers was not provided in configuration"
try:
cur_args["apertium_path"] = request.registry.settings["apertium_path"]
except KeyError:
raise ResponseError(message=msg)
if len(cur_args["apertium_path"]) == 0:
raise ResponseError(message=msg)

async_execution = args.get("async_execution")
if async_execution == None or async_execution == True:
Expand Down
16 changes: 10 additions & 6 deletions lingvodoc/utils/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,15 +411,15 @@ def create_lexicalentry(id, perspective_id, save_object=False):

@celery.task
def async_create_parser_result(id, parser_id, entity_id,
task_key, cache_kwargs, sqlalchemy_url, dedoc_url,
task_key, cache_kwargs, sqlalchemy_url, dedoc_url, apertium_path,
arguments, save_object):
async_create_parser_result_method(id=id, parser_id=parser_id, entity_id=entity_id,
task_key=task_key, cache_kwargs=cache_kwargs,
sqlalchemy_url=sqlalchemy_url, dedoc_url=dedoc_url,
sqlalchemy_url=sqlalchemy_url, dedoc_url=dedoc_url, apertium_path=apertium_path,
arguments=arguments, save_object=save_object)

def async_create_parser_result_method(id, parser_id, entity_id,
task_key, cache_kwargs, sqlalchemy_url, dedoc_url,
task_key, cache_kwargs, sqlalchemy_url, dedoc_url, apertium_path,
arguments, save_object):

from lingvodoc.cache.caching import initialize_cache
Expand All @@ -433,7 +433,7 @@ def async_create_parser_result_method(id, parser_id, entity_id,

try:

create_parser_result(id=id, parser_id=parser_id, entity_id=entity_id, dedoc_url=dedoc_url,
create_parser_result(id=id, parser_id=parser_id, entity_id=entity_id, dedoc_url=dedoc_url, apertium_path=apertium_path,
arguments=arguments, save_object=save_object)

except Exception as err:
Expand All @@ -445,7 +445,7 @@ def async_create_parser_result_method(id, parser_id, entity_id,
# Downloads a document by the URL in an entity's content and saves the result of its parsing


def create_parser_result(id, parser_id, entity_id, dedoc_url, arguments=None, save_object=True):
def create_parser_result(id, parser_id, entity_id, dedoc_url, apertium_path, arguments=None, save_object=True):

client_id, object_id = id
parser_client_id, parser_object_id = parser_id
Expand Down Expand Up @@ -476,7 +476,11 @@ def create_parser_result(id, parser_id, entity_id, dedoc_url, arguments=None, sa
os.rename(tmp_filename, tmp_filename + extension)
tmp_filename = tmp_filename + extension

result = parse_method(tmp_filename, dedoc_url, **arguments)
if parser.method.find("timarkh") != -1:
result = parse_method(tmp_filename, dedoc_url, **arguments)

if parser.method.find("apertium") != -1:
result = parse_method(tmp_filename, dedoc_url, apertium_path, **arguments)

dbparserresult = ParserResult(client_id=client_id, object_id=object_id,
parser_object_id=parser_object_id, parser_client_id=parser_client_id,
Expand Down
Loading

0 comments on commit a74bf75

Please sign in to comment.