Commit
Using celery task for neuro cognates -- ispras/lingvodoc-react#1182
vmonakhov committed Feb 17, 2025
1 parent 406d088 commit aa66310
Showing 4 changed files with 223 additions and 107 deletions.
70 changes: 32 additions & 38 deletions lingvodoc/schema/gql_cognate.py
@@ -5630,7 +5630,6 @@ class Arguments:
         base_language_id = LingvodocID()
         input_pairs = ObjectVal()
         truth_threshold = graphene.Float()
-        stamp = graphene.Float()
 
         debug_flag = graphene.Boolean()
 
@@ -5640,27 +5639,25 @@ class Arguments:
     message = graphene.String()
     perspective_name_list = graphene.List(graphene.String)
     transcription_count = graphene.Int()
-    stamp = graphene.Float()
 
     @staticmethod
     def neuro_cognate_statistics(
-            #language_str,
-            #base_language_id,
-            #base_language_name,
             perspective_info_list,
             source_perspective_id,
             match_translations,
             input_pairs,
             locale_id,
             user_id,
             truth_threshold,
-            stamp,
-            #storage,
+            storage,
+            host_url,
+            cache_kwargs,
             debug_flag = False):
 
         input_pairs_list = input_pairs or []
         compare_pairs_list = []
-        total_transcription_count = len(input_pairs) if input_pairs else 0
         input_index = None
+        dictionary_name_list = []
         perspective_name_list = []
 
         for (
@@ -5686,60 +5683,64 @@ def neuro_cognate_statistics(
 
             if perspective_id != source_perspective_id:
                 compare_pairs_list.append(current_pairs_list[:])
-                total_transcription_count += len(current_pairs_list)
             else:
                 input_index = idx
                 compare_pairs_list.append([])
                 if not input_pairs_list:
                     input_pairs_list = current_pairs_list[:]
-                    total_transcription_count += len(current_pairs_list)
 
             perspective = DBSession.query(dbPerspective).filter_by(
                 client_id = perspective_id[0], object_id = perspective_id[1]).first()
 
             perspective_name = perspective.get_translation(locale_id)
+            dictionary_name = perspective.parent.get_translation(locale_id)
 
+            dictionary_name_list.append(dictionary_name)
+            perspective_name_list.append(f"{perspective_name} - {dictionary_name}")
 
         message = ""
         triumph = True
-        prediction = None
+        input_len = len(input_pairs_list)
         compare_len = sum(map(len, compare_pairs_list))
-        stamp_file = f"/tmp/lingvodoc_stamps/{stamp}"
+        dictionaries = []
 
-        if not input_pairs_list or not compare_len:
+        if not input_len or not compare_len:
             triumph = False
             message = "No input words or words to compare is received"
         elif compare_len > 10 ** 4:
             triumph = False
-            message = "Too large dictionaries to compare"
+            message = f"Too many words to compare: {compare_len}"
         else:
+            for i, d in enumerate(dictionary_name_list, 1):
+                dictionaries.append(f"{i}. {d}")
 
+            task = TaskStatus(user_id, 'Neuro cognates computation', '\n\n'.join(dictionaries), input_len)
+            task.set(1, 0, "first words processing...", "")
 
             NeuroCognatesEngine = NeuroCognates(
                 compare_pairs_list,
                 input_index,
+                source_perspective_id,
+                perspective_name_list,
+                storage,
+                host_url,
+                cache_kwargs,
                 match_translations,
-                truth_threshold,
-                stamp_file)
+                truth_threshold)
 
-            prediction = NeuroCognatesEngine.index(input_pairs_list)
+            NeuroCognatesEngine.index(input_pairs_list, task)
 
         result_dict = (
             dict(
                 triumph=triumph,
-                stamp=stamp,
-                suggestion_list=prediction,
-                message=message,
-                perspective_name_list=perspective_name_list,
-                transcription_count=total_transcription_count))
+                message=message))
 
         return NeuroCognateAnalysis(**result_dict)
 
     @staticmethod
     def mutate(
             self,
             info,
-            stamp,
             source_perspective_id,
             perspective_info_list,
             match_translations,
@@ -5802,18 +5803,12 @@ def mutate(
                     base_language_id[0], base_language_id[1]))
 
             try:
-
-                # Getting base language info.
-
                 locale_id = info.context.locale_id
 
-                #base_language = DBSession.query(dbLanguage).filter_by(
-                    #client_id = base_language_id[0], object_id = base_language_id[1]).first()
-
-                #base_language_name = base_language.get_translation(locale_id)
-
-                #request = info.context.request
-                #storage = request.registry.settings['storage']
+                request = info.context.request
+                host_url = request.environ['HTTP_ORIGIN']
+                storage = request.registry.settings['storage']
+                cache_kwargs = request.registry.settings['cache_kwargs']
 
                 # Transforming client/object pair ids from lists to 2-tuples.
 
@@ -5836,17 +5831,16 @@
                 _ in perspective_info_list]
 
             return NeuroCognateAnalysis.neuro_cognate_statistics(
-                #language_str,
-                #base_language_id,
-                #base_language_name,
                 perspective_info_list,
                 source_perspective_id,
                 match_translations,
                 input_pairs,
                 locale_id,
                 user.id,
                 truth_threshold,
-                stamp,
-                #storage,
+                storage,
+                host_url,
+                cache_kwargs,
                 debug_flag)
 
         # Exception occurred while we tried to perform swadesh analysis.
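The fourth changed file — the NeuroCognates engine that actually receives the task — did not load in this view, so the worker side of the change is not shown above. Judging from what is visible (index(input_pairs_list, task) now takes a TaskStatus, stamp files moved under storage['path']/lingvodoc_stamps, and results are later read back by the new result_suggestions query), the celery-side loop presumably looks something like the sketch below. The helper name run_index, the predict_fn callback, the use of task.id as the stamp-file name, and the result payload keys are all assumptions, not code from this commit.

    import gzip
    import os
    import pickle
    import uuid

    def run_index(input_pairs_list, compare_pairs_list, task, storage, predict_fn):

        # Stamp file toggled by StopMutation; using task.id as its name is an assumption.
        stamp_file = os.path.join(storage['path'], 'lingvodoc_stamps', str(task.id))

        total = len(input_pairs_list)
        suggestion_list = []

        for count, pair in enumerate(input_pairs_list, 1):

            # Abort if the user has requested a stop via StopMutation.
            if os.path.isfile(stamp_file):
                task.set(1, 100, "Stopped by user", "")
                return

            suggestion_list.append(predict_fn(pair, compare_pairs_list))

            # Same positional shape as task.set(1, 0, "first words processing...", "") above.
            task.set(1, count * 100 // total, f"{count}/{total} words processed", "")

        # Persist results where resolve_result_suggestions() will look for them.
        result_dir = os.path.join(storage['path'], 'neuro_cognates')
        os.makedirs(result_dir, exist_ok=True)
        result_file = str(uuid.uuid4())

        with gzip.open(os.path.join(result_dir, result_file), 'wb') as pickle_file:
            pickle.dump(dict(suggestion_list=suggestion_list), pickle_file)

        task.set(1, 100, "Finished", result_file)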
31 changes: 21 additions & 10 deletions lingvodoc/schema/gql_sync.py
@@ -43,15 +43,15 @@
     HTTPUnauthorized
 )
 from lingvodoc.views.v2.desktop_sync.core import async_download_dictionary
 import os
 import json
 import requests
 from pyramid.request import Request
 from pathlib import Path
 from pyramid.response import Response
 from lingvodoc.utils.search import recursive_sort
 from pdb import set_trace as A
 
-from lingvodoc.cache.caching import CACHE
+from lingvodoc.cache.caching import CACHE, initialize_cache
 
 log = logging.getLogger(__name__)

@@ -383,7 +383,7 @@ def mutate(root, info, **args):
 class StopMutation(graphene.Mutation):
 
     class Arguments:
-        stamp = graphene.Float(required=True)
+        stamp = graphene.String(required=True)
 
     triumph = graphene.Boolean()
 
@@ -396,12 +396,23 @@ def mutate(root, info, stamp):
         if not client:
             return ResponseError('Only authorized users can stop running mutations.')
 
-        stamps_path = "/tmp/lingvodoc_stamps"
-
-        # Touch stamp file
-        Path(stamps_path).mkdir(exist_ok=True)
-        open(f"{stamps_path}/{stamp}", 'a').close()
-
-        print("!!! Stamp-to-stop")
+        request = info.context.request
+        storage = request.registry.settings['storage']
+        stamp_path = os.path.join(storage['path'], 'lingvodoc_stamps')
+        stamp_file = os.path.join(stamp_path, stamp)
+        os.makedirs(stamp_path, exist_ok=True)
+
+        '''
+        cache_kwargs = request.registry.settings['cache_kwargs']
+        initialize_cache(cache_kwargs)
+        task = TaskStatus.get_from_cache(stamp)
+        '''
+
+        # Touch stamp file if it does not exist,
+        # otherwise we delete it
+        if not os.path.isfile(stamp_file):
+            open(stamp_file, 'a').close()
+        else:
+            os.remove(stamp_file)
 
         return StopMutation(triumph=True)
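Note the changed semantics: the mutation no longer just creates a stamp file, it toggles one — a first call requests a stop, a second call with the same stamp withdraws the request. A minimal client-side sketch, assuming the schema exposes this mutation as stop_mutation and serves GraphQL at /graphql (both assumptions, not confirmed by the diff):

    import requests

    GRAPHQL_URL = 'http://localhost:6543/graphql'  # assumed endpoint

    STOP_MUTATION = '''
    mutation Stop($stamp: String!) {
      stop_mutation(stamp: $stamp) {
        triumph
      }
    }
    '''

    # First call touches the stamp file (requests a stop); repeating it
    # with the same stamp removes the file (withdraws the request).
    response = requests.post(
        GRAPHQL_URL,
        json={'query': STOP_MUTATION,
              'variables': {'stamp': 'example-stamp'}})

    print(response.json())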
23 changes: 23 additions & 0 deletions lingvodoc/schema/query.py
@@ -695,6 +695,11 @@ class Query(graphene.ObjectType):
             xcript_fid = LingvodocID(required = True),
             xlat_fid = LingvodocID(required = True)))
 
+    result_suggestions = (
+        graphene.Field(
+            ObjectVal,
+            result_file = graphene.String(required = True)))
+
     def resolve_fill_logs(self, info, worker=1):
         # Check if the current user is administrator
         client_id = info.context.client_id
@@ -5323,6 +5328,24 @@ def resolve_words(self,

         return result_pairs_list
 
+    def resolve_result_suggestions(self,
+                                   info,
+                                   result_file):
+        storage = (
+            info.context.request.registry.settings['storage'])
+
+        pickle_path = os.path.join(storage['path'], 'neuro_cognates', result_file)
+
+        try:
+            with gzip.open(pickle_path, 'rb') as pickle_file:
+                result_dict = pickle.load(pickle_file)
+
+        except:
+            return ResponseError(f'Cannot access file \'{pickle_path}\'.')
+
+        return result_dict
+
+
 class PerspectivesAndFields(graphene.InputObjectType):
     perspective_id = LingvodocID()
     field_id = LingvodocID()
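The new resolver implies a simple file contract: whatever dictionary the celery task gzip-pickles into storage['path']/neuro_cognates/<result_file> is returned verbatim to the client (gzip, pickle and os are presumably already imported at the top of query.py). A round-trip sketch of that contract, with the storage path and payload keys as stand-ins rather than values from the commit:

    import gzip
    import os
    import pickle

    storage_path = '/tmp/lingvodoc_storage'  # stands in for storage['path']

    result_dir = os.path.join(storage_path, 'neuro_cognates')
    os.makedirs(result_dir, exist_ok=True)
    pickle_path = os.path.join(result_dir, 'example_result')

    # Writer side, presumably done by the celery task on completion.
    result_dict = dict(suggestion_list=[], perspective_name_list=[])

    with gzip.open(pickle_path, 'wb') as pickle_file:
        pickle.dump(result_dict, pickle_file)

    # Reader side, mirroring resolve_result_suggestions.
    with gzip.open(pickle_path, 'rb') as pickle_file:
        assert pickle.load(pickle_file) == result_dict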
