Commit
Using celery task for neuro cognates -- ispras/lingvodoc-react#1182
vmonakhov committed Feb 17, 2025
1 parent 406d088 commit aa66310
Showing 4 changed files with 223 additions and 107 deletions.
70 changes: 32 additions & 38 deletions lingvodoc/schema/gql_cognate.py
@@ -5630,7 +5630,6 @@ class Arguments:
         base_language_id = LingvodocID()
         input_pairs = ObjectVal()
         truth_threshold = graphene.Float()
-        stamp = graphene.Float()
 
         debug_flag = graphene.Boolean()
 
@@ -5640,27 +5639,25 @@ class Arguments:
     message = graphene.String()
     perspective_name_list = graphene.List(graphene.String)
     transcription_count = graphene.Int()
-    stamp = graphene.Float()
 
     @staticmethod
     def neuro_cognate_statistics(
-            #language_str,
-            #base_language_id,
-            #base_language_name,
             perspective_info_list,
             source_perspective_id,
             match_translations,
             input_pairs,
             locale_id,
             user_id,
             truth_threshold,
-            stamp,
-            #storage,
+            storage,
+            host_url,
+            cache_kwargs,
             debug_flag = False):
 
         input_pairs_list = input_pairs or []
         compare_pairs_list = []
-        total_transcription_count = len(input_pairs) if input_pairs else 0
         input_index = None
+        dictionary_name_list = []
         perspective_name_list = []
 
         for (
@@ -5686,60 +5683,64 @@ def neuro_cognate_statistics(
 
             if perspective_id != source_perspective_id:
                 compare_pairs_list.append(current_pairs_list[:])
-                total_transcription_count += len(current_pairs_list)
             else:
                 input_index = idx
                 compare_pairs_list.append([])
                 if not input_pairs_list:
                     input_pairs_list = current_pairs_list[:]
-                    total_transcription_count += len(current_pairs_list)
 
             perspective = DBSession.query(dbPerspective).filter_by(
                 client_id = perspective_id[0], object_id = perspective_id[1]).first()
 
             perspective_name = perspective.get_translation(locale_id)
+            dictionary_name = perspective.parent.get_translation(locale_id)
 
+            dictionary_name_list.append(dictionary_name)
+            perspective_name_list.append(f"{perspective_name} - {dictionary_name}")
 
         message = ""
         triumph = True
-        prediction = None
+        input_len = len(input_pairs_list)
         compare_len = sum(map(len, compare_pairs_list))
-        stamp_file = f"/tmp/lingvodoc_stamps/{stamp}"
+        dictionaries = []
 
-        if not input_pairs_list or not compare_len:
+        if not input_len or not compare_len:
             triumph = False
             message = "No input words or words to compare is received"
         elif compare_len > 10 ** 4:
             triumph = False
-            message = "Too large dictionaries to compare"
+            message = f"Too many words to compare: {compare_len}"
         else:
+            for i, d in enumerate(dictionary_name_list, 1):
+                dictionaries.append(f"{i}. {d}")
 
+            task = TaskStatus(user_id, 'Neuro cognates computation', '\n\n'.join(dictionaries), input_len)
+            task.set(1, 0, "first words processing...", "")
 
             NeuroCognatesEngine = NeuroCognates(
                 compare_pairs_list,
                 input_index,
+                source_perspective_id,
+                perspective_name_list,
+                storage,
+                host_url,
+                cache_kwargs,
                 match_translations,
-                truth_threshold,
-                stamp_file)
+                truth_threshold)
 
-            prediction = NeuroCognatesEngine.index(input_pairs_list)
+            NeuroCognatesEngine.index(input_pairs_list, task)
 
         result_dict = (
             dict(
                 triumph=triumph,
-                stamp=stamp,
-                suggestion_list=prediction,
-                message=message,
-                perspective_name_list=perspective_name_list,
-                transcription_count=total_transcription_count))
+                message=message))
 
         return NeuroCognateAnalysis(**result_dict)
 
     @staticmethod
     def mutate(
             self,
             info,
-            stamp,
             source_perspective_id,
             perspective_info_list,
             match_translations,
@@ -5802,18 +5803,12 @@ def mutate(
                     base_language_id[0], base_language_id[1]))
 
             try:
-
-                # Getting base language info.
-
                 locale_id = info.context.locale_id
 
-                #base_language = DBSession.query(dbLanguage).filter_by(
-                    #client_id = base_language_id[0], object_id = base_language_id[1]).first()
-
-                #base_language_name = base_language.get_translation(locale_id)
-
-                #request = info.context.request
-                #storage = request.registry.settings['storage']
+                request = info.context.request
+                host_url = request.environ['HTTP_ORIGIN']
+                storage = request.registry.settings['storage']
+                cache_kwargs = request.registry.settings['cache_kwargs']
 
                 # Transforming client/object pair ids from lists to 2-tuples.
 
@@ -5836,17 +5831,16 @@
                 _ in perspective_info_list]
 
             return NeuroCognateAnalysis.neuro_cognate_statistics(
-                #language_str,
-                #base_language_id,
-                #base_language_name,
                 perspective_info_list,
                 source_perspective_id,
                 match_translations,
                 input_pairs,
                 locale_id,
                 user.id,
                 truth_threshold,
-                stamp,
-                #storage,
+                storage,
+                host_url,
+                cache_kwargs,
                 debug_flag)
 
         # Exception occurred while we tried to perform swadesh analysis.
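The fourth changed file — the NeuroCognates engine that actually receives the task — did not load in this view, so the worker side of the change is not shown above. Judging from what is visible (index(input_pairs_list, task) now takes a TaskStatus, stamp files moved under storage['path']/lingvodoc_stamps, and results are later read back by the new result_suggestions query), the celery-side loop presumably looks something like the sketch below. The helper name run_index, the predict_fn callback, the use of task.id as the stamp-file name, and the result payload keys are all assumptions, not code from this commit.

    import gzip
    import os
    import pickle
    import uuid

    def run_index(input_pairs_list, compare_pairs_list, task, storage, predict_fn):

        # Stamp file toggled by StopMutation; using task.id as its name is an assumption.
        stamp_file = os.path.join(storage['path'], 'lingvodoc_stamps', str(task.id))

        total = len(input_pairs_list)
        suggestion_list = []

        for count, pair in enumerate(input_pairs_list, 1):

            # Abort if the user has requested a stop via StopMutation.
            if os.path.isfile(stamp_file):
                task.set(1, 100, "Stopped by user", "")
                return

            suggestion_list.append(predict_fn(pair, compare_pairs_list))

            # Same positional shape as task.set(1, 0, "first words processing...", "") above.
            task.set(1, count * 100 // total, f"{count}/{total} words processed", "")

        # Persist results where resolve_result_suggestions() will look for them.
        result_dir = os.path.join(storage['path'], 'neuro_cognates')
        os.makedirs(result_dir, exist_ok=True)
        result_file = str(uuid.uuid4())

        with gzip.open(os.path.join(result_dir, result_file), 'wb') as pickle_file:
            pickle.dump(dict(suggestion_list=suggestion_list), pickle_file)

        task.set(1, 100, "Finished", result_file)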
31 changes: 21 additions & 10 deletions lingvodoc/schema/gql_sync.py
@@ -43,15 +43,15 @@
     HTTPUnauthorized
 )
 from lingvodoc.views.v2.desktop_sync.core import async_download_dictionary
 import os
 import json
 import requests
 from pyramid.request import Request
 from pathlib import Path
 from pyramid.response import Response
 from lingvodoc.utils.search import recursive_sort
 from pdb import set_trace as A
 
-from lingvodoc.cache.caching import CACHE
+from lingvodoc.cache.caching import CACHE, initialize_cache
 
 log = logging.getLogger(__name__)

@@ -383,7 +383,7 @@ def mutate(root, info, **args):
 class StopMutation(graphene.Mutation):
 
     class Arguments:
-        stamp = graphene.Float(required=True)
+        stamp = graphene.String(required=True)
 
     triumph = graphene.Boolean()
 
@@ -396,12 +396,23 @@ def mutate(root, info, stamp):
         if not client:
             return ResponseError('Only authorized users can stop running mutations.')
 
-        stamps_path = "/tmp/lingvodoc_stamps"
-
-        # Touch stamp file
-        Path(stamps_path).mkdir(exist_ok=True)
-        open(f"{stamps_path}/{stamp}", 'a').close()
-
-        print("!!! Stamp-to-stop")
+        request = info.context.request
+        storage = request.registry.settings['storage']
+        stamp_path = os.path.join(storage['path'], 'lingvodoc_stamps')
+        stamp_file = os.path.join(stamp_path, stamp)
+        os.makedirs(stamp_path, exist_ok=True)
+
+        '''
+        cache_kwargs = request.registry.settings['cache_kwargs']
+        initialize_cache(cache_kwargs)
+        task = TaskStatus.get_from_cache(stamp)
+        '''
+
+        # Touch stamp file if it does not exist,
+        # otherwise we delete it
+        if not os.path.isfile(stamp_file):
+            open(stamp_file, 'a').close()
+        else:
+            os.remove(stamp_file)
 
         return StopMutation(triumph=True)
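Note the changed semantics: the mutation no longer just creates a stamp file, it toggles one — a first call requests a stop, a second call with the same stamp withdraws the request. A minimal client-side sketch, assuming the schema exposes this mutation as stop_mutation and serves GraphQL at /graphql (both assumptions, not confirmed by the diff):

    import requests

    GRAPHQL_URL = 'http://localhost:6543/graphql'  # assumed endpoint

    STOP_MUTATION = '''
    mutation Stop($stamp: String!) {
      stop_mutation(stamp: $stamp) {
        triumph
      }
    }
    '''

    # First call touches the stamp file (requests a stop); repeating it
    # with the same stamp removes the file (withdraws the request).
    response = requests.post(
        GRAPHQL_URL,
        json={'query': STOP_MUTATION,
              'variables': {'stamp': 'example-stamp'}})

    print(response.json())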
23 changes: 23 additions & 0 deletions lingvodoc/schema/query.py
@@ -695,6 +695,11 @@ class Query(graphene.ObjectType):
             xcript_fid = LingvodocID(required = True),
             xlat_fid = LingvodocID(required = True)))
 
+    result_suggestions = (
+        graphene.Field(
+            ObjectVal,
+            result_file = graphene.String(required = True)))
+
     def resolve_fill_logs(self, info, worker=1):
         # Check if the current user is administrator
         client_id = info.context.client_id
@@ -5323,6 +5328,24 @@ def resolve_words(self,

         return result_pairs_list
 
+    def resolve_result_suggestions(self,
+                                   info,
+                                   result_file):
+        storage = (
+            info.context.request.registry.settings['storage'])
+
+        pickle_path = os.path.join(storage['path'], 'neuro_cognates', result_file)
+
+        try:
+            with gzip.open(pickle_path, 'rb') as pickle_file:
+                result_dict = pickle.load(pickle_file)
+
+        except:
+            return ResponseError(f'Cannot access file \'{pickle_path}\'.')
+
+        return result_dict
+
+
 class PerspectivesAndFields(graphene.InputObjectType):
     perspective_id = LingvodocID()
     field_id = LingvodocID()
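The new resolver implies a simple file contract: whatever dictionary the celery task gzip-pickles into storage['path']/neuro_cognates/<result_file> is returned verbatim to the client (gzip, pickle and os are presumably already imported at the top of query.py). A round-trip sketch of that contract, with the storage path and payload keys as stand-ins rather than values from the commit:

    import gzip
    import os
    import pickle

    storage_path = '/tmp/lingvodoc_storage'  # stands in for storage['path']

    result_dir = os.path.join(storage_path, 'neuro_cognates')
    os.makedirs(result_dir, exist_ok=True)
    pickle_path = os.path.join(result_dir, 'example_result')

    # Writer side, presumably done by the celery task on completion.
    result_dict = dict(suggestion_list=[], perspective_name_list=[])

    with gzip.open(pickle_path, 'wb') as pickle_file:
        pickle.dump(result_dict, pickle_file)

    # Reader side, mirroring resolve_result_suggestions.
    with gzip.open(pickle_path, 'rb') as pickle_file:
        assert pickle.load(pickle_file) == result_dict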
