Skip to content

Revert "Add latest dev updates" #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ name: Docker

on:
push:
branches: [ "master", "dev" ]
branches: [ "master" ]
pull_request:
branches: [ "master", "dev" ]
branches: [ "master" ]

env:
# Use docker.io for Docker Hub if empty
Expand Down
31 changes: 22 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
FROM community.wave.seqera.io/library/bioconductor-gsva_bioconductor-sponge_gunicorn_python_pruned:e9c5176f69f5398d
FROM python:3.12.6-bullseye

# Install required packages using apt
# RUN apt-get update && apt-get install -y \
# libmariadb3 libmariadb-dev build-essential linux-headers-amd64 mariadb-connector-c \
# && rm -rf /var/lib/apt/lists/*

# Install required system dependencies for MySQL, R, and Conda
RUN apt-get update && apt-get install -y \
default-mysql-client pkg-config default-libmysqlclient-dev build-essential \
default-mysql-client default-libmysqlclient-dev build-essential linux-headers-amd64 \
&& rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip3 install --upgrade pip

# Install Python dependencies
WORKDIR /server
COPY requirements.txt /server/requirements.txt
RUN micromamba run pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . /server

# Start the application using gunicorn with UvicornWorker
RUN pip3 --no-cache-dir install -r requirements.txt
RUN pip install debugpy


# the mariadb plugin directory seems to be misconfigured
# by default. In order to work properly we manually adjust
# the path.
# ENV MARIADB_PLUGIN_DIR /usr/lib/mariadb/plugin

# EXPOSE 5000
# CMD ["python3", "server.py"]

# run the command to start the gunicorn application server
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:5000", "-w", "4", "server:connex_app"]

48 changes: 0 additions & 48 deletions app/controllers/alternativeSplicing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from flask import abort
import app.models as models
from flask import Response
from app.config import db

def get_transcript_events(enst_number):
"""
Expand Down Expand Up @@ -92,50 +91,3 @@ def get_exons_for_position(start_pos: int, end_pos: int):
else:
abort(404, "No data found that satisfies the given filters")


def get_psi_values(transcript_ID: str = None, enst_number: str = None, psivec_ID: int = None, alternative_splicing_event_transcripts_ID: str = None, sample_ID: str = None, limit=100):
    """
    Handle the request /alternativeSplicing/getPsiValue/.

    Results can be narrowed by any combination of the optional filters.

    :param transcript_ID: internal transcript primary key
    :param enst_number: Ensembl transcript identifier (ENST number)
    :param psivec_ID: ID of the psivec entry
    :param alternative_splicing_event_transcripts_ID: ID of the alternative splicing event transcripts
    :param sample_ID: ID of the sample
    :param limit: maximum number of rows returned (default 100)
    :return: psi values matching the filters, ordered by psi value (descending)
    """
    # Step 1: internal transcript IDs, optionally narrowed by the
    # caller-supplied transcript identifiers.
    transcripts = db.select(models.Transcript.transcript_ID)
    if transcript_ID:
        transcripts = transcripts.where(models.Transcript.transcript_ID == transcript_ID)
    if enst_number:
        transcripts = transcripts.where(models.Transcript.enst_number == enst_number)

    # Step 2: alternative-splicing event records tied to those transcripts.
    events = db.select(
        models.AlternativeSplicingEventTranscripts.alternative_splicing_event_transcripts_ID
    ).where(models.AlternativeSplicingEventTranscripts.transcript_ID.in_(transcripts))
    if alternative_splicing_event_transcripts_ID:
        events = events.where(
            models.AlternativeSplicingEventTranscripts.alternative_splicing_event_transcripts_ID
            == alternative_splicing_event_transcripts_ID
        )

    # Step 3: psi values for those events, with the remaining filters applied.
    stmt = db.select(models.PsiVec).where(
        models.PsiVec.alternative_splicing_event_transcripts_ID.in_(events)
    )
    if psivec_ID:
        stmt = stmt.where(models.PsiVec.psivec_ID == psivec_ID)
    if sample_ID:
        stmt = stmt.where(models.PsiVec.sample_ID == sample_ID)

    # Highest psi values first, capped at `limit` rows.
    stmt = stmt.order_by(models.PsiVec.psi_value.desc()).limit(limit)
    rows = db.session.execute(stmt).scalars().all()

    if not rows:
        abort(404, "No data found that satisfies the given filters")
    return models.PsiVecSchema(many=True).dump(rows)

18 changes: 6 additions & 12 deletions app/controllers/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,20 @@ def _comparison_query(dataset_1, dataset_2, condition_1=None, condition_2=None,
if condition_2 is not None:
comparison = comparison.filter(models.Comparison.condition_2 == condition_2)

comparisons = comparison.all()
comparison = comparison.all()

# check if comparison is named differently
if len(comparisons) == 0:
if len(comparison) == 0:
reverse = True
comparison = models.Comparison.query \
.filter(models.Comparison.dataset_ID_1.in_(dataset_2)) \
.filter(models.Comparison.dataset_ID_2.in_(dataset_1)) \
.filter(models.Comparison.gene_transcript == gene_transcript)

if len(comparison) != 1:
abort(404, "No (unique) comparison found for given inputs")

comparisons = comparison.all()

# error if no comparison found
if len(comparisons) == 0:
abort(404, "No comparison found for given inputs")

if len(comparisons) > 1:
abort(404, "Multiple comparisons found for given inputs")

return comparisons, reverse
return comparison.all(), reverse


def get_comparison(dataset_ID: str = None, disease_name: str = None, disease_subtype=None, sponge_db_version: int = LATEST):
Expand Down
107 changes: 71 additions & 36 deletions app/controllers/geneInteraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,53 +650,88 @@ def read_mirna_for_specific_interaction(dataset_ID: int = None, disease_name=Non
:param sponge_db_version: version of the sponge database
:return: all miRNAs contributing to the interactions between genes of interest
"""
# test if any of the two identification possibilities is given
if ensg_number is None and gene_symbol is None:
abort(404, "One of the two possible identification numbers must be provided")

if ensg_number is not None and gene_symbol is not None:
abort(404,
"More than one identifikation paramter is given. Please choose one out of (ensg number, gene symbol)")

# get all sponge_runs for the given sponge_db_version
run = models.SpongeRun.query \
.filter(models.SpongeRun.sponge_db_version == sponge_db_version)

# get diseases
disease_query = db.select(models.Dataset.dataset_ID).where(models.Dataset.sponge_db_version == sponge_db_version)
queries = []
run_IDs = []
# if specific disease_name is given:
if disease_name is not None:
disease_query = disease_query.where(models.Dataset.disease_name.like("%" + disease_name + "%"))
run = models.SpongeRun.query.join(models.Dataset, models.Dataset.dataset_ID == models.SpongeRun.dataset_ID) \
.filter(models.Dataset.disease_name.like("%" + disease_name + "%"))

if dataset_ID is not None:
disease_query = disease_query.where(models.Dataset.dataset_ID == dataset_ID)
run = run.filter(models.Dataset.dataset_ID == dataset_ID)

run = run.all()

# filter runs for diseases
run_query = db.select(models.SpongeRun.sponge_run_ID).where(models.SpongeRun.dataset_ID.in_(disease_query))
if len(run) > 0:
run_IDs = [i.sponge_run_ID for i in run]
queries.append(models.miRNAInteraction.sponge_run_ID.in_(run_IDs))
else:
abort(404, "No dataset with given disease_name found")

# get gene IDs
gene_query = db.select(models.Gene.gene_ID)
gene = []
# if ensg_numer is given to specify gene(s), get the intern gene_ID(primary_key) for requested ensg_nr(gene_ID)
if ensg_number is not None:
gene_query = gene_query.where(models.Gene.ensg_number.in_(ensg_number))
if gene_symbol is not None:
gene_query = gene_query.where(models.Gene.gene_symbol.in_(gene_symbol))
gene = models.Gene.query \
.filter(models.Gene.ensg_number.in_(ensg_number)) \
.all()
# if gene_symbol is given to specify gene(s), get the intern gene_ID(primary_key) for requested gene_symbol(gene_ID)
elif gene_symbol is not None:
gene = models.Gene.query \
.filter(models.Gene.gene_symbol.in_(gene_symbol)) \
.all()

# Get all interactions for the given genes and runs
base_interaction_query = db.select(models.miRNAInteraction).where(
models.miRNAInteraction.gene_ID.in_(gene_query),
models.miRNAInteraction.sponge_run_ID.in_(run_query),
)
gene_IDs = []
if len(gene) > 0:
gene_IDs = [i.gene_ID for i in gene]
queries.append(models.miRNAInteraction.gene_ID.in_(gene_IDs))
else:
abort(404, "No gene found for given identifiers.")

interaction_result = []
if between:
# Subquery to count distinct genes
distinct_gene_count_subquery = (
db.select(db.func.count(db.func.distinct(gene_query.c.gene_ID))).scalar_subquery()
)
print(distinct_gene_count_subquery)

# Subquery to get miRNA IDs that meet the 'between' condition
mirna_query = db.select(models.miRNAInteraction.miRNA_ID) \
.where(models.miRNAInteraction.gene_ID.in_(gene_query)) \
.where(models.miRNAInteraction.sponge_run_ID.in_(run_query)) \
.group_by(models.miRNAInteraction.miRNA_ID) \
.having(db.func.count(models.miRNAInteraction.gene_ID) == distinct_gene_count_subquery)

# Filter interactions by the miRNA IDs from the previous subquery
interaction_query = base_interaction_query.where(
models.miRNAInteraction.miRNA_ID.in_(mirna_query)
)
else:
interaction_query = base_interaction_query
# an Engine, which the Session will use for connection resources
some_engine = sa.create_engine(os.getenv("SPONGE_DB_URI"), pool_recycle=30)

# create a configured "Session" class
Session = sa.orm.sessionmaker(bind=some_engine)

# create a Session
session = Session()
# test for each dataset if the gene(s) of interest are included in the ceRNA network

mirna_filter = session.execute(text("select mirna_ID from interactions_genemirna where sponge_run_ID IN ( "
+ ','.join(str(e) for e in run_IDs) + ") and gene_ID IN ( "
+ ','.join(str(e) for e in gene_IDs)
+ ") group by mirna_ID HAVING count(mirna_ID) >= 2;")).fetchall()

interaction_result = db.session.execute(interaction_query).scalars().all()
session.close()
some_engine.dispose()

if len(mirna_filter) == 0:
abort(404, "No shared miRNA between genes found.")

flat_mirna_filter = [item for sublist in mirna_filter for item in sublist]
queries.append(models.miRNAInteraction.miRNA_ID.in_(flat_mirna_filter))

interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()
else:
interaction_result = models.miRNAInteraction.query \
.filter(*queries) \
.all()

if len(interaction_result) > 0:
# Serialize the data for the response depending on parameter all
Expand Down
Loading
Loading