WIP: Added Qiita study reserved-word check (#91)

charles-cowart · web-flow · commit 1cd30c46de08 · 2024-09-12T10:46:07.000-06:00
* Added Qiita study reserved-word check

* Update
diff --git a/qp_klp/Step.py b/qp_klp/Step.py
@@ -950,7 +950,42 @@ def update_blanks_in_qiita(self, qclient):
                 qclient.http_patch(f'/api/v1/study/{study_id}/samples',
                                    data=dumps(data))
 
+    def _project_metadata_check(self, qclient):
+        # Let Pipeline() retrieve the needed qiita study ids from the user
+        # input while this plugin queries for the existing set of column
+        # names in each project's sample metadata. We'll let Pipeline()
+        # decide (using its metapool dependency) which column names are
+        # reserved.
+        qiita_ids = [x['qiita_id'] for x in self.pipeline.get_project_info()]
+
+        results = []
+
+        for qiita_id in qiita_ids:
+            categories = qclient.get(f"/api/v1/study/{qiita_id}/samples/info")[
+                "categories"]
+
+            res = self.pipeline.identify_reserved_words(categories)
+
+            # if any reserved words were identified, generate an appropriate
+            # error message for each one and add them to the list of error
+            # messages to report to the user.
+            res = [f"'{x}' exists in Qiita study {qiita_id}'s sample metadata"
+                   for x in res]
+
+            results += res
+
+        if results:
+            # raise with all error messages generated across the projects.
+            raise PipelineError("\n".join(results))
+
     def precheck(self, qclient):
+        # since one of the objectives of SPP is to generate prep-info files
+        # and automatically load them into Qiita, confirm that all studies
+        # mentioned in the sample-sheet/pre-prep do not contain sample
+        # metadata that would cause an error in the pipeline after processing
+        # has already completed but the results have not yet been loaded.
+        self._project_metadata_check(qclient)
+
         # compare sample-ids/tube-ids in sample-sheet/mapping file
         # against what's in Qiita. Results are a list of dictionaries, one
         # per project.
diff --git a/qp_klp/tests/test_step.py b/qp_klp/tests/test_step.py
@@ -977,7 +977,7 @@ def test_compare_samples_against_qiita_error_handling(self):
     def test_precheck(self):
         fake_client = AnotherFakeClient()
 
-        # test that precheck() raises a PipelineError with the correct
+        # test that Step.precheck() raises a PipelineError with the correct
         # message, given the configuration of Step() and AnotherFakeClient().
 
         step = Step(self.another_pipeline, self.qiita_id, None)
@@ -991,6 +991,24 @@ def test_precheck(self):
         with self.assertRaisesRegex(PipelineError, msg):
             step.precheck(fake_client)
 
+    def test_project_metadata_check(self):
+        fake_client = FakeClient()
+
+        # self.pipeline represents a metagenomic pathway.
+        step = Step(self.pipeline, self.qiita_id, None)
+
+        # _project_metadata_check() should return without raising an error
+        # when step and fake_client are used as-is.
+        step._project_metadata_check(fake_client)
+
+        fake_client.info_in_11661['categories'].append('well_id_384')
+        fake_client.info_in_13059['categories'].append('well_id_384')
+
+        msg = ("'well_id_384' exists in Qiita study 13059's sample metadata"
+               "\n'well_id_384' exists in Qiita study 11661's sample metadata")
+        with self.assertRaisesRegex(PipelineError, msg):
+            step._project_metadata_check(fake_client)
+
     def test_conditional_fastqc_finder(self):
         self._create_alternate_test_input()