
Commit 2ecfa1e

Merge branch 'dev' of https://github.com/qiita-spots/qiita into dev

2 parents: 071eb5f + ea0a7ec


45 files changed: +1066, -168 lines

.github/workflows/qiita-ci.yml

Lines changed: 2 additions & 4 deletions
@@ -104,9 +104,7 @@ jobs:
       - name: Install plugins
         shell: bash -l {0}
         run: |
-          wget https://data.qiime2.org/distro/core/qiime2-2022.11-py38-linux-conda.yml
-          conda env create --quiet -n qtp-biom --file qiime2-2022.11-py38-linux-conda.yml
-          rm qiime2-2022.11-py38-linux-conda.yml
+          conda env create -n qtp-biom --file https://data.qiime2.org/distro/amplicon/qiime2-amplicon-2024.5-py39-linux-conda.yml
           export QIITA_ROOTCA_CERT=`pwd`/qiita_core/support_files/ci_rootca.crt
           export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg
           export REDBIOM_HOST="http://localhost:7379"
@@ -181,7 +179,7 @@ jobs:
          echo "Connecting as $USER@localhost"
          # this line (and the -o StrictHostKeyChecking=no) is so the server
          # is added to the list of known servers
-         scp -o StrictHostKeyChecking=no -i $PWD/qiita_ware/test/test_data/test_key $USER@localhost:/home/runner/work/qiita/qiita/qiita_ware/test/test_data/random_key /home/runner/work/qiita/qiita/qiita_ware/test/test_data/random_key_copy_1
+         scp -O -o StrictHostKeyChecking=no -i $PWD/qiita_ware/test/test_data/test_key $USER@localhost:/home/runner/work/qiita/qiita/qiita_ware/test/test_data/random_key /home/runner/work/qiita/qiita/qiita_ware/test/test_data/random_key_copy_1
 
       - name: Main tests
         shell: bash -l {0}

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
@@ -1,5 +1,25 @@
 # Qiita changelog
 
+Version 2024.10
+---------------
+
+Deployed on October 14th, 2024
+
+* Added update_resource_allocation_redis and companion code, so resource allocation summaries are available for review. Thank you @Gossty!
+* It is now possible to have default workflows with only one step.
+* `qiita_client.update_job_step` now accepts an `ignore_error` optional parameter. Thank you @charles-cowart!
+* Initial changes in `qiita_client` to have more accurate variable names: `QIITA_SERVER_CERT` -> `QIITA_ROOTCA_CERT`. Thank you @charles-cowart!
+* Added `get_artifact_html_summary` to `qiita_client` to retrieve the summary file of an artifact.
+* Re-added GitHub Actions to `https://github.com/qiita-spots/qiita_client`.
+* `SortMeRNA v4.3.7` superseded `Sortmerna v2.1b`; it relies on Silva 138 and now produces even mates. Thank you @ekopylova and @biocodz for the support.
+* `Remove SynDNA reads` superseded `SynDNA Woltka`, and now generates even mates.
+* `Woltka v0.1.7, paired-end` superseded `Woltka v0.1.6` in `qp-woltka`; [more information](https://qiita.ucsd.edu/static/doc/html/processingdata/woltka_pairedend.html). Thank you to @qiyunzhu for the benchmarks!
+* Other general fixes, like [#3424](https://github.com/qiita-spots/qiita/pull/3424), [#3425](https://github.com/qiita-spots/qiita/pull/3425), [#3439](https://github.com/qiita-spots/qiita/pull/3439), [#3440](https://github.com/qiita-spots/qiita/pull/3440).
+* General SPP improvements, like: [NuQC modified to preserve metadata in fastq files](https://github.com/biocore/mg-scripts/pull/155), [use squeue instead of sacct](https://github.com/biocore/mg-scripts/pull/152), [job aborts if Qiita study contains sample metadata columns reserved for prep-infos](https://github.com/biocore/mg-scripts/pull/151), [metapool generates OverrideCycles value](https://github.com/biocore/metagenomics_pooling_notebook/pull/225).
+* We updated the available parameters for `Filter features against reference [filter_features]`, `Non V4 16S sequence assessment [non_v4_16s]` and all the phylogenetic analytical commands so they can use `Greengenes2 2024.09`.
+
+
 Version 2024.07
 ---------------
 
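For plugin authors, the two `qiita_client` changes called out above are the main API-surface additions. A minimal sketch of how a plugin might use them follows; the method names come from the changelog, but the constructor arguments, placeholder ids, and exact call shapes are assumptions, since the signatures live in the qiita_client repository rather than in this commit.

# Hypothetical plugin snippet; qiita_client call shapes are assumed.
from qiita_client import QiitaClient

qclient = QiitaClient('https://localhost:21174', 'some_client_id',
                      'some_client_secret')
job_id = '3c9991ab-6c14-4368-a48c-841e8837a79c'  # placeholder

# New: ignore_error lets a plugin report progress without raising if the
# server-side update fails.
qclient.update_job_step(job_id, 'Step 1 of 3: validating files',
                        ignore_error=True)

# New: fetch the HTML summary file of an artifact (8 is a placeholder id).
summary = qclient.get_artifact_html_summary(8)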

qiita_core/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -6,4 +6,4 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-__version__ = "2024.02"
+__version__ = "2024.10"

qiita_db/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 from . import user
 from . import processing_job
 
-__version__ = "2024.02"
+__version__ = "2024.10"
 
 __all__ = ["analysis", "artifact", "archive", "base", "commands",
            "environment_manager", "exceptions", "investigation", "logger",

qiita_db/analysis.py

Lines changed: 16 additions & 0 deletions
@@ -215,6 +215,22 @@ def create(cls, owner, name, description, from_default=False,
         job.submit()
         return instance
 
+    @classmethod
+    def delete_analysis_artifacts(cls, _id):
+        """Deletes the artifacts linked to an analysis and then the analysis
+
+        Parameters
+        ----------
+        _id : int
+            The analysis id
+        """
+        analysis = cls(_id)
+        aids = [a.id for a in analysis.artifacts if not a.parents]
+        aids.sort(reverse=True)
+        for aid in aids:
+            qdb.artifact.Artifact.delete(aid)
+        cls.delete(analysis.id)
+
     @classmethod
     def delete(cls, _id):
         """Deletes an analysis

qiita_db/artifact.py

Lines changed: 30 additions & 18 deletions
@@ -929,7 +929,8 @@ def can_be_submitted_to_ebi(self):
         # words has more that one processing step behind it
         fine_to_send = []
         fine_to_send.extend([pt.artifact for pt in self.prep_templates])
-        fine_to_send.extend([c for a in fine_to_send for c in a.children])
+        fine_to_send.extend([c for a in fine_to_send if a is not None
+                             for c in a.children])
         if self not in fine_to_send:
             return False
 
@@ -1342,23 +1343,6 @@ def _helper(sql_edges, edges, nodes):
                 # If the job is in success we don't need to do anything
                 # else since it would've been added by the code above
                 if jstatus != 'success':
-                    # Connect the job with his input artifacts, the
-                    # input artifacts may or may not exist yet, so we
-                    # need to check both the input_artifacts and the
-                    # pending properties
-                    for in_art in n_obj.input_artifacts:
-                        iid = in_art.id
-                        if iid not in nodes and iid in extra_nodes:
-                            nodes[iid] = extra_nodes[iid]
-                        _add_edge(edges, nodes[iid], nodes[n_obj.id])
-
-                    pending = n_obj.pending
-                    for pred_id in pending:
-                        for pname in pending[pred_id]:
-                            in_node_id = '%s:%s' % (
-                                pred_id, pending[pred_id][pname])
-                            _add_edge(edges, nodes[in_node_id],
-                                      nodes[n_obj.id])
 
                     if jstatus != 'error':
                         # If the job is not errored, we can add the
@@ -1380,6 +1364,34 @@ def _helper(sql_edges, edges, nodes):
                                 queue.append(cjob.id)
                                 if cjob.id not in nodes:
                                     nodes[cjob.id] = ('job', cjob)
+
+                                    # including the outputs
+                                    for o_name, o_type in cjob.command.outputs:
+                                        node_id = '%s:%s' % (cjob.id, o_name)
+                                        node = TypeNode(
+                                            id=node_id, job_id=cjob.id,
+                                            name=o_name, type=o_type)
+                                        if node_id not in nodes:
+                                            nodes[node_id] = ('type', node)
+
+                    # Connect the job with his input artifacts, the
+                    # input artifacts may or may not exist yet, so we
+                    # need to check both the input_artifacts and the
+                    # pending properties
+                    for in_art in n_obj.input_artifacts:
+                        iid = in_art.id
+                        if iid not in nodes and iid in extra_nodes:
+                            nodes[iid] = extra_nodes[iid]
+                        _add_edge(edges, nodes[iid], nodes[n_obj.id])
+
+                    pending = n_obj.pending
+                    for pred_id in pending:
+                        for pname in pending[pred_id]:
+                            in_node_id = '%s:%s' % (
+                                pred_id, pending[pred_id][pname])
+                            _add_edge(edges, nodes[in_node_id],
+                                      nodes[n_obj.id])
+
                 elif n_type == 'type':
                     # Connect this 'future artifact' with the job that will
                     # generate it

qiita_db/handlers/processing_job.py

Lines changed: 3 additions & 1 deletion
@@ -146,7 +146,9 @@ def post(self, job_id):
             cmd, values_dict={'job_id': job_id,
                               'payload': self.request.body.decode(
                                   'ascii')})
-        job = qdb.processing_job.ProcessingJob.create(job.user, params)
+        # complete_job are unique so it is fine to force them to be created
+        job = qdb.processing_job.ProcessingJob.create(
+            job.user, params, force=True)
         job.submit()
 
         self.finish()
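The substantive change is force=True. A compact restatement with the rationale as comments; the duplicate-check behavior described is inferred from the inline comment above, not spelled out in this diff:

import qiita_db as qdb

# complete_job payloads embed a unique job_id, so two of them can never
# describe the same work; forcing creation (assumed to skip the check for
# an existing job with identical command/parameters) is therefore safe.
job = qdb.processing_job.ProcessingJob.create(user, params, force=True)
job.submit()  # user and params as built in the handler above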

qiita_db/handlers/tests/test_processing_job.py

Lines changed: 2 additions & 2 deletions
@@ -233,9 +233,9 @@ def test_post_job_success(self):
         self.assertIsNotNone(cj)
         # additionally we can test that job.print_trace is correct
         self.assertEqual(job.trace, [
-            f'{job.id} [Not Available]: Validate | '
+            f'{job.id} [Not Available] (success): Validate | '
             '-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00 --nice=10000',
-            f' {cj.id} [{cj.external_id}] | '
+            f' {cj.id} [{cj.external_id}] (success)| '
            '-p qiita -N 1 -n 1 --mem 16gb --time 10:00:00 --nice=10000'])
 
     def test_post_job_success_with_archive(self):

qiita_db/meta_util.py

Lines changed: 106 additions & 1 deletion
@@ -37,11 +37,18 @@
 from re import sub
 from json import loads, dump, dumps
 
-from qiita_db.util import create_nested_path
+from qiita_db.util import create_nested_path, retrieve_resource_data
+from qiita_db.util import resource_allocation_plot
 from qiita_core.qiita_settings import qiita_config, r_client
 from qiita_core.configuration_manager import ConfigurationManager
 import qiita_db as qdb
 
+# global constant list used in resource_allocation_page
+COLUMNS = [
+    "sName", "sVersion", "cID", "cName", "processing_job_id",
+    "parameters", "samples", "columns", "input_size", "extra_info",
+    "MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"]
+
 
 def _get_data_fpids(constructor, object_id):
     """Small function for getting filepath IDS associated with data object
@@ -546,3 +553,101 @@ def generate_plugin_releases():
         # important to "flush" variables to avoid errors
         r_client.delete(redis_key)
         f(redis_key, v)
+
+
+def get_software_commands(active):
+    software_list = [s for s in qdb.software.Software.iter(active=active)]
+    software_commands = defaultdict(lambda: defaultdict(list))
+
+    for software in software_list:
+        sname = software.name
+        sversion = software.version
+        commands = software.commands
+
+        for command in commands:
+            software_commands[sname][sversion].append(command.name)
+        software_commands[sname] = dict(software_commands[sname])
+
+    return dict(software_commands)
+
+
+def update_resource_allocation_redis(active=True):
+    """Updates redis with plots and information about current software.
+
+    Parameters
+    ----------
+    active: boolean, optional
+        Defaults to True. Should only be False when testing.
+
+    """
+    time = datetime.now().strftime('%m-%d-%y')
+    scommands = get_software_commands(active)
+    redis_key = 'resources:commands'
+    r_client.set(redis_key, str(scommands))
+
+    for sname, versions in scommands.items():
+        for version, commands in versions.items():
+            for cname in commands:
+                col_name = "samples * columns"
+                df = retrieve_resource_data(cname, sname, version, COLUMNS)
+                if len(df) == 0:
+                    continue
+
+                fig, axs = resource_allocation_plot(df, cname, sname, col_name)
+                titles = [0, 0]
+                images = [0, 0]
+
+                # Splitting 1 image plot into 2 separate for better layout.
+                for i, ax in enumerate(axs):
+                    titles[i] = ax.get_title()
+                    ax.set_title("")
+                    # new_fig, new_ax – copy with either only memory plot or
+                    # only time
+                    new_fig = plt.figure()
+                    new_ax = new_fig.add_subplot(111)
+
+                    scatter_data = ax.collections[0]
+                    new_ax.scatter(scatter_data.get_offsets()[:, 0],
+                                   scatter_data.get_offsets()[:, 1],
+                                   s=scatter_data.get_sizes(), label="data")
+
+                    line = ax.lines[0]
+                    new_ax.plot(line.get_xdata(), line.get_ydata(),
+                                linewidth=1, color='orange')
+
+                    if len(ax.collections) > 1:
+                        failure_data = ax.collections[1]
+                        new_ax.scatter(failure_data.get_offsets()[:, 0],
+                                       failure_data.get_offsets()[:, 1],
+                                       color='red', s=3, label="failures")
+
+                    new_ax.set_xscale('log')
+                    new_ax.set_yscale('log')
+                    new_ax.set_xlabel(ax.get_xlabel())
+                    new_ax.set_ylabel(ax.get_ylabel())
+                    new_ax.legend(loc='upper left')
+
+                    new_fig.tight_layout()
+                    plot = BytesIO()
+                    new_fig.savefig(plot, format='png')
+                    plot.seek(0)
+                    img = 'data:image/png;base64,' + quote(
+                        b64encode(plot.getvalue()).decode('ascii'))
+                    images[i] = img
+                    plt.close(new_fig)
+                plt.close(fig)
+
+                # SID, CID, col_name
+                values = [
+                    ("img_mem", images[0], r_client.set),
+                    ("img_time", images[1], r_client.set),
+                    ('time', time, r_client.set),
+                    ("title_mem", titles[0], r_client.set),
+                    ("title_time", titles[1], r_client.set)
+                ]
+
+                for k, v, f in values:
+                    redis_key = 'resources$#%s$#%s$#%s$#%s:%s' % (
+                        cname, sname, version, col_name, k)
+                    r_client.delete(redis_key)
+                    f(redis_key, v)
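The redis keys written above follow a fixed pattern, so a consumer (for example the resource-allocation page) can read the plots back without recomputing them. A minimal reader sketch, using a plain redis client as a stand-in for Qiita's r_client; the command, software, and version values are placeholders:

import redis

r = redis.Redis()  # assumption: the instance qiita_core's r_client targets

cname, sname, version = 'Validate', 'qtp-biom', '1.0.0'  # placeholders
col_name = 'samples * columns'

for field in ('img_mem', 'img_time', 'time', 'title_mem', 'title_time'):
    key = 'resources$#%s$#%s$#%s$#%s:%s' % (
        cname, sname, version, col_name, field)
    value = r.get(key)  # img_* values are base64 'data:image/png' URIs
    print(field, 'missing' if value is None else value[:40])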

qiita_db/metadata_template/prep_template.py

Lines changed: 32 additions & 1 deletion
@@ -135,7 +135,7 @@ def create(cls, md_template, study, data_type, investigation_type=None,
         # data_type being created - if possible
         if investigation_type is None:
             if data_type_str in TARGET_GENE_DATA_TYPES:
-                investigation_type = 'Amplicon'
+                investigation_type = 'AMPLICON'
             elif data_type_str == 'Metagenomic':
                 investigation_type = 'WGS'
             elif data_type_str == 'Metatranscriptomic':
@@ -272,6 +272,32 @@ def delete(cls, id_):
                 "Cannot remove prep template %d because it has an artifact"
                 " associated with it" % id_)
 
+        # artifacts that are archived are not returned as part of the code
+        # above and we need to clean them before moving forward
+        sql = """SELECT artifact_id
+                 FROM qiita.preparation_artifact
+                 WHERE prep_template_id = %s"""
+        qdb.sql_connection.TRN.add(sql, args)
+        archived_artifacts = set(
+            qdb.sql_connection.TRN.execute_fetchflatten())
+        ANALYSIS = qdb.analysis.Analysis
+        if archived_artifacts:
+            for aid in archived_artifacts:
+                # before we can delete the archived artifact, we need
+                # to delete the analyses where they were used.
+                sql = """SELECT analysis_id
+                         FROM qiita.analysis
+                         WHERE analysis_id IN (
+                             SELECT DISTINCT analysis_id
+                             FROM qiita.analysis_sample
+                             WHERE artifact_id IN %s)"""
+                qdb.sql_connection.TRN.add(sql, [tuple([aid])])
+                analyses = set(
+                    qdb.sql_connection.TRN.execute_fetchflatten())
+                for _id in analyses:
+                    ANALYSIS.delete_analysis_artifacts(_id)
+                qdb.artifact.Artifact.delete(aid)
+
         # Delete the prep template filepaths
         sql = """DELETE FROM qiita.prep_template_filepath
                  WHERE prep_template_id = %s"""
@@ -815,6 +841,9 @@ def _get_predecessors(workflow, node):
                    pred.append(data)
            return pred
 
+        # this is only helpful for when there are no _get_predecessors
+        return pred
+
        # Note: we are going to use the final BIOMs to figure out which
        # processing is missing from the back/end to the front, as this
        # will prevent generating unnecessary steps (AKA already provided
@@ -937,6 +966,8 @@ def _get_predecessors(workflow, node):
                if set(merging_schemes[info]) >= set(cxns):
                    init_artifacts = merging_schemes[info]
                    break
+            if not predecessors:
+                pnode = node
            if init_artifacts is None:
                pdp = pnode.default_parameter
                pdp_cmd = pdp.command
qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 3 additions & 3 deletions
@@ -911,7 +911,7 @@ def _common_creation_checks(self, pt, fp_count, name):
         self.assertEqual(pt.data_type(), self.data_type)
         self.assertEqual(pt.data_type(ret_id=True), self.data_type_id)
         self.assertEqual(pt.artifact, None)
-        self.assertEqual(pt.investigation_type, 'Amplicon')
+        self.assertEqual(pt.investigation_type, 'AMPLICON')
         self.assertEqual(pt.study_id, self.test_study.id)
         self.assertEqual(pt.status, "sandbox")
         exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id,
@@ -1076,7 +1076,7 @@ def test_create_warning(self):
         self.assertEqual(pt.data_type(), self.data_type)
         self.assertEqual(pt.data_type(ret_id=True), self.data_type_id)
         self.assertEqual(pt.artifact, None)
-        self.assertEqual(pt.investigation_type, 'Amplicon')
+        self.assertEqual(pt.investigation_type, 'AMPLICON')
         self.assertEqual(pt.study_id, self.test_study.id)
         self.assertEqual(pt.status, 'sandbox')
         exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id,
@@ -1247,7 +1247,7 @@ def test_investigation_type_setter(self):
         """Able to update the investigation type"""
         pt = qdb.metadata_template.prep_template.PrepTemplate.create(
             self.metadata, self.test_study, self.data_type_id)
-        self.assertEqual(pt.investigation_type, 'Amplicon')
+        self.assertEqual(pt.investigation_type, 'AMPLICON')
         pt.investigation_type = "Other"
         self.assertEqual(pt.investigation_type, 'Other')
         with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
