qiita-spots · tanaes · Sep 28, 2016 · Sep 29, 2016 · Sep 29, 2016 · Sep 29, 2016
diff --git a/qp_shotgun/__init__.py b/qp_shotgun/__init__.py
@@ -9,8 +9,9 @@
 from qiita_client import QiitaPlugin, QiitaCommand
 
 from .humann2.humann2 import humann2
+from .kneaddata.kneaddata import kneaddata
 
-__all__ = ['humann2']
+__all__ = ['humann2', 'kneaddata']
 
 
 # Initialize the plugin

diff --git a/qp_shotgun/kneaddata/__init__.py b/qp_shotgun/kneaddata/__init__.py
@@ -0,0 +1,84 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+
+
+from qiita_client import QiitaPlugin, QiitaCommand
+
+from .kneaddata import kneaddata
+
+# Initialize the plugin. QiitaPlugin (used here) and QiitaCommand / kneaddata
+# (used further down when the command is built) have to be imported in this
+# module; without these imports, importing the package raises NameError.
+# The positional arguments are the plugin name, its version and a
+# human-readable description.
+plugin = QiitaPlugin(
+    'KneadData', '0.5.1', 'KneadData is a tool designed to perform quality '
+    'control on metagenomic and metatranscriptomic sequencing data, '
+    'especially data from microbiome experiments.')
+
+# Define the KneadData command
+# Required parameters: a single input artifact of type per_sample_FASTQ
+req_params = {'input': ('artifact', ['per_sample_FASTQ'])}
+# Optional parameters. Every entry has the form
+#     'option-name': [type, default value written as a string]
+opt_params = {
+    # there are other parameters not included that will be ignored in this
+    # configuration as we assume that the correct values were set in the env
+    # by the admin installing the tools:
+    # trimmomatic
+    # bowtie2
+
+    # --input # input FASTQ file (add a second argument instance to run with
+    #           paired input files)
+    # --output # directory to write output files
+    # --output-prefix # prefix for all output files
+    #                   [ DEFAULT : $SAMPLE_kneaddata ]
+    # --log # filepath for log
+    #         [ DEFAULT : $OUTPUT_DIR/$SAMPLE_kneaddata.log ]
+    # --trimmomatic # path to trimmomatic executable
+    # --bowtie2 # path to bowtie executable
+    # --bmtagger # path to bmtagger executable
+    # --trf # path to TRF executable
+    'reference-db': ['choice:["human_genome"]', 'human_genome'],  # ref db
+    'bypass-trim': ['bool', 'False'],  # bypass the trim step
+    'threads': ['integer', '1'],  # threads to run
+    'processes': ['integer', '1'],  # processes to run
+    # quality mapping
+    'quality-scores': ['choice:["phred33","phred64"]', 'phred33'],
+    'run-bmtagger': ['bool', 'False'],  # run BMTagger instead of Bowtie2
+    'run-trf': ['bool', 'False'],  # run TRF repeat finder tool
+    'run-fastqc-start': ['bool', 'True'],  # run FastQC on original data
+    'run-fastqc-end': ['bool', 'True'],  # run FastQC on filtered data
+    'store-temp-output': ['bool', 'False'],  # store temp output files
+    'log-level': ['choice:["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]',
+                  'DEBUG'],
+
+    # Trimmomatic options
+    'max-memory': ['integer', '500'],  # max memory in mb [ DEFAULT : 500 ]
+    'trimmomatic-options': ['string', 'ILLUMINACLIP:$trimmomatic/adapters/'
+                            'TruSeq3-PE-2.fa:2:30:10 LEADING:3 TRAILING:3 '
+                            'SLIDINGWINDOW:4:15 MINLEN:36'],
+
+    # Bowtie2 options
+    # NOTE: the trailing comma is required; without it the next entry makes
+    # the dict literal a SyntaxError
+    'bowtie2-options': ['string', '--very-sensitive'],
+
+    # BMTagger options
+
+    # TRF options
+    'match': ['integer', '2'],  # matching weight
+    'mismatch': ['integer', '7'],  # mismatching penalty
+    'delta': ['integer', '7'],  # indel penalty
+    'pm': ['integer', '80'],  # match probability
+    'pi': ['integer', '10'],  # indel probability
+    'minscore': ['integer', '50'],  # minimum alignment score to report
+    'maxperiod': ['integer', '500']  # maximum period size to report
+
+    # FastQC options
+    }
+# Declared outputs of the command (presumably output name -> artifact type)
+outputs = {'per_sample_FASTQ': 'per_sample_FASTQ'}
+# Default parameter set: one value for each of the optional parameters, with
+# bool/integer options given as native Python values. 'maxperiod' is an
+# integer option, so its default is the int 500 like the other integers.
+dflt_param_set = {
+    'Defaults': {
+        'reference-db': 'human_genome', 'bypass-trim': False, 'threads': 1,
+        'processes': 1, 'quality-scores': 'phred33', 'run-bmtagger': False,
+        'run-trf': False, 'run-fastqc-start': True, 'run-fastqc-end': True,
+        'store-temp-output': False, 'log-level': 'DEBUG', 'max-memory': 500,
+        'trimmomatic-options': 'ILLUMINACLIP:$trimmomatic/adapters/'
+                               'TruSeq3-PE-2.fa:2:30:10 LEADING:3 TRAILING:3 '
+                               'SLIDINGWINDOW:4:15 MINLEN:36',
+        'bowtie2-options': '--very-sensitive', 'match': 2, 'mismatch': 7,
+        'delta': 7, 'pm': 80, 'pi': 10, 'minscore': 50, 'maxperiod': 500}
+}
+# Build the KneadData command from the pieces defined above and attach it to
+# the plugin
+kneaddata_cmd = QiitaCommand(
+    "KneadData",
+    "Sequence QC",
+    kneaddata,
+    req_params,
+    opt_params,
+    outputs,
+    dflt_param_set
+)
+plugin.register_command(kneaddata_cmd)
diff --git a/qp_shotgun/kneaddata/kneaddata.py b/qp_shotgun/kneaddata/kneaddata.py
@@ -0,0 +1,59 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from os.path import basename, join
+
+from future.utils import viewitems
+import pandas as pd
+
+
+
+def kneaddata(qclient, job_id, parameters, out_dir):
+    """Run kneaddata with the given parameters
+
+    Only the information-gathering step is implemented so far: the command
+    generation and execution steps just report their progress, and the
+    function returns an empty list of artifacts.
+
+    Parameters
+    ----------
+    qclient : tgp.qiita_client.QiitaClient
+        The Qiita server client
+    job_id : str
+        The job id
+    parameters : dict
+        The parameter values to run kneaddata. The input artifact id is read
+        from the 'input' key, which is the name the command declares for its
+        required parameter; 'input_data' is accepted as a fallback.
+    out_dir : str
+        The path to the job's output directory
+
+    Returns
+    -------
+    bool, list, str
+        The results of the job: success flag, list of artifacts information
+        (currently always empty) and error message (currently always empty)
+
+    Raises
+    ------
+    KeyError
+        If `parameters` has neither an 'input' nor an 'input_data' entry
+    """
+    # Step 1 get the rest of the information need to run kneaddata
+    qclient.update_job_step(job_id, "Step 1 of 3: Collecting information")
+    try:
+        artifact_id = parameters['input']
+    except KeyError:
+        # Fall back to the key this function originally looked up
+        artifact_id = parameters['input_data']
+
+    # Get the artifact filepath information
+    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
+    # NOTE: fps, artifact_type and qiime_map are not consumed yet; they are
+    # collected for the command generation step, which is still a TODO
+    fps = artifact_info['files']
+
+    # Get the artifact type
+    artifact_type = artifact_info['type']
+
+    # Get the artifact metadata
+    prep_info = qclient.get('/qiita_db/prep_template/%s/'
+                            % artifact_info['prep_information'][0])
+    qiime_map = prep_info['qiime-map']
+
+    # Step 2 generating command kneaddata: TODO
+    qclient.update_job_step(job_id, "Step 2 of 3: Generating kneaddata command")
+
+    # Step 3 execute kneaddata: TODO
+    qclient.update_job_step(job_id, "Step 3 of 3: Executing kneaddata")
+
+    artifacts_info = []
+
+    return True, artifacts_info, ""
diff --git a/qp_shotgun/kneaddata/tests/__init__.py b/qp_shotgun/kneaddata/tests/__init__.py
@@ -0,0 +1,7 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
diff --git a/qp_shotgun/kneaddata/tests/test_kneaddata.py b/qp_shotgun/kneaddata/tests/test_kneaddata.py
@@ -0,0 +1,62 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from unittest import TestCase, main
+from os import close, environ
+from tempfile import mkstemp
+from json import dumps
+
+from qiita_client import QiitaClient
+
+from qp_shotgun.humann2.humann2 import (
+    get_sample_names_by_run_prefix)
+
+from qp_shotgun.kneaddata.kneaddata import kneaddata
+
+
+CLIENT_ID = '19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4'
+CLIENT_SECRET = ('J7FfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2JKh'
+                 'AmmCWZuabe0O5Mp28s1')
+
+
+class KneaddataTests(TestCase):
+    """Fixture for the kneaddata tests (no test methods are defined yet)"""
+
+    @classmethod
+    def setUpClass(cls):
+        """Create the Qiita client and the containers shared by the class"""
+        # The server certificate is optional: None when the variable is unset
+        cls.qclient = QiitaClient(
+            "https://localhost:21174", CLIENT_ID, CLIENT_SECRET,
+            server_cert=environ.get('QIITA_SERVER_CERT', None))
+        cls.params = {}
+        cls._clean_up_files = []
+
+    @classmethod
+    def tearDownClass(cls):
+        """Post to the server's /apitest/reset/ endpoint after the tests"""
+        cls.qclient.post('/apitest/reset/')
+
+
+
+# Tab-separated mapping file with three samples, each with its own
+# run_prefix (s3, s1, s2). Every line, including the last, ends in "\n".
+MAPPING_FILE = "\n".join([
+    "#SampleID\tplatform\tbarcode\texperiment_design_description\t"
+    "library_construction_protocol\tcenter_name\tprimer\trun_prefix\t"
+    "instrument_model\tDescription",
+    "SKB7.640196\tILLUMINA\tA\tA\tA\tANL\tA\ts3\tIllumina MiSeq\tdesc1",
+    "SKB8.640193\tILLUMINA\tA\tA\tA\tANL\tA\ts1\tIllumina MiSeq\tdesc2",
+    "SKD8.640184\tILLUMINA\tA\tA\tA\tANL\tA\ts2\tIllumina MiSeq\tdesc3",
+    "",
+])
+
+# Same as MAPPING_FILE except that the last sample reuses run_prefix s1, so
+# two samples share the same run_prefix.
+MAPPING_FILE_2 = "\n".join([
+    "#SampleID\tplatform\tbarcode\texperiment_design_description\t"
+    "library_construction_protocol\tcenter_name\tprimer\trun_prefix\t"
+    "instrument_model\tDescription",
+    "SKB7.640196\tILLUMINA\tA\tA\tA\tANL\tA\ts3\tIllumina MiSeq\tdesc1",
+    "SKB8.640193\tILLUMINA\tA\tA\tA\tANL\tA\ts1\tIllumina MiSeq\tdesc2",
+    "SKD8.640184\tILLUMINA\tA\tA\tA\tANL\tA\ts1\tIllumina MiSeq\tdesc3",
+    "",
+])
+
+if __name__ == '__main__':
+    main()