1919from itertools import product
2020from os .path import join , exists
2121from os import mkdir
22+ from collections import defaultdict
2223
2324from biom import load_table
2425from biom .util import biom_open
@@ -442,6 +443,37 @@ def mapping_file(self):
442443 else :
443444 return None
444445
@property
def metadata_categories(self):
    """Collects the metadata categories available to this analysis

    The categories are gathered per study, looking at every
    (study, artifact) pair that contributes samples to the analysis.

    Returns
    -------
    dict of dict
        Keyed by study_id. Each value is a dict with the keys 'sample' and
        'prep'; 'sample' holds the set of categories of the study sample
        template and 'prep' holds the union of the categories of the prep
        templates attached to the study artifacts used in the analysis
    """
    templates = qdb.metadata_template
    sample_template_cls = templates.sample_template.SampleTemplate
    prep_template_cls = templates.prep_template.PrepTemplate
    trn = qdb.sql_connection.TRN
    with trn:
        # One row per distinct (study, artifact) pair used by the analysis
        trn.add(
            """SELECT DISTINCT study_id, artifact_id
               FROM qiita.analysis_sample
                    LEFT JOIN qiita.study_artifact USING (artifact_id)
               WHERE analysis_id = %s""",
            [self._id])

        categories_by_study = defaultdict(dict)
        for study_id, artifact_id in trn.execute_fetchindex():
            # The sample template is read only once per study; the prep
            # categories start empty and grow with each artifact below
            if study_id not in categories_by_study:
                categories_by_study[study_id] = {
                    'sample': set(
                        sample_template_cls(study_id).categories),
                    'prep': set()}

            prep_categories = categories_by_study[study_id]['prep']
            artifact = qdb.artifact.Artifact(artifact_id)
            for prep in artifact.prep_templates:
                prep_categories.update(
                    prep_template_cls(prep.id).categories)

        return categories_by_study
476+
445477 @property
446478 def tgz (self ):
447479 """Returns the tgz file of the analysis
@@ -795,7 +827,7 @@ def remove_samples(self, artifacts=None, samples=None):
795827 qdb .sql_connection .TRN .add (sql , args , many = True )
796828 qdb .sql_connection .TRN .execute ()
797829
798- def build_files (self , merge_duplicated_sample_ids ):
830+ def build_files (self , merge_duplicated_sample_ids , categories = None ):
799831 """Builds biom and mapping files needed for analysis
800832
801833 Parameters
@@ -804,6 +836,8 @@ def build_files(self, merge_duplicated_sample_ids):
804836 If the duplicated sample ids in the selected studies should be
805837 merged or prepended with the artifact ids. If false prepends
806838 the artifact id
839+ categories : set of str, optional
840+ If not None, use _only_ these categories for the metaanalysis
807841
808842 Notes
809843 -----
@@ -858,7 +892,8 @@ def build_files(self, merge_duplicated_sample_ids):
858892 # We need to negate merge_duplicated_sample_ids because in
859893 # _build_mapping_file is actually rename: merge yes == rename no
860894 rename_dup_samples = not merge_duplicated_sample_ids
861- self ._build_mapping_file (samples , rename_dup_samples )
895+ self ._build_mapping_file (
896+ samples , rename_dup_samples , categories = categories )
862897
863898 if post_processing_cmds :
864899 biom_files = self ._build_biom_tables (
@@ -1034,7 +1069,8 @@ def _build_biom_tables(self,
10341069 # the user.
10351070 return biom_files
10361071
1037- def _build_mapping_file (self , samples , rename_dup_samples = False ):
1072+ def _build_mapping_file (self , samples , rename_dup_samples = False ,
1073+ categories = None ):
10381074 """Builds the combined mapping file for all samples
10391075 Code modified slightly from qiime.util.MetadataMap.__add__"""
10401076 with qdb .sql_connection .TRN :
@@ -1045,9 +1081,14 @@ def _build_mapping_file(self, samples, rename_dup_samples=False):
10451081 artifact = qdb .artifact .Artifact (aid )
10461082 si = artifact .study .sample_template
10471083 if si not in sample_infos :
1048- sample_infos [si ] = si .to_dataframe ()
1084+ si_df = si .to_dataframe ()
1085+ if categories is not None :
1086+ si_df = si_df [categories & set (si_df .columns )]
1087+ sample_infos [si ] = si_df
10491088 pt = artifact .prep_templates [0 ]
10501089 pt_df = pt .to_dataframe ()
1090+ if categories is not None :
1091+ pt_df = pt_df [categories & set (pt_df .columns )]
10511092
10521093 qm = pt_df .join (sample_infos [si ], lsuffix = "_prep" )
10531094
0 commit comments