Skip to content

Commit 83abf82

Browse files
authored
Merge pull request #96 from dib-lab/fix_ainput_err
fix assemblyinput for ppl specifying out_path
2 parents 4e664e9 + ee8c949 commit 83abf82

File tree

5 files changed

+66
-41
lines changed

5 files changed

+66
-41
lines changed

ep_utils/pipeline_defaults.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ basename: eelpond
66
# these outdirectories will be available to all workflows and rules
77
eelpond_directories:
88
rules: rules
9-
animals: animals
9+
animals: ep_utils/animals
1010
logs: logs
1111
outdirs:
1212
input_data: input_data

ep_utils/utils.py

+25
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,31 @@ def update_nested_dict(d, other):
7272
def is_single_end(sample, unit, end = '', assembly = ''):
7373
return pd.isnull(samples.loc[(sample, unit), "fq2"])
7474

75+
def handle_assemblyinput(assembInput, config):
76+
extensions= {}
77+
program_params = config['assemblyinput'].get('program_params')
78+
assemblyfile = program_params.get('assembly', None)
79+
if assemblyfile:
80+
assert os.path.exists(assemblyfile), 'Error: cannot find input assembly at {}\n'.format(assemblyfile)
81+
sys.stderr.write('\tFound input assembly at {}\n'.format(assemblyfile))
82+
assemblyfile = os.path.realpath(assemblyfile)
83+
else:
84+
sys.stderr.write("\n\tError: trying to run `assemblyinput` workflow, but there's no assembly file specified in your configfile. Please fix.\n\n")
85+
gtmap = program_params.get('gene_trans_map', '')
86+
if gtmap:
87+
assert os.path.exists(gtmap), 'Error: cannot find assembly gene_trans_map at {}\n'.format(gtmap)
88+
sys.stderr.write('\tFound input assembly gene-transcript map at {}\n'.format(gtmap))
89+
extensions = {'base': ['.fasta', '.fasta.gene_trans_map']}
90+
gtmap = os.path.realpath(gtmap)
91+
else:
92+
program_params['gene_trans_map'] = ''
93+
config['no_gene_trans_map']= True
94+
# grab user-input assembly extension
95+
input_assembly_extension = program_params.get('assembly_extension', '_input')
96+
extensions['assembly_extensions'] = [input_assembly_extension]
97+
config['assemblyinput'] = {'program_params': program_params, 'eelpond_params': {'extensions':extensions}}
98+
return config, input_assembly_extension
99+
75100
def generate_targs(outdir, basename, samples, assembly_exts=[''], base_exts = None, read_exts = None, other_exts = None, contrasts = []):
76101
base_targets, read_targets, other_targs = [],[],[]
77102
# handle read targets

rules/deseq2/deseq2.rule

+4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ deseq_params = config['deseq2']['program_params']
77
ep_params = config['deseq2']['eelpond_params']
88
deseq_dir = config['deseq2']['eelpond_params']['outdir']
99

10+
# this is handled in `run_eelpond`, but redundancy doesn't hurt
11+
if config.get('no_gene_trans_map', False):
12+
deseq_params['gene_trans_map'] = False
13+
1014
def get_deseq2_threads(wildcards=None):
1115
# https://github.com/snakemake-workflows/rna-seq-star-deseq2
1216
# https://twitter.com/mikelove/status/918770188568363008

rules/utils/assemblyinput.rule

+13-2
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,26 @@ assembly_dir = ep_params['outdir']
88
basename = config['basename']
99
assembly_extension = aInput_params['assembly_extension']
1010

11+
1112
rule cp_input_assembly:
1213
input: aInput_params['assembly']
1314
output: join(assembly_dir, basename + assembly_extension + '.fasta')
1415
log: join(LOGS_DIR, 'assemblyinput/cp_assembly.log')
15-
shell: ("cp {input} {output}")
16+
params: assembly_dir = ep_params['outdir']
17+
shell:
18+
"""
19+
mkdir -p {params.assembly_dir}
20+
cp {input} {output}
21+
"""
1622

1723
if aInput_params.get('gene_trans_map', None):
1824
rule cp_input_gene_trans_map:
1925
input: aInput_params['gene_trans_map']
2026
output: join(assembly_dir, basename + assembly_extension + '.fasta.gene_trans_map')
2127
log: join(LOGS_DIR, 'assemblyinput/cp_gt_map.log')
22-
shell: ("cp {input} {output}")
28+
params: assembly_dir = ep_params['outdir']
29+
shell:
30+
"""
31+
mkdir -p {params.assembly_dir}
32+
cp {input} {output}
33+
"""

run_eelpond

+23-38
Original file line numberDiff line numberDiff line change
@@ -60,42 +60,42 @@ def build_default_params(workdir, targets):
6060
defaultParams['assembly_extensions'] = list(set(assembly_extensions))
6161
return defaultParams
6262

63+
6364
def build_dirs(ep_dir, params):
65+
''' function to build full paths for all directories '''
6466
# build main eelpond dir info
65-
rules_dir = params['eelpond_directories']['rules']
6667
params['eelpond_directories']['base_dir'] = ep_dir
67-
params['eelpond_directories']['rules'] = os.path.join(ep_dir, rules_dir)
68-
animals_dir = params['eelpond_directories']['animals']
69-
params['eelpond_directories']['animals'] = os.path.join(ep_dir, 'ep_utils', animals_dir)
70-
68+
params['eelpond_directories']['rules'] = os.path.join(ep_dir, params['eelpond_directories']['rules'])
69+
params['eelpond_directories']['animals'] = os.path.join(ep_dir, params['eelpond_directories']['animals'])
7170
# if desired, user can also provide out_path, and all dirs will be built under there
72-
# outdirectory, build all directories relative to that path
7371
out_path = params.get('out_path', ep_dir)
74-
# if user inputs an absolute path:
75-
if os.path.isabs(out_path): # if not absolute, just assume subdirectory of eelpond.
72+
out_path = os.path.expanduser(out_path) # expand any `~` on unix
73+
if os.path.isabs(out_path): # if user inputs an absolute path, check that it exists!
7674
assert os.path.exists(out_path) and os.path.isdir(out_path), f"Error: provided output path {out_path} is not an existing directory. Please fix.\n\n"
77-
75+
else: # if not absolute, assume subdirectory of base eelpond dir
76+
out_path = os.path.join(ep_dir, out_path)
7877
# allow user to define basename, and experiment, build outdir name
7978
basename = params['basename']
8079
expt = params.get('experiment', '')
8180
if expt and not expt.startswith('_'):
8281
expt= '_' + expt
8382
outdir = basename + "_out" + expt
84-
8583
# Now join out_path, outdir name
84+
out_path = os.path.realpath(out_path)
8685
outdir = os.path.join(out_path, outdir)
87-
88-
params['eelpond_directories']['out_dir'] = outdir # outdir NAME
89-
# if we want logs to be *within* individual outdirs, change logsdir here (else logs in eelpond dir)
90-
params['eelpond_directories']['logs'] = join(outdir, os.path.basename(params['eelpond_directories']['logs']))
86+
# when using out_path, need to manually build the outdir (snakemake will not automatically create it)
87+
if not os.path.exists(outdir):
88+
os.makedirs(outdir)
89+
# add full path info to the config
90+
params['eelpond_directories']['out_dir'] = outdir
91+
params['eelpond_directories']['logs'] = join(outdir, params['eelpond_directories']['logs'])
9192
# build dirs for main eelpond output directories
9293
outDirs = params['eelpond_directories']['outdirs']
9394
for targ, outD in outDirs.items():
9495
outDirs[targ] = os.path.join(outdir, outD)
9596
# put joined paths back in params file
9697
params['eelpond_directories']['outdirs'] = outDirs
9798
# build dirs for included rules
98-
9999
included_rules = params['include_rules']
100100
for rule in included_rules:
101101
prog = os.path.basename(rule).split('.rule')[0]
@@ -104,28 +104,6 @@ def build_dirs(ep_dir, params):
104104
params[prog]['eelpond_params']['outdir'] = os.path.join(outdir, prog_dir)
105105
return params
106106

107-
def handle_assemblyInput(assembInput, config):
108-
# find files
109-
program_params = config['assemblyinput'].get('program_params')
110-
assemblyfile = program_params.get('assembly', None)
111-
assert os.path.exists(assemblyfile), 'Error: cannot find input assembly at {}\n'.format(assemblyfile)
112-
sys.stderr.write('\tFound input assembly at {}\n'.format(assemblyfile))
113-
gtmap = program_params.get('gene_trans_map', '')
114-
extensions= {}
115-
if gtmap:
116-
assert os.path.exists(gtmap), 'Error: cannot find assembly gene_trans_map at {}\n'.format(gtmap)
117-
sys.stderr.write('\tFound input assembly gene-transcript map at {}\n'.format(gtmap))
118-
extensions = {'base': ['.fasta', '.fasta.gene_trans_map']} # kinda hacky. do this better
119-
else:
120-
program_params['gene_trans_map'] = ''
121-
# grab user-input assembly extension
122-
input_assembly_extension = program_params.get('assembly_extension', '')
123-
config['assemblyinput'] = {}
124-
config['assemblyinput']['program_params'] = program_params
125-
extensions['assembly_extensions'] = [input_assembly_extension]
126-
config['assemblyinput']['eelpond_params'] = {'extensions': extensions}
127-
return config, input_assembly_extension
128-
129107
def main(args):
130108

131109
thisdir = os.path.abspath(os.path.dirname(__file__))
@@ -188,7 +166,7 @@ def main(args):
188166
assembInput = configD.get('assemblyinput', None)
189167
if assembInput:
190168
targs+=['assemblyinput']
191-
configD, assemblyinput_ext = handle_assemblyInput(assembInput, configD)
169+
configD, assemblyinput_ext = handle_assemblyinput(assembInput, configD)
192170
else:
193171
assemblyinput_ext = None
194172
if 'assemblyinput' in targs and not assembInput:
@@ -225,6 +203,12 @@ def main(args):
225203
# add extension to overall assembly_extensions info
226204
if assemblyinput_ext: # note, need to do it here to prevent override with defaults
227205
paramsD['assembly_extensions'] = list(set(paramsD.get('assembly_extensions', []) + [assemblyinput_ext]))
206+
207+
# NOTE: I think this should be moved from here into an `input_checks` function that checks all appropriate inputs/outputs are specified for the rules in use, and provides appropriate links to docs to help guide the user.
208+
if paramsD.get('no_gene_trans_map', False):
209+
if paramsD.get('deseq2'):
210+
paramsD['deseq2']['program_params']['gene_trans_map'] = False
211+
sys.stderr.write("\tYou're using `assemblyinput` without specifying a gene-trans-map. Setting differential expression to transcript-level only. See https://dib-lab.github.io/eelpond/deseq2/for details.\n")
228212

229213
# use params to build directory structure
230214
paramsD = build_dirs(thisdir, paramsD)
@@ -247,6 +231,7 @@ def main(args):
247231
print('\tconfig: {}'.format(configfile))
248232
print('\tparams: {}'.format(paramsfile))
249233
print('\ttargets: {}'.format(repr(targs)))
234+
print('\toutput: {}'.format(repr(paramsD['eelpond_directories']['out_dir'])))
250235
print('\treport: {}'.format(repr(reportfile)))
251236
print('--------')
252237

0 commit comments

Comments
 (0)