Skip to content

Commit

Permalink
fix: properly error in Counts app and combine step
Browse files Browse the repository at this point in the history
  • Loading branch information
a-frantz committed Apr 13, 2021
1 parent dbb4faa commit 81f2267
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 85 deletions.
1 change: 1 addition & 0 deletions stjude_warden_bam/dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
{
"name": "Genome",
"class": "string",
"optional": false,
"choices": [
"Human_hg38_v31 : hg38 Gencode v31",
"Human_hg19_v19 : hg19 Gencode v19",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
OUTCOUNTS = open(out_counts_file, 'w')
OUTSAMPLES = open(out_samples_file, 'w')
ERRORS = open("errors.txt", 'w')
warnings = []

SAMPLEFILES = open(sample_files_list)
COUNTFILES = open(count_files_lists)
Expand All @@ -22,7 +21,7 @@
sample_lines = {}
feature_list = {}

print "Looping SAMPLEFILES"
print("Looping SAMPLEFILES")
for sample_file in SAMPLEFILES:

sample_file = sample_file.rstrip("\n")
Expand Down Expand Up @@ -54,10 +53,10 @@

counter_samples = {}
got_feature_list = False
print "done looping sample files\n"
print "Samples:"
print("done looping sample files\n")
print("Samples:")
print(samples)
print ""
print("")
count_results = {}

for count_file in COUNTFILES:
Expand All @@ -67,23 +66,18 @@
count_header = count_header.rstrip("\n")
count_header_list = count_header.split("\t")
print(count_header)
feature_num = 0
for count_line in COUNTFILE:
for feature_num, count_line in enumerate(COUNTFILE):
count_line = count_line.rstrip("\n")
count_data = count_line.split("\t")
feature = count_data[0]
if feature[0:2] == "__":
continue
# if we already got a feature list, check that this file lists features in the same order
if got_feature_list:
if feature_num not in feature_list:
print "FEATURE LISTS FROM COUNT FILES ARE OF DIFFERENT LENGTHS"
ERRORS.write("FEATURE LISTS FROM COUNT FILES ARE OF DIFFERENT LENGTHS")
sys.exit()
old_feature = feature_list[feature_num]
# features should be the same
if feature != old_feature:
print "FEATURE LISTS BETWEEN COUNT FILES NOT THE SAME"
print("FEATURE LISTS BETWEEN COUNT FILES NOT THE SAME")
ERRORS.write("FEATURE LISTS BETWEEN COUNT FILES NOT THE SAME")
sys.exit()
else:
Expand All @@ -94,20 +88,20 @@
for sc in range(1, len(count_data)):
sample = count_header_list[sc]
if sample not in samples:
print "SAMPLE " + sample + " not in a sample sheet"
warnings.append("SAMPLE " + sample + " not in a sample sheet")
print("SAMPLE " + sample + " not in a sample sheet\nDoes the file have a header?")
ERRORS.write("SAMPLE " + sample + " not in a sample sheet\nDoes the file have a header?")
sys.exit()

count = count_data[sc]
if sample in count_results[feature]:
if count != count_results[feature][sample]:
print "TWO SAMPLES WITH SAME NAME HAVE DIFFERENT COUNT VALUES"
print("TWO SAMPLES WITH SAME NAME HAVE DIFFERENT COUNT VALUES")
ERRORS.write("TWO SAMPLES WITH SAME NAME HAVE DIFFERENT COUNT VALUES")
sys.exit("")
sys.exit()
else:
count_results[feature][sample] = count
feature_num += 1
got_feature_list = True
print "done getting features"
print("done getting features")
samples_ordered = []
for key, value in sorted(samples.iteritems(), key=lambda (k, v): (v, k)):
samples_ordered.append(key)
Expand All @@ -132,9 +126,3 @@
for s in samples_ordered:
sample_line = sample_lines[s]
OUTSAMPLES.write(sample_line + "\n")


if len(warnings) > 0:
WARN = open("warnings.txt", 'w')
warn_lines = "\n".join(warnings)
WARN.write(warn_lines + "\n")
6 changes: 3 additions & 3 deletions stjude_warden_bam/src/warden.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ main() {
elif [ "$sample_list_extension" == "xlsx" ]; then
python /usr/bin/parse_excel_sample_list.py sample_list.xlsx > sample_list.txt
else
dx-jobutil-report-error "Improper Sample List Extension. This should be a .txt or .xlsx file" appError
dx-jobutil-report-error "Improper Sample List Extension. This should be a .txt or .xlsx file" AppError
fi

printf '%s\n' "${BAM_FILES_path[@]}" > bam_list.txt
Expand All @@ -91,7 +91,7 @@ main() {
IS_PROCESSFILE_ERR=${#PROCESSFILE_ERR} #get size
if [ "$IS_PROCESSFILE_ERR" -gt 0 ]; then
echo "Error: $PROCESSFILE_ERR"
dx-jobutil-report-error "$PROCESSFILE_ERR" appError
dx-jobutil-report-error "$PROCESSFILE_ERR" AppError
fi

final_sample_list_id=$(dx upload --brief cleaned_sample_list.txt)
Expand Down Expand Up @@ -120,7 +120,7 @@ main() {
echo ""
if [ "$num_samples" -gt 64 ]; then
echo "Error: Number of samples greater than 64. The app limits samples to 64"
dx-jobutil-report-error "Number of samples greater than 64. The app limits samples to 64" appError
dx-jobutil-report-error "Number of samples greater than 64. The app limits samples to 64" AppError
fi
###############
{
Expand Down
6 changes: 4 additions & 2 deletions stjude_warden_counts/dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@
"Mouse_mm9_levels_1_2 : Gencode vM1 (levels 1+2)",
"Mouse_mm10_vM11_levels_1_2 : Gencode vM11 (levels 1+2)",
"D_Mel_BDGPr5 : Drosophila_melanogaster.BDGP5.75.gtf",
"D_Mel_BDGP6 : Drosophila_melanogaster.BDGP6.86.gtf"
"D_Mel_BDGP6 : Drosophila_melanogaster.BDGP6.86.gtf",
"None"
],
"optional": true,
"default": "None",
"optional": false,
"help": "Selection determines preloaded viewer. If set to None, no viewer shortcut will be created. First element (e.g. Human_hg38_v31) is a reference ID for app."
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
OUTCOUNTS = open(out_counts_file, 'w')
OUTSAMPLES = open(out_samples_file, 'w')
ERRORS = open("errors.txt", 'w')
warnings = []

SAMPLEFILES = open(sample_files_list)
COUNTFILES = open(count_files_lists)
Expand All @@ -22,7 +21,7 @@
sample_lines = {}
feature_list = {}

print "Looping SAMPLEFILES"
print("Looping SAMPLEFILES")
for sample_file in SAMPLEFILES:

sample_file = sample_file.rstrip("\n")
Expand Down Expand Up @@ -54,10 +53,10 @@

counter_samples = {}
got_feature_list = False
print "done looping sample files\n"
print "Samples:"
print("done looping sample files\n")
print("Samples:")
print(samples)
print ""
print("")
count_results = {}

for count_file in COUNTFILES:
Expand All @@ -67,23 +66,18 @@
count_header = count_header.rstrip("\n")
count_header_list = count_header.split("\t")
print(count_header)
feature_num = 0
for count_line in COUNTFILE:
for feature_num, count_line in enumerate(COUNTFILE):
count_line = count_line.rstrip("\n")
count_data = count_line.split("\t")
feature = count_data[0]
if feature[0:2] == "__":
continue
# if we already got a feature list, check that this file lists features in the same order
if got_feature_list:
if feature_num not in feature_list:
print "FEATURE LISTS FROM COUNT FILES ARE OF DIFFERENT LENGTHS"
ERRORS.write("FEATURE LISTS FROM COUNT FILES ARE OF DIFFERENT LENGTHS")
sys.exit()
old_feature = feature_list[feature_num]
# features should be the same
if feature != old_feature:
print "FEATURE LISTS BETWEEN COUNT FILES NOT THE SAME"
print("FEATURE LISTS BETWEEN COUNT FILES NOT THE SAME")
ERRORS.write("FEATURE LISTS BETWEEN COUNT FILES NOT THE SAME")
sys.exit()
else:
Expand All @@ -94,20 +88,20 @@
for sc in range(1, len(count_data)):
sample = count_header_list[sc]
if sample not in samples:
print "SAMPLE " + sample + " not in a sample sheet"
warnings.append("SAMPLE " + sample + " not in a sample sheet")
print("SAMPLE " + sample + " not in a sample sheet\nDoes the file have a header?")
ERRORS.write("SAMPLE " + sample + " not in a sample sheet\nDoes the file have a header?")
sys.exit()

count = count_data[sc]
if sample in count_results[feature]:
if count != count_results[feature][sample]:
print "TWO SAMPLES WITH SAME NAME HAVE DIFFERENT COUNT VALUES"
print("TWO SAMPLES WITH SAME NAME HAVE DIFFERENT COUNT VALUES")
ERRORS.write("TWO SAMPLES WITH SAME NAME HAVE DIFFERENT COUNT VALUES")
sys.exit("")
sys.exit()
else:
count_results[feature][sample] = count
feature_num += 1
got_feature_list = True
print "done getting features"
print("done getting features")
samples_ordered = []
for key, value in sorted(samples.iteritems(), key=lambda (k, v): (v, k)):
samples_ordered.append(key)
Expand All @@ -132,9 +126,3 @@
for s in samples_ordered:
sample_line = sample_lines[s]
OUTSAMPLES.write(sample_line + "\n")


if len(warnings) > 0:
WARN = open("warnings.txt", 'w')
warn_lines = "\n".join(warnings)
WARN.write(warn_lines + "\n")
2 changes: 1 addition & 1 deletion stjude_warden_counts/resources/usr/bin/create_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def build_workflow():
}
},
]
simple_DE_input = {"input_count_file": dxpy.dxlink({"stage": combine_counts_stage_id, "outputField": "count_file"}), "sample_list_file": dxpy.dxlink(final_sample_list_id), "contrasts_file": dxpy.dxlink(comparisons_all_id), "difex_viewer": limma_viewer_link}
simple_DE_input = {"input_count_file": dxpy.dxlink({"stage": combine_counts_stage_id, "outputField": "count_file"}), "sample_list_file": dxpy.dxlink(final_sample_list_id), "contrasts_file": dxpy.dxlink(comparisons_all_id)}
if parameters["limma_DE_viewer"] != "None":
simple_DE_input["difex_viewer"] = limma_viewer_link
simple_DE_stage_id = wf.add_stage(simple_DE_applet, stage_input=simple_DE_input, instance_type="azure:mem1_ssd1_x4", name="SIMPLE DIFFERENTIAL_EXPRESSION")
Expand Down
5 changes: 5 additions & 0 deletions stjude_warden_counts/resources/usr/bin/process_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def process_comparisons(comparison_line, comparison_list):
sys.exit()

sample_name = safe_name(line_data[0])
if sample_name != line_data[0]:
PROCESSERRORS.write("Malformed sample name: " + line_data[0] + "\n")
PROCESSERRORS.close()
sys.exit()

read_file1 = line_data[2]

if read_file1 in fastqs:
Expand Down
10 changes: 6 additions & 4 deletions stjude_warden_counts/src/warden.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ for app in /app_data/internal_source/*; do
done
echo "Applets built"
echo ""
if [ -n "$Genome" ]; then
if [ "$Genome" != "None" ]; then
Genome=$(echo "$Genome" | awk '{print $1}')
genome_json=/app_data/genome_data.json
limma_DE_viewer=$(./jq-1.6 --raw-output ".$Genome.viewers.LIMMA_DifEx_Viewer" $genome_json)
echo "DIFEXVIEWER: $limma_DE_viewer"
echo ""
else
limma_DE_viewer="None"
fi

############################INPUT FILES#############################################################
Expand All @@ -45,7 +47,7 @@ main() {
elif [ "$sample_list_extension" == "xlsx" ]; then
python /usr/bin/parse_excel_sample_list.py sample_list.xlsx > sample_list.txt
else
dx-jobutil-report-error "Improper Sample List Extension. This should be a .txt or .xlsx file" appError
dx-jobutil-report-error "Improper Sample List Extension. This should be a .txt or .xlsx file" AppError
fi

printf '%s\n' "${COUNT_FILES_path[@]}" > count_list.txt
Expand All @@ -60,7 +62,7 @@ main() {
IS_PROCESSFILE_ERR=${#PROCESSFILE_ERR} #get size
if [ "$IS_PROCESSFILE_ERR" -gt 0 ]; then
echo "Error: $PROCESSFILE_ERR"
dx-jobutil-report-error "$PROCESSFILE_ERR" appError
dx-jobutil-report-error "$PROCESSFILE_ERR" AppError
fi

final_sample_list_id=$(dx upload --brief cleaned_sample_list.txt)
Expand Down Expand Up @@ -89,7 +91,7 @@ main() {
echo ""
if [ "$num_samples" -gt 64 ]; then
echo "Error: Number of samples greater than 64. The app limits samples to 64"
dx-jobutil-report-error "Number of samples greater than 64. The app limits samples to 64" appError
dx-jobutil-report-error "Number of samples greater than 64. The app limits samples to 64" AppError
fi
###############
{
Expand Down
1 change: 1 addition & 0 deletions stjude_warden_fastq/dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
{
"name": "Genome",
"class": "string",
"optional": false,
"choices": [
"Human_hg38_v31 : hg38 Gencode v31",
"Human_hg19_v19 : hg19 Gencode v19",
Expand Down
Loading

0 comments on commit 81f2267

Please sign in to comment.