-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathvoronota-contacts
executable file
·309 lines (272 loc) · 8.15 KB
/
voronota-contacts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/bin/bash
function print_help_and_exit
{
cat >&2 << EOF
'voronota-contacts' script provides a way for calculating and querying interatomic contacts
with just one command (without the need to construct a pipeline from 'voronota' calls).
Basic options:
--input | -i string * input structure file in PDB or mmCIF format
--input-filter-query string input atoms filtering query parameters
--contacts-query string contacts query parameters
--contacts-query-additional string additional, preceeding query parameters, default is '--match-min-seq-sep 1'
--cache-dir string path to cache directory
--sum-at-end flag to print sum of areas as the last line in output
--tsv-output flag to output table in tab-separated values format with header
--help | -h flag to display help message and exit
Advanced options:
--output-drawing string output file with drawing script for PyMol
--drawing-parameters string drawing parameters
--wireframe-drawing flag to draw wireframe representation of contacts
--multiple-models flag to handle multiple models in PDB file
--use-hbplus flag to run 'hbplus' to tag H-bonds
Standard output (multiple lines):
{contacting atom} {contacting atom} {contact area} {distance between atoms centers} {tags} {adjunct values}
EOF
exit 1
}
readonly ZEROARG=$0
if [[ $ZEROARG == *"/"* ]]
then
cd "$(dirname ${ZEROARG})"
export PATH="$(pwd):${PATH}"
cd - &> /dev/null
fi
export LC_ALL=C
command -v voronota &> /dev/null || { echo >&2 "Error: 'voronota' executable not in binaries path"; exit 1; }
command -v voronota-resources &> /dev/null || { echo >&2 "Error: 'voronota-resources' executable not in binaries path"; exit 1; }
INFILE=""
INPUT_FILTER_QUERY_PARAMETERS=""
CONTACTS_QUERY_FIRST_PARAMETERS="--match-min-seq-sep 1"
CONTACTS_QUERY_SECOND_PARAMETERS=""
DRAWING_OUTPUT=""
DRAWING_PARAMETERS=""
CONTACTS_CACHE_DIRECTORY=""
MULTIPLE_MODELS_CHAINS_OPTION=""
SUM_AT_END=false
TSV_OUTPUT=false
WIREFRAME_DRAWING=false
USE_HBPLUS=false
HELP_MODE=false
while [[ $# > 0 ]]
do
OPTION="$1"
OPTARG="$2"
shift
case $OPTION in
-i|--input)
INFILE="$OPTARG"
shift
;;
--input-filter-query)
INPUT_FILTER_QUERY_PARAMETERS="$OPTARG"
shift
;;
--contacts-query-additional)
CONTACTS_QUERY_FIRST_PARAMETERS="$OPTARG"
shift
;;
--contacts-query)
CONTACTS_QUERY_SECOND_PARAMETERS="$OPTARG"
shift
;;
--output-drawing)
DRAWING_OUTPUT="$OPTARG"
shift
;;
--drawing-parameters)
DRAWING_PARAMETERS="$OPTARG"
shift
;;
--cache-dir)
CONTACTS_CACHE_DIRECTORY="$OPTARG"
shift
;;
--multiple-models)
MULTIPLE_MODELS_CHAINS_OPTION="--multimodel-chains"
;;
--sum-at-end)
SUM_AT_END=true
;;
--tsv-output)
TSV_OUTPUT=true
;;
--wireframe-drawing)
WIREFRAME_DRAWING=true
;;
--use-hbplus)
USE_HBPLUS=true
;;
-h|--help)
HELP_MODE=true
;;
*)
echo >&2 "Error: invalid command line option '$OPTION'"
exit 1
;;
esac
done
if [ -z "$INFILE" ] || $HELP_MODE
then
print_help_and_exit
fi
if $USE_HBPLUS
then
command -v hbplus &> /dev/null || { echo >&2 "Warning: 'hbplus' executable not in binaries path, thus no 'hb' tags will be assigned"; USE_HBPLUS=false; }
fi
MD5SUM_COMMAND="md5sum"
if command -v md5sum &> /dev/null
then
MD5SUM_COMMAND="md5sum"
else
MD5SUM_COMMAND="md5"
fi
command -v $MD5SUM_COMMAND &> /dev/null || { echo >&2 "Error: 'md5sum' or 'md5' executable not in binaries path"; exit 1; }
if [ ! -s "$INFILE" ]
then
echo >&2 "Error: input file does not exist"
exit 1
fi
readonly TMPLDIR=$(mktemp -d)
trap "rm -r $TMPLDIR" EXIT
voronota-resources radii > "$TMPLDIR/radii"
if [ ! -s "$TMPLDIR/radii" ]
then
echo >&2 "Error: failed to get the predefined atomic radii"
exit 1
fi
{
if [[ "$INFILE" == *".gz" ]]
then
zcat "$INFILE"
else
cat "$INFILE"
fi
} \
| voronota get-balls-from-atoms-file \
--input-format detect \
--annotated $MULTIPLE_MODELS_CHAINS_OPTION \
--radii-file $TMPLDIR/radii \
--include-heteroatoms \
| voronota query-balls \
--drop-altloc-indicators \
| voronota query-balls $INPUT_FILTER_QUERY_PARAMETERS \
> $TMPLDIR/balls
if [ ! -s "$TMPLDIR/balls" ]
then
echo >&2 "Error: no atoms in input file"
exit 1
fi
{
echo "Unfiltered contacts with tags version 4"
if $USE_HBPLUS
then
echo "Using 'hbplus' output"
fi
} > $TMPLDIR/hashsalt
BALLS_MD5=""
if [ -n "$CONTACTS_CACHE_DIRECTORY" ]
then
BALLS_MD5=$(cat $TMPLDIR/hashsalt $TMPLDIR/balls | $MD5SUM_COMMAND | awk '{print $1}')
if [ -n "$BALLS_MD5" ]
then
BALLS_MD5="${BALLS_MD5}.voronota.contacts"
if [ -s "$CONTACTS_CACHE_DIRECTORY/$BALLS_MD5" ]
then
cp $CONTACTS_CACHE_DIRECTORY/$BALLS_MD5 $TMPLDIR/all_contacts
fi
fi
fi
if [ -n "$DRAWING_OUTPUT" ] && [ -s "$TMPLDIR/all_contacts" ]
then
GRAPHICS_TOKEN=$(cat $TMPLDIR/all_contacts | head -1 | awk '{print $8}')
if [ -z "$GRAPHICS_TOKEN" ]
then
rm $TMPLDIR/all_contacts
fi
fi
if [ ! -s "$TMPLDIR/all_contacts" ]
then
OPTIONAL_DRAW_FLAG=""
if [ -n "$DRAWING_OUTPUT" ]
then
OPTIONAL_DRAW_FLAG="--draw"
fi
OPTIONAL_HBPLUS_TAGS_SETTING=""
if $USE_HBPLUS
then
mkdir -p "$TMPLDIR/hbplus_workdir"
cat "$TMPLDIR/balls" | voronota write-balls-to-atoms-file --pdb-output "$TMPLDIR/hbplus_workdir/struct.pdb" > /dev/null
( cd "$TMPLDIR/hbplus_workdir" ; hbplus ./struct.pdb &> /dev/null ; cd - &> /dev/null )
if [ ! -s "$TMPLDIR/hbplus_workdir/struct.hb2" ]
then
echo >&2 "Error: failed to run hbplus"
exit 1
fi
OPTIONAL_HBPLUS_TAGS_SETTING="--set-hbplus-tags $TMPLDIR/hbplus_workdir/struct.hb2"
fi
cat $TMPLDIR/balls \
| voronota calculate-contacts --annotated $OPTIONAL_DRAW_FLAG \
| voronota query-contacts --preserve-graphics $OPTIONAL_HBPLUS_TAGS_SETTING \
| voronota query-contacts --preserve-graphics \
--set-tags 'sb' --match-first 'R<ASP,GLU>&A<OD1,OD2,OE1,OE2,OXT>' --match-second 'R<ARG,HIS,LYS>&A<NH1,NH2,ND1,NE2,NZ>' --match-max-dist 4.0 \
| voronota query-contacts --preserve-graphics \
--set-tags 'ds' --match-first 'R<CYS>&A<SG>' --match-second 'R<CYS>&A<SG>' --match-max-dist 3.0 \
> $TMPLDIR/all_contacts
if [ ! -s "$TMPLDIR/all_contacts" ]
then
echo >&2 "Error: failed to calculate contacts"
exit 1
fi
if [ -n "$CONTACTS_CACHE_DIRECTORY" ] && [ -n "$BALLS_MD5" ]
then
mkdir -p $CONTACTS_CACHE_DIRECTORY
cp $TMPLDIR/all_contacts $CONTACTS_CACHE_DIRECTORY/$BALLS_MD5
fi
fi
{
if [ -n "$DRAWING_OUTPUT" ]
then
cat $TMPLDIR/all_contacts \
| voronota query-contacts $CONTACTS_QUERY_FIRST_PARAMETERS \
--preserve-graphics \
| voronota query-contacts $CONTACTS_QUERY_SECOND_PARAMETERS \
--preserve-graphics \
> $TMPLDIR/all_contacts_for_drawing
if $WIREFRAME_DRAWING
then
cat $TMPLDIR/all_contacts_for_drawing \
| sed 's|_tfanc \S\+ \S\+ \S\+ \S\+ \S\+ \S\+|_lloop|g' \
| sed 's|_tstrip \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+||g' \
> $TMPLDIR/all_contacts_for_drawing_wireframe
mv $TMPLDIR/all_contacts_for_drawing_wireframe $TMPLDIR/all_contacts_for_drawing
fi
cat $TMPLDIR/all_contacts_for_drawing \
| voronota draw-contacts $DRAWING_PARAMETERS \
--drawing-for-pymol $DRAWING_OUTPUT \
> /dev/null
cat $TMPLDIR/all_contacts_for_drawing \
| sed 's/^\(\S\+\s\+\S\+\s\+\S\+\s\+\S\+\s\+\S\+\s\+\S\+\).*$/\1/'
else
cat $TMPLDIR/all_contacts \
| voronota query-contacts $CONTACTS_QUERY_FIRST_PARAMETERS \
| voronota query-contacts $CONTACTS_QUERY_SECOND_PARAMETERS
fi
} > $TMPLDIR/result_contacts
if $SUM_AT_END
then
cat $TMPLDIR/result_contacts \
| voronota query-contacts --summarize \
> $TMPLDIR/result_contacts_summary
cat $TMPLDIR/result_contacts $TMPLDIR/result_contacts_summary > $TMPLDIR/result_contacts_final
else
cat $TMPLDIR/result_contacts > $TMPLDIR/result_contacts_final
fi
if $TSV_OUTPUT
then
{
echo "chain1 rnum1 ins1 anum1 alt1 rname1 aname1 chain2 rnum2 ins2 anum2 alt2 rname2 aname2 area dist tags adjuncts"
cat $TMPLDIR/result_contacts_final | voronota expand-descriptors
} | sed 's/\s\+/\t/g'
else
cat $TMPLDIR/result_contacts_final | column -t
fi