Skip to content

Commit d659d40

Browse files
authored
Merge pull request #19701 from adityasharad/actions/bash-parsing-ranking-performance
Actions: Improve Bash parsing performance on command and string interpolations
2 parents 64ab7c7 + e48a7da commit d659d40

File tree

3 files changed

+185
-44
lines changed

3 files changed

+185
-44
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Fixed performance issues in the parsing of Bash scripts in workflow files,
5+
which led to out-of-disk errors when analysing certain workflow files with
6+
complex interpolations of shell commands or quoted strings.

actions/ql/lib/codeql/actions/Bash.qll

Lines changed: 98 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,64 @@ class BashShellScript extends ShellScript {
88
)
99
}
1010

11-
private string lineProducer(int i) {
12-
result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", i)
13-
}
14-
15-
private predicate cmdSubstitutionReplacement(string cmdSubs, string id, int k) {
16-
exists(string line | line = this.lineProducer(k) |
17-
exists(int i, int j |
18-
cmdSubs =
19-
// $() cmd substitution
20-
line.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", i, j)
21-
.regexpReplaceAll("^\\$\\(", "")
22-
.regexpReplaceAll("\\)$", "") and
23-
id = "cmdsubs:" + k + ":" + i + ":" + j
24-
)
25-
or
26-
exists(int i, int j |
27-
// `...` cmd substitution
28-
cmdSubs =
29-
line.regexpFind("\\`[^\\`]+\\`", i, j)
30-
.regexpReplaceAll("^\\`", "")
31-
.regexpReplaceAll("\\`$", "") and
32-
id = "cmd:" + k + ":" + i + ":" + j
33-
)
11+
/**
12+
* Gets the line at 0-based index `lineIndex` within this shell script,
13+
* assuming newlines as separators.
14+
*/
15+
private string lineProducer(int lineIndex) {
16+
result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", lineIndex)
17+
}
18+
19+
private predicate cmdSubstitutionReplacement(string command, string id, int lineIndex) {
20+
this.commandInSubstitution(lineIndex, command, id)
21+
or
22+
this.commandInBackticks(lineIndex, command, id)
23+
}
24+
25+
/**
26+
* Holds if there is a command substitution `$(command)` in
27+
* the line at `lineIndex` in the shell script,
28+
* and `id` is a unique identifier for this command.
29+
*/
30+
private predicate commandInSubstitution(int lineIndex, string command, string id) {
31+
exists(int occurrenceIndex, int occurrenceOffset |
32+
command =
33+
// Look for the command inside a $(...) command substitution
34+
this.lineProducer(lineIndex)
35+
.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", occurrenceIndex,
36+
occurrenceOffset)
37+
// trim starting $( - TODO do this in first regex
38+
.regexpReplaceAll("^\\$\\(", "")
39+
// trim ending ) - TODO do this in first regex
40+
.regexpReplaceAll("\\)$", "") and
41+
id = "cmdsubs:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
3442
)
3543
}
3644

37-
private predicate rankedCmdSubstitutionReplacements(int i, string old, string new) {
38-
old = rank[i](string old2 | this.cmdSubstitutionReplacement(old2, _, _) | old2) and
39-
this.cmdSubstitutionReplacement(old, new, _)
45+
/**
46+
* Holds if `command` is a command in backticks `` `...` `` in
47+
* the line at `lineIndex` in the shell script,
48+
* and `id` is a unique identifier for this command.
49+
*/
50+
private predicate commandInBackticks(int lineIndex, string command, string id) {
51+
exists(int occurrenceIndex, int occurrenceOffset |
52+
command =
53+
this.lineProducer(lineIndex)
54+
.regexpFind("\\`[^\\`]+\\`", occurrenceIndex, occurrenceOffset)
55+
// trim leading backtick - TODO do this in first regex
56+
.regexpReplaceAll("^\\`", "")
57+
// trim trailing backtick - TODO do this in first regex
58+
.regexpReplaceAll("\\`$", "") and
59+
id = "cmd:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
60+
)
61+
}
62+
63+
private predicate rankedCmdSubstitutionReplacements(int i, string command, string commandId) {
64+
// rank commands by their unique IDs
65+
commandId = rank[i](string c, string id | this.cmdSubstitutionReplacement(c, id, _) | id) and
66+
// since we cannot output (command, ID) tuples from the rank operation,
67+
// we need to work out the specific command associated with the resulting ID
68+
this.cmdSubstitutionReplacement(command, commandId, _)
4069
}
4170

4271
private predicate doReplaceCmdSubstitutions(int line, int round, string old, string new) {
@@ -64,31 +93,56 @@ class BashShellScript extends ShellScript {
6493
this.cmdSubstitutionReplacement(result, _, i)
6594
}
6695

96+
/**
97+
* Holds if `quotedStr` is a string in double quotes in
98+
* the line at `lineIndex` in the shell script,
99+
* and `id` is a unique identifier for this quoted string.
100+
*/
101+
private predicate doubleQuotedString(int lineIndex, string quotedStr, string id) {
102+
exists(int occurrenceIndex, int occurrenceOffset |
103+
// double quoted string
104+
quotedStr =
105+
this.cmdSubstitutedLineProducer(lineIndex)
106+
.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", occurrenceIndex, occurrenceOffset) and
107+
id =
108+
"qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
109+
quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
110+
)
111+
}
112+
113+
/**
114+
* Holds if `quotedStr` is a string in single quotes in
115+
* the line at `lineIndex` in the shell script,
116+
* and `id` is a unique identifier for this quoted string.
117+
*/
118+
private predicate singleQuotedString(int lineIndex, string quotedStr, string id) {
119+
exists(int occurrenceIndex, int occurrenceOffset |
120+
// single quoted string
121+
quotedStr =
122+
this.cmdSubstitutedLineProducer(lineIndex)
123+
.regexpFind("'((?:\\\\.|[^'\\\\])*)'", occurrenceIndex, occurrenceOffset) and
124+
id =
125+
"qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
126+
quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
127+
)
128+
}
129+
67130
private predicate quotedStringReplacement(string quotedStr, string id) {
68-
exists(string line, int k | line = this.cmdSubstitutedLineProducer(k) |
69-
exists(int i, int j |
70-
// double quoted string
71-
quotedStr = line.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", i, j) and
72-
id =
73-
"qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" +
74-
quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
75-
)
131+
exists(int lineIndex |
132+
this.doubleQuotedString(lineIndex, quotedStr, id)
76133
or
77-
exists(int i, int j |
78-
// single quoted string
79-
quotedStr = line.regexpFind("'((?:\\\\.|[^'\\\\])*)'", i, j) and
80-
id =
81-
"qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" +
82-
quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
83-
)
134+
this.singleQuotedString(lineIndex, quotedStr, id)
84135
) and
85136
// Only do this for strings that might otherwise disrupt subsequent parsing
86137
quotedStr.regexpMatch("[\"'].*[$\n\r'\"" + Bash::separator() + "].*[\"']")
87138
}
88139

89-
private predicate rankedQuotedStringReplacements(int i, string old, string new) {
90-
old = rank[i](string old2 | this.quotedStringReplacement(old2, _) | old2) and
91-
this.quotedStringReplacement(old, new)
140+
private predicate rankedQuotedStringReplacements(int i, string quotedString, string quotedStringId) {
141+
// rank quoted strings by their nearly-unique IDs
142+
quotedStringId = rank[i](string s, string id | this.quotedStringReplacement(s, id) | id) and
143+
// since we cannot output (string, ID) tuples from the rank operation,
144+
// we need to work out the specific string associated with the resulting ID
145+
this.quotedStringReplacement(quotedString, quotedStringId)
92146
}
93147

94148
private predicate doReplaceQuotedStrings(int line, int round, string old, string new) {
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
name: Workflow with complex interpolation
2+
on:
3+
workflow_dispatch:
4+
inputs:
5+
choice-a:
6+
required: true
7+
type: choice
8+
description: choice-a
9+
default: a1
10+
options:
11+
- a1
12+
- a2
13+
- a3
14+
string-b:
15+
required: false
16+
type: string
17+
description: string-b
18+
string-c:
19+
required: false
20+
type: string
21+
description: string-c
22+
list-d:
23+
required: true
24+
type: string
25+
default: d1 d2
26+
description: list-d whitespace separated
27+
list-e:
28+
required: false
29+
type: string
30+
description: list-e whitespace separated
31+
choice-f:
32+
required: true
33+
type: choice
34+
description: choice-f
35+
options:
36+
- false
37+
- true
38+
39+
env:
40+
DRY_TEST: false
41+
B: ${{ github.event.inputs.string-b }}
42+
43+
jobs:
44+
job:
45+
runs-on: ubuntu-latest
46+
steps:
47+
- name: Produce values
48+
id: produce-values
49+
run: |
50+
echo "region=region" >> $GITHUB_OUTPUT
51+
echo "zone=zone" >> $GITHUB_OUTPUT
52+
53+
- name: Step with complex interpolation
54+
id: complex
55+
env:
56+
CHOICE_A: ${{ github.event.inputs.choice-a }}
57+
STRING_B: ${{ github.event.inputs.string-b }}
58+
STRING_C: ${{ github.event.inputs.string-c }}
59+
LIST_D: ${{ github.event.inputs.list-d }}
60+
LIST_E: ${{ github.event.inputs.list-e }}
61+
CHOICE_F: ${{ github.event.inputs.choice-f }}
62+
REGION: ${{ steps.produce-values.outputs.region }}
63+
ZONE: ${{ steps.produce-values.outputs.zone }}
64+
DRY_TEST_JSON: ${{ fromJSON(env.DRY_TEST) }}
65+
FUNCTION_NAME: my-function
66+
USER_EMAIL: '[email protected]'
67+
TYPE: type
68+
RANGE: '0-100'
69+
70+
run: |
71+
comma_separated_list_d=$(echo "${LIST_D}" | sed "s/ /\",\"/g")
72+
comma_separated_list_e=$(echo "${LIST_E}" | sed "s/ /\",\"/g")
73+
c1=$(echo "${STRING_C}" | cut -d "-" -f 1)
74+
c2=$(echo "${STRING_C}" | cut -d "-" -f 2)
75+
# Similar commands that use JSON payloads with string interpolation.
76+
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
77+
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
78+
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
79+
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
80+
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":["'"${comma_separated_list_e}"'"],"dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
81+
shell: bash

0 commit comments

Comments
 (0)