Skip to content

Commit fd86997

Browse files
committed
parser: skip BOM at the beginning of input
... so that the data format is properly recognized even when the file starts with a BOM. Closes: #182
1 parent 43ae09a commit fd86997

File tree

7 files changed

+10877
-7
lines changed

7 files changed

+10877
-7
lines changed

src/lib/instream.cc

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,21 +57,37 @@ void InStream::handleError(const std::string &msg, const unsigned long line)
5757
InStreamLookAhead::InStreamLookAhead(
5858
InStream &input,
5959
const unsigned size,
60+
const bool skipBOM,
6061
bool skipWhiteSpaces)
6162
{
6263
std::istream &inStr = input.str();
6364

64-
// read `size` chars from input
65-
while (buf_.size() < size) {
66-
const int c = inStr.get();
67-
if (skipWhiteSpaces && isspace(c) && !!inStr)
65+
int c = inStr.get();
66+
if (skipBOM
67+
// try to read BOM ... [0xEF, 0xBB, 0xBF]
68+
&& (0xEF == c)
69+
&& (0xBB == (c = inStr.get()))
70+
&& (0xBF == (c = inStr.get())))
71+
// BOM successfully read -> read the next char
72+
c = inStr.get();
73+
74+
// read chars from input
75+
for (;;) {
76+
if (skipWhiteSpaces && isspace(c))
6877
// skip a white-space
69-
continue;
78+
goto next;
7079

7180
// only the leading white-spaces are skipped
7281
skipWhiteSpaces = false;
7382

83+
// append one char to the buffer
7484
buf_.push_back(c);
85+
if (size <= buf_.size())
86+
// the requested number of chars has been read
87+
break;
88+
next:
89+
// read the next char
90+
c = inStr.get();
7591
}
7692

7793
// put the chars back to the input stream

src/lib/instream.hh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ class InStreamLookAhead {
6262
InStreamLookAhead(
6363
InStream &input,
6464
unsigned size,
65-
bool skipWhiteSpaces = false);
65+
bool skipBOM,
66+
bool skipWhiteSpaces);
6667

6768
char operator[](const unsigned idx) const {
6869
return buf_.at(idx);

src/lib/parser.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ static inline std::unique_ptr<T> make_unique(InStream &input) {
3434
AbstractParserPtr createParser(InStream &input)
3535
{
3636
// skip the BOM (if any) and all leading white-spaces, then sniff the first two chars from the input
37-
InStreamLookAhead head(input, 2U, /* skipWhiteSpaces */ true);
37+
InStreamLookAhead head(input, 2U,
38+
/* skipBOM */ true,
39+
/* skipWhiteSpaces */ true);
3840

3941
switch (head[0]) {
4042
case '{':
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--mode=json

tests/csgrep/0125-sarif-parser-bom-stdin.txt

Lines changed: 10604 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
{
2+
"scan": {
3+
"analyzer-version-snyk-code": "1.0.0"
4+
},
5+
"defects": [
6+
{
7+
"checker": "SNYK_CODE_WARNING",
8+
"cwe": 290,
9+
"tool": "snyk-code",
10+
"key_event_idx": 0,
11+
"events": [
12+
{
13+
"file_name": "vendor/krb5-src/krb5/src/lib/krb5/krb/t_princ.c",
14+
"line": 381,
15+
"column": 26,
16+
"h_size": 22,
17+
"event": "note[cpp/WeakGuard]",
18+
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
19+
"verbosity_level": 0
20+
}
21+
]
22+
},
23+
{
24+
"checker": "SNYK_CODE_WARNING",
25+
"cwe": 290,
26+
"tool": "snyk-code",
27+
"key_event_idx": 0,
28+
"events": [
29+
{
30+
"file_name": "vendor/krb5-src/krb5/src/wconfig.c",
31+
"line": 140,
32+
"column": 21,
33+
"h_size": 12,
34+
"event": "note[cpp/WeakGuard]",
35+
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
36+
"verbosity_level": 0
37+
}
38+
]
39+
},
40+
{
41+
"checker": "SNYK_CODE_WARNING",
42+
"cwe": 290,
43+
"tool": "snyk-code",
44+
"key_event_idx": 0,
45+
"events": [
46+
{
47+
"file_name": "vendor/krb5-src/krb5/src/wconfig.c",
48+
"line": 145,
49+
"column": 21,
50+
"h_size": 13,
51+
"event": "note[cpp/WeakGuard]",
52+
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
53+
"verbosity_level": 0
54+
}
55+
]
56+
},
57+
{
58+
"checker": "SNYK_CODE_WARNING",
59+
"cwe": 190,
60+
"tool": "snyk-code",
61+
"key_event_idx": 0,
62+
"events": [
63+
{
64+
"file_name": "vendor/sasl2-sys/sasl2/utils/smtptest.c",
65+
"line": 360,
66+
"column": 3,
67+
"h_size": 4,
68+
"event": "note[cpp/IntegerOverflow/test]",
69+
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
70+
"verbosity_level": 0
71+
}
72+
]
73+
},
74+
{
75+
"checker": "SNYK_CODE_WARNING",
76+
"cwe": 190,
77+
"tool": "snyk-code",
78+
"key_event_idx": 0,
79+
"events": [
80+
{
81+
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
82+
"line": 161,
83+
"column": 19,
84+
"h_size": 7,
85+
"event": "note[cpp/IntegerOverflow/test]",
86+
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
87+
"verbosity_level": 0
88+
}
89+
]
90+
},
91+
{
92+
"checker": "SNYK_CODE_WARNING",
93+
"cwe": 190,
94+
"tool": "snyk-code",
95+
"key_event_idx": 0,
96+
"events": [
97+
{
98+
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
99+
"line": 161,
100+
"column": 30,
101+
"h_size": 2,
102+
"event": "note[cpp/IntegerOverflow/test]",
103+
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
104+
"verbosity_level": 0
105+
}
106+
]
107+
},
108+
{
109+
"checker": "SNYK_CODE_WARNING",
110+
"cwe": 190,
111+
"tool": "snyk-code",
112+
"key_event_idx": 0,
113+
"events": [
114+
{
115+
"file_name": "vendor/rdkafka-sys/librdkafka/tests/0098-consumer-txn.cpp",
116+
"line": 307,
117+
"column": 35,
118+
"h_size": 7,
119+
"event": "note[cpp/IntegerOverflow/test]",
120+
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
121+
"verbosity_level": 0
122+
}
123+
]
124+
},
125+
{
126+
"checker": "SNYK_CODE_WARNING",
127+
"cwe": 190,
128+
"tool": "snyk-code",
129+
"key_event_idx": 0,
130+
"events": [
131+
{
132+
"file_name": "vendor/rdkafka-sys/librdkafka/tests/0098-consumer-txn.cpp",
133+
"line": 444,
134+
"column": 7,
135+
"h_size": 29,
136+
"event": "note[cpp/IntegerOverflow/test]",
137+
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
138+
"verbosity_level": 0
139+
}
140+
]
141+
},
142+
{
143+
"checker": "SNYK_CODE_WARNING",
144+
"cwe": 190,
145+
"tool": "snyk-code",
146+
"key_event_idx": 0,
147+
"events": [
148+
{
149+
"file_name": "vendor/lz4-sys/liblz4/tests/abiTest.c",
150+
"line": 81,
151+
"column": 27,
152+
"h_size": 26,
153+
"event": "note[cpp/IntegerOverflow/test]",
154+
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
155+
"verbosity_level": 0
156+
}
157+
]
158+
},
159+
{
160+
"checker": "SNYK_CODE_WARNING",
161+
"cwe": 190,
162+
"tool": "snyk-code",
163+
"key_event_idx": 0,
164+
"events": [
165+
{
166+
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
167+
"line": 126,
168+
"column": 17,
169+
"h_size": 2,
170+
"event": "note[cpp/IntegerOverflow/test]",
171+
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
172+
"verbosity_level": 0
173+
}
174+
]
175+
},
176+
{
177+
"checker": "SNYK_CODE_WARNING",
178+
"cwe": 190,
179+
"tool": "snyk-code",
180+
"key_event_idx": 0,
181+
"events": [
182+
{
183+
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
184+
"line": 128,
185+
"column": 17,
186+
"h_size": 2,
187+
"event": "note[cpp/IntegerOverflow/test]",
188+
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
189+
"verbosity_level": 0
190+
}
191+
]
192+
},
193+
{
194+
"checker": "SNYK_CODE_WARNING",
195+
"cwe": 190,
196+
"tool": "snyk-code",
197+
"key_event_idx": 0,
198+
"events": [
199+
{
200+
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
201+
"line": 130,
202+
"column": 17,
203+
"h_size": 2,
204+
"event": "note[cpp/IntegerOverflow/test]",
205+
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
206+
"verbosity_level": 0
207+
}
208+
]
209+
},
210+
{
211+
"checker": "SNYK_CODE_WARNING",
212+
"cwe": 190,
213+
"tool": "snyk-code",
214+
"key_event_idx": 0,
215+
"events": [
216+
{
217+
"file_name": "vendor/openssl-src/openssl/test/confdump.c",
218+
"line": 37,
219+
"column": 25,
220+
"h_size": 10,
221+
"event": "note[cpp/IntegerOverflow/test]",
222+
"message": "Unsanitized input from a command line argument flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
223+
"verbosity_level": 0
224+
}
225+
]
226+
},
227+
{
228+
"checker": "SNYK_CODE_WARNING",
229+
"cwe": 190,
230+
"tool": "snyk-code",
231+
"key_event_idx": 0,
232+
"events": [
233+
{
234+
"file_name": "vendor/lz4-sys/liblz4/tests/fuzzer.c",
235+
"line": 378,
236+
"column": 95,
237+
"h_size": 9,
238+
"event": "note[cpp/IntegerOverflow/test]",
239+
"message": "Unsanitized input from a command line argument flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
240+
"verbosity_level": 0
241+
}
242+
]
243+
}
244+
]
245+
}

tests/csgrep/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,4 @@ test_csgrep("0121-cov-parser-lock-evasion" )
168168
test_csgrep("0122-json-parser-cov-v10-column" )
169169
test_csgrep("0123-csgrep-hash-v1" )
170170
test_csgrep("0124-sarif-writer-imp" )
171+
test_csgrep("0125-sarif-parser-bom" )

0 commit comments

Comments
 (0)