Skip to content

Commit 3127eea

Browse files
authored
Support unescape plus in query parameters (#60)
* Support unescape plus in query parameters Signed-off-by: Wayne Zhang <[email protected]> * update per comment Signed-off-by: Wayne Zhang <[email protected]>
1 parent f1591a4 commit 3127eea

File tree

2 files changed

+90
-27
lines changed

2 files changed

+90
-27
lines changed

src/include/grpc_transcoding/path_matcher.h

+43-22
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ class PathMatcher {
9494
// The info associated with each method. The path matcher nodes
9595
// will hold pointers to MethodData objects in this vector.
9696
std::vector<std::unique_ptr<MethodData>> methods_;
97-
UrlUnescapeSpec unescape_spec_;
97+
UrlUnescapeSpec path_unescape_spec_;
98+
bool query_param_unescape_plus_;
9899

99100
private:
100101
friend class PathMatcherBuilder<Method>;
@@ -127,8 +128,14 @@ class PathMatcherBuilder {
127128
const std::string& body_field_path, Method method);
128129

129130
// Change unescaping behavior, see UrlUnescapeSpec for available options.
130-
void SetUrlUnescapeSpec(UrlUnescapeSpec unescape_spec) {
131-
unescape_spec_ = unescape_spec;
131+
// This only applies to path, not query parameters.
132+
void SetUrlUnescapeSpec(UrlUnescapeSpec path_unescape_spec) {
133+
path_unescape_spec_ = path_unescape_spec;
134+
}
135+
// If true, unescape '+' in query parameters to space. Default is false.
136+
// To support [HTML 2.0 or RFC1866](https://tools.ietf.org/html/rfc1866#section-8.2.1).
137+
void SetQueryParamUnescapePlus(bool query_param_unescape_plus) {
138+
query_param_unescape_plus_ = query_param_unescape_plus;
132139
}
133140

134141
// Returns a unique_ptr to a thread safe PathMatcher that contains all
@@ -151,8 +158,9 @@ class PathMatcherBuilder {
151158
std::unordered_set<std::string> custom_verbs_;
152159
typedef typename PathMatcher<Method>::MethodData MethodData;
153160
std::vector<std::unique_ptr<MethodData>> methods_;
154-
UrlUnescapeSpec unescape_spec_ =
161+
UrlUnescapeSpec path_unescape_spec_ =
155162
UrlUnescapeSpec::kAllCharactersExceptReserved;
163+
bool query_param_unescape_plus_ = false;
156164

157165
friend class PathMatcher<Method>;
158166
};
@@ -222,44 +230,53 @@ inline int hex_digit_to_int(char c) {
222230
//
223231
// If the next three characters are an escaped character then this function will
224232
// also return what character is escaped.
225-
bool GetEscapedChar(const std::string& src, size_t i,
226-
UrlUnescapeSpec unescape_spec, char* out) {
233+
//
234+
// If unescape_plus is true, unescape '+' to space.
235+
//
236+
// return value: 0: not unescaped, >0: unescaped, number of used original characters.
237+
//
238+
int GetEscapedChar(const std::string& src, size_t i,
239+
UrlUnescapeSpec unescape_spec, bool unescape_plus, char* out) {
240+
if (unescape_plus && src[i] == '+') {
241+
*out = ' ';
242+
return 1;
243+
}
227244
if (i + 2 < src.size() && src[i] == '%') {
228245
if (ascii_isxdigit(src[i + 1]) && ascii_isxdigit(src[i + 2])) {
229246
char c =
230247
(hex_digit_to_int(src[i + 1]) << 4) | hex_digit_to_int(src[i + 2]);
231248
switch (unescape_spec) {
232249
case UrlUnescapeSpec::kAllCharactersExceptReserved:
233250
if (IsReservedChar(c)) {
234-
return false;
251+
return 0;
235252
}
236253
break;
237254
case UrlUnescapeSpec::kAllCharactersExceptSlash:
238255
if (c == '/') {
239-
return false;
256+
return 0;
240257
}
241258
break;
242259
case UrlUnescapeSpec::kAllCharacters:
243260
break;
244261
}
245262
*out = c;
246-
return true;
263+
return 3;
247264
}
248265
}
249-
return false;
266+
return 0;
250267
}
251268

252269
// Unescapes string 'part' and returns the unescaped string. Reserved characters
253270
// (as specified in RFC 6570) are not escaped if unescape_reserved_chars is
254271
// false.
255272
std::string UrlUnescapeString(const std::string& part,
256-
UrlUnescapeSpec unescape_spec) {
273+
UrlUnescapeSpec unescape_spec, bool unescape_plus) {
257274
std::string unescaped;
258275
// Check whether we need to escape at all.
259276
bool needs_unescaping = false;
260277
char ch = '\0';
261278
for (size_t i = 0; i < part.size(); ++i) {
262-
if (GetEscapedChar(part, i, unescape_spec, &ch)) {
279+
if (GetEscapedChar(part, i, unescape_spec, unescape_plus, &ch) > 0) {
263280
needs_unescaping = true;
264281
break;
265282
}
@@ -275,9 +292,10 @@ std::string UrlUnescapeString(const std::string& part,
275292
char* p = begin;
276293

277294
for (size_t i = 0; i < part.size();) {
278-
if (GetEscapedChar(part, i, unescape_spec, &ch)) {
295+
int skip = GetEscapedChar(part, i, unescape_spec, unescape_plus, &ch);
296+
if (skip > 0) {
279297
*p++ = ch;
280-
i += 3;
298+
i += skip;
281299
} else {
282300
*p++ = part[i];
283301
i += 1;
@@ -291,8 +309,8 @@ std::string UrlUnescapeString(const std::string& part,
291309
template <class VariableBinding>
292310
void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
293311
const std::vector<std::string>& parts,
294-
std::vector<VariableBinding>* bindings,
295-
UrlUnescapeSpec unescape_spec) {
312+
UrlUnescapeSpec unescape_spec,
313+
std::vector<VariableBinding>* bindings) {
296314
for (const auto& var : vars) {
297315
// Determine the subpath bound to the variable based on the
298316
// [start_segment, end_segment) segment range of the variable.
@@ -316,7 +334,7 @@ void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
316334
// Joins parts with "/" to form a path string.
317335
for (size_t i = var.start_segment; i < end_segment; ++i) {
318336
// For multipart matches only unescape non-reserved characters.
319-
binding.value += UrlUnescapeString(parts[i], var_unescape_spec);
337+
binding.value += UrlUnescapeString(parts[i], var_unescape_spec, false);
320338
if (i < end_segment - 1) {
321339
binding.value += "/";
322340
}
@@ -329,6 +347,7 @@ template <class VariableBinding>
329347
void ExtractBindingsFromQueryParameters(
330348
const std::string& query_params,
331349
const std::unordered_set<std::string>& system_params,
350+
bool query_param_unescape_plus,
332351
std::vector<VariableBinding>* bindings) {
333352
// The bindings in URL the query parameters have the following form:
334353
// <field_path1>=value1&<field_path2>=value2&...&<field_pathN>=valueN
@@ -350,7 +369,8 @@ void ExtractBindingsFromQueryParameters(
350369
VariableBinding binding;
351370
split(name, '.', binding.field_path);
352371
binding.value = UrlUnescapeString(param.substr(pos + 1),
353-
UrlUnescapeSpec::kAllCharacters);
372+
UrlUnescapeSpec::kAllCharacters,
373+
query_param_unescape_plus);
354374
bindings->emplace_back(std::move(binding));
355375
}
356376
}
@@ -424,7 +444,8 @@ PathMatcher<Method>::PathMatcher(PathMatcherBuilder<Method>&& builder)
424444
: root_ptr_(std::move(builder.root_ptr_)),
425445
custom_verbs_(std::move(builder.custom_verbs_)),
426446
methods_(std::move(builder.methods_)),
427-
unescape_spec_(builder.unescape_spec_) {}
447+
path_unescape_spec_(builder.path_unescape_spec_),
448+
query_param_unescape_plus_(builder.query_param_unescape_plus_) {}
428449

429450
// Lookup is a wrapper method for the recursive node Lookup. First, the wrapper
430451
// splits the request path into slash-separated path parts. Next, the method
@@ -460,11 +481,11 @@ Method PathMatcher<Method>::Lookup(
460481
MethodData* method_data = reinterpret_cast<MethodData*>(lookup_result.data);
461482
if (variable_bindings != nullptr) {
462483
variable_bindings->clear();
463-
ExtractBindingsFromPath(method_data->variables, parts, variable_bindings,
464-
unescape_spec_);
484+
ExtractBindingsFromPath(method_data->variables, parts,
485+
path_unescape_spec_, variable_bindings);
465486
ExtractBindingsFromQueryParameters(
466487
query_params, method_data->system_query_parameter_names,
467-
variable_bindings);
488+
query_param_unescape_plus_, variable_bindings);
468489
}
469490
if (body_field_path != nullptr) {
470491
*body_field_path = method_data->body_field_path;

test/path_matcher_test.cc

+47-5
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ class PathMatcherTest : public ::testing::Test {
124124
builder_.SetUrlUnescapeSpec(unescape_spec);
125125
}
126126

127+
void SetQueryParamUnescapePlus(bool query_param_unescape_plus) {
128+
builder_.SetQueryParamUnescapePlus(query_param_unescape_plus);
129+
}
130+
127131
void Build() { matcher_ = builder_.Build(); }
128132

129133
MethodInfo* LookupWithBodyFieldPath(std::string method, std::string path,
@@ -320,9 +324,10 @@ TEST_F(PathMatcherTest, PercentEscapesUnescapedForSingleSegment) {
320324
EXPECT_NE(nullptr, a_c);
321325

322326
Bindings bindings;
323-
EXPECT_EQ(Lookup("GET", "/a/p%20q%2Fr/c", &bindings), a_c);
327+
// Also test '+', make sure it is not unescaped
328+
EXPECT_EQ(Lookup("GET", "/a/p%20q%2Fr+/c", &bindings), a_c);
324329
EXPECT_EQ(Bindings({
325-
Binding{FieldPath{"x"}, "p q/r"},
330+
Binding{FieldPath{"x"}, "p q/r+"},
326331
}),
327332
bindings);
328333
}
@@ -383,9 +388,10 @@ TEST_F(PathMatcherTest, PercentEscapesNotUnescapedForMultiSegment2) {
383388
EXPECT_NE(nullptr, a__c);
384389

385390
Bindings bindings;
386-
EXPECT_EQ(Lookup("GET", "/a/p/foo%20foo/q/bar%2Fbar/c", &bindings), a__c);
387-
// space (%20) is escaped, but slash (%2F) isn't.
388-
EXPECT_EQ(Bindings({Binding{FieldPath{"x"}, "p/foo foo/q/bar%2Fbar"}}),
391+
// Also test '+', make sure it is not unescaped
392+
EXPECT_EQ(Lookup("GET", "/a/p/foo%20foo/q/bar%2Fbar+/c", &bindings), a__c);
393+
// space (%20) is unescaped, but slash (%2F) isn't. nor +
394+
EXPECT_EQ(Bindings({Binding{FieldPath{"x"}, "p/foo foo/q/bar%2Fbar+"}}),
389395
bindings);
390396
}
391397

@@ -819,6 +825,42 @@ TEST_F(PathMatcherTest, VariableBindingsWithQueryParamsEncoding) {
819825
bindings);
820826
}
821827

828+
TEST_F(PathMatcherTest, QueryParameterNotUnescapePlus) {
829+
MethodInfo* a = AddGetPath("/a");
830+
Build();
831+
832+
EXPECT_NE(nullptr, a);
833+
834+
Bindings bindings;
835+
// The bindings from the query parameters "x=Hello+world&y=%2B+%20"
836+
// By default, only unescape percent-encoded %HH, but not '+'
837+
EXPECT_EQ(LookupWithParams("GET", "/a", "x=Hello+world&y=%2B+%20", &bindings), a);
838+
EXPECT_EQ(Bindings({
839+
Binding{FieldPath{"x"}, "Hello+world"},
840+
Binding{FieldPath{"y"}, "++ "},
841+
}),
842+
bindings);
843+
}
844+
845+
TEST_F(PathMatcherTest, QueryParameterUnescapePlus) {
846+
MethodInfo* a = AddGetPath("/a");
847+
// Enable query_param_unescape_plus to unescape '+'
848+
SetQueryParamUnescapePlus(true);
849+
Build();
850+
851+
EXPECT_NE(nullptr, a);
852+
853+
Bindings bindings;
854+
// The bindings from the query parameters "x=Hello+world&y=%2B+%20"
855+
// Unescape percent-encoded %HH, and convert '+' to space
856+
EXPECT_EQ(LookupWithParams("GET", "/a", "x=Hello+world&y=%2B+%20", &bindings), a);
857+
EXPECT_EQ(Bindings({
858+
Binding{FieldPath{"x"}, "Hello world"},
859+
Binding{FieldPath{"y"}, "+ "},
860+
}),
861+
bindings);
862+
}
863+
822864
TEST_F(PathMatcherTest, VariableBindingsWithQueryParamsAndSystemParams) {
823865
std::unordered_set<std::string> system_params{"key", "api_key"};
824866
MethodInfo* a_b = AddPathWithSystemParams("GET", "/a/{x}/b", &system_params);

0 commit comments

Comments
 (0)