-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f5f7ed9
commit 262fa14
Showing
13 changed files
with
370 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
!lib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
syntax = "proto3"; | ||
|
||
package for_testing; | ||
|
||
// Small message with one optional int32 and one optional string field.
// TestMessage uses it as the type of its sub_message field.
message SubMessage { | ||
optional int32 i1 = 1; | ||
optional string str1 = 2; | ||
} | ||
|
||
// Message in package for_testing that mixes several field kinds: optional
// scalars, repeated numeric fields, a repeated string and a nested message.
message TestMessage { | ||
// Two independent optional string fields.
optional string str1 = 1; | ||
optional string str2 = 2; | ||
|
||
// One optional field each of signed int, unsigned int and float type.
optional int32 i1 = 3; | ||
optional uint32 ui1 = 4; | ||
optional float x = 5; | ||
|
||
// Repeated numeric fields.
repeated int32 int_array = 6; | ||
repeated float float_array = 7; | ||
|
||
// Repeated string field.
repeated string repeated_string = 8; | ||
|
||
// Nested message field.
optional SubMessage sub_message = 9; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#include <iostream> | ||
|
||
#include "Foundational/iwstring/iwstring.h" | ||
|
||
namespace iwstring { | ||
|
||
using std::cerr; | ||
|
||
// Value returned by TokeniseWithQuotes when the input cannot be tokenised.
constexpr int kInvalid = -1; | ||
|
||
// Splits `buffer` into tokens separated by `sep`. A token may be enclosed in
// double quotes, in which case occurrences of `sep` between the quotes do not
// split it.
//
// Quote recognition is positional:
//   - an opening quote is recognised only as the first character of `buffer`
//     or as the character immediately following a `sep`;
//   - a closing quote is recognised only when it is immediately followed by
//     `sep` or is the last character of `buffer`.
//
// Outputs:
//   tstart[i]  index in `buffer` of the first character of token i (the
//              character after the opening quote for a quoted token).
//   tstop[i]   index one past the last character of token i, so the token
//              length is tstop[i] - tstart[i]. Adjacent separators and a
//              trailing separator produce zero-length tokens.
//
// Returns the number of tokens found, 0 for an empty `buffer`, or kInvalid
// when `buffer` starts with `sep`, when a quoted token is never closed, or
// when the internal consistency checks at the end fail.
//
// NOTE(review): on the kInvalid paths after the first check, tstart and tstop
// are returned partially filled rather than emptied.
// NOTE(review): an unquoted token whose last character is a double quote has
// that character excluded from the token, because the stop position is chosen
// by looking only at the character before the separator (or at the end of the
// buffer), not at whether the token was opened with a quote. Confirm whether
// this is intended.
int | ||
TokeniseWithQuotes(const const_IWSubstring& buffer, | ||
char sep, | ||
resizable_array<int>& tstart, | ||
resizable_array<int>& tstop) { | ||
// Reset both output arrays to zero elements before filling them
// (presumably retaining their allocation, going by the method name).
tstart.resize_keep_storage(0); | ||
tstop.resize_keep_storage(0); | ||
|
||
static constexpr char kDQuote = '"'; | ||
|
||
const int nchars = buffer.length(); | ||
// An empty buffer holds no tokens; this is not treated as an error.
if (nchars == 0) { | ||
return 0; | ||
} | ||
|
||
// Maybe allow for empty token at start? | ||
if (buffer[0] == sep) { | ||
return kInvalid; | ||
} | ||
|
||
bool inside_quoted_string = false; | ||
// The buffer is non-empty and does not start with sep, so there is at
// least one token; each separator seen outside quotes adds one more.
int ntokens = 1; | ||
|
||
// Record the start of the first token, skipping an opening quote if present.
if (buffer[0] == kDQuote) { | ||
tstart << 1; | ||
inside_quoted_string = true; | ||
} else { | ||
tstart << 0; | ||
} | ||
|
||
// Character 0 was handled above, so scanning starts at index 1 and
// buffer[i-1] is always a valid index inside the loop.
for (int i = 1; i < nchars; ++i) { | ||
const char c = buffer[i]; | ||
// One character of lookahead; a NUL stands in for end of buffer.
char next_char; | ||
if (i == nchars - 1) { | ||
next_char = '\0'; | ||
} else { | ||
next_char = buffer[i + 1]; | ||
} | ||
|
||
if (inside_quoted_string) { | ||
// Inside quotes everything is token content, including sep. Only a quote
// that is followed by sep or by end of buffer closes the token.
if (c == kDQuote && (next_char == sep || next_char == '\0')) { | ||
inside_quoted_string = false; | ||
} | ||
} else if (c == sep) { | ||
// A separator outside quotes ends the current token. If the previous
// character is a quote, stop one position earlier so it is excluded.
if (buffer[i-1] == kDQuote) { | ||
tstop << (i - 1); | ||
} else { | ||
tstop << (i - 0); | ||
} | ||
// The next token starts after the separator, or after the quote that
// follows the separator when there is one.
if (next_char == kDQuote) { | ||
tstart << (i + 2); | ||
} else { | ||
tstart << (i + 1); | ||
} | ||
++ntokens; | ||
} else if (c == kDQuote && buffer[i-1] == sep) { | ||
// Opening quote of the token whose start was recorded at the separator.
inside_quoted_string = true; | ||
} | ||
} | ||
|
||
// Reaching the end of the buffer while still inside quotes is an error.
if (inside_quoted_string) { | ||
cerr << "TokeniseWithQuotes:unclosed quote '" << buffer << "'\n"; | ||
return kInvalid; | ||
} | ||
|
||
// Close the final token, excluding a trailing quote if the buffer ends
// with one.
if (buffer.ends_with(kDQuote)) { | ||
tstop << (buffer.length() - 1); | ||
} else { | ||
tstop << (buffer.length() - 0); | ||
} | ||
|
||
// Consistency checks: every start needs a matching stop, and the number of
// recorded tokens must agree with the running count.
if (tstart.size() != tstop.size()) { | ||
cerr << "TokeniseWithQuotes::Mismatch between opening and closing tokens\n"; | ||
cerr << tstart.size() << " vs " << tstop.size() << '\n'; | ||
return kInvalid; | ||
} | ||
|
||
if (tstart.number_elements() != ntokens) { | ||
cerr << "TokeniseWithQuotes:Mismatch btw tokens " << ntokens << | ||
" and array size " << tstart.size() << '\n'; | ||
return kInvalid; | ||
} | ||
|
||
// #define DEBUG_TOKENISE_WITH_QUOTES | ||
#ifdef DEBUG_TOKENISE_WITH_QUOTES | ||
// NOTE(review): tstop[i] can equal buffer.length(), and tstart[i] can too
// for a trailing empty token, so the buffer[...] reads below can index one
// past the last character when this dump is enabled.
cerr << "Found " << ntokens << " tokens\n"; | ||
for (int i = 0; i < tstart.number_elements(); ++i) { | ||
cerr << ' ' << i << " start " << tstart[i] << ' ' << buffer[tstart[i]] << | ||
" stop " << tstop[i] << ' ' << buffer[tstop[i]] << '\n'; | ||
} | ||
#endif | ||
|
||
return ntokens; | ||
} | ||
} // namespace iwstring |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
|
||
#include "googlemock/include/gmock/gmock.h" | ||
#include "googletest/include/gtest/gtest.h" | ||
|
||
#include "iwstring.h" | ||
|
||
namespace { | ||
|
||
using iwstring::TokeniseWithQuotes; | ||
|
||
// One parameterised test case for TokeniseWithQuotes.
struct Data { | ||
// Text handed to TokeniseWithQuotes.
IWString buffer; | ||
// Separator character handed to TokeniseWithQuotes.
char sep; | ||
// Expected return value. A negative value means the call is expected to
// fail, in which case the test does not examine individual tokens.
int ntokens; | ||
// Expected token text, in order; compared only when ntokens is not negative.
std::vector<const char*> expected; | ||
}; | ||
|
||
// Value-parameterised fixture: each test receives one Data case via GetParam
// and uses the two arrays below as the output arguments of TokeniseWithQuotes.
class TestTokenise: public testing::TestWithParam<Data> { | ||
protected: | ||
// Receives the token start offsets.
resizable_array<int> _tstart; | ||
// Receives the token stop offsets.
resizable_array<int> _tstop; | ||
}; | ||
|
||
// Runs TokeniseWithQuotes on the case buffer, checks the returned token count,
// then rebuilds each token from its start and stop offsets and compares it
// with the expected text.
// NOTE(review): the count check uses EXPECT_EQ, which records a failure but
// lets the test continue. If the call returns fewer tokens than expected, the
// loop below still indexes _tstart and _tstop up to params.ntokens. Using
// ASSERT_EQ would stop the test before that happens.
TEST_P(TestTokenise, TestTokenise) { | ||
const auto params = GetParam(); | ||
EXPECT_EQ(TokeniseWithQuotes(params.buffer, params.sep, _tstart, _tstop), params.ntokens) << | ||
params.buffer; | ||
|
||
// Expected failure encountered, cannot extract matching tokens. | ||
if (params.ntokens < 0) { | ||
return; | ||
} | ||
|
||
for (int i = 0; i < params.ntokens; ++i) { | ||
// b is the offset of the first character, e is one past the last.
int b = _tstart[i]; | ||
int e = _tstop[i]; | ||
// std::cerr << "b " << b << " e " << e << '\n'; | ||
// View of the token inside the original buffer; nothing is copied here.
const_IWSubstring token(params.buffer.rawdata() + b, e - b); | ||
EXPECT_EQ(params.expected[i], token) << i << " mismatch '" << params.expected[i] << | ||
"' got '" << token << "' in " << params.buffer; | ||
} | ||
} | ||
// Test cases, all using a comma separator. They cover plain unquoted tokens,
// quoted tokens, a mix of both, a separator inside quotes, empty tokens
// between adjacent separators, an empty token after a trailing separator, and
// finally a buffer that starts with the separator, which is expected to be
// rejected (ntokens of -1, so its expected token list is never compared).
INSTANTIATE_TEST_SUITE_P(TestTokenise, TestTokenise, testing::Values( | ||
Data{"a,b", ',', 2, {"a", "b"}}, | ||
Data{"aa,b", ',', 2, {"aa", "b"}}, | ||
Data{"aa,bb", ',', 2, {"aa", "bb"}}, | ||
Data{"aaa,bb", ',', 2, {"aaa", "bb"}}, | ||
Data{R"("a","b")", ',', 2, {"a", "b"}}, | ||
Data{R"("a a","b")", ',', 2, {"a a", "b"}}, | ||
Data{R"("a a","b b")", ',', 2, {"a a", "b b"}}, | ||
Data{R"(a,"b b")", ',', 2, {"a", "b b"}}, | ||
Data{R"("a a",b)", ',', 2, {"a a", "b"}}, | ||
Data{R"("a,a",b)", ',', 2, {"a,a", "b"}}, | ||
Data{R"("a,a",,b)", ',', 3, {"a,a", "", "b"}}, | ||
Data{R"("a,a",,b,)", ',', 4, {"a,a", "", "b", ""}}, | ||
Data{R"(,"a,a",,b,)", ',', -1, {"", "a,a", "", "b", ""}} | ||
)); | ||
|
||
|
||
} // namespace |
Oops, something went wrong.