Skip to content

Commit 0e1fdd5

Browse files
committed
Initial string literal concat support (except interpolation not yet working)
Allows code like std::cout << "Hello " "world\n"; Note that in Cpp2 this is done in the grammar inside the language, whereas in Cpp1 it's done in the preprocessor outside the language.
1 parent e89b6ba commit 0e1fdd5

File tree

3 files changed

+63
-35
lines changed

3 files changed

+63
-35
lines changed

source/lex.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,8 +1659,8 @@ auto lex_line(
16591659
}
16601660

16611661
//G string-literal:
1662-
//G encoding-prefix? '"' s-char-seq? '"'
1663-
//G encoding-prefix? 'R"' d-char-seq? '(' s-char-seq? ')' d-char-seq? '"'
1662+
//G string-literal? encoding-prefix? '"' s-char-seq? '"'
1663+
//G string-literal? encoding-prefix? 'R"' d-char-seq? '(' s-char-seq? ')' d-char-seq? '"'
16641664
//G
16651665
//G s-char-seq:
16661666
//G interpolation? s-char

source/parse.h

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -186,44 +186,70 @@ struct primary_expression_node
186186

187187

188188
struct literal_node {
189-
token const* literal = {};
190-
token const* user_defined_suffix = {};
189+
// A literal is represented as a sequence of tokens:
190+
// - length 1: a literal (most common)
191+
// - length 2: a literal and a user-defined suffix
192+
// - length >= 2: a series of one or more of either of the above for string literals
193+
std::vector <token const*> pieces = {};
191194

192195
// API
193196
//
194197
auto get_token() const
195198
-> token const*
196199
{
197-
return literal;
200+
assert(!pieces.empty());
201+
return pieces.front();
198202
}
199203

200204
auto to_string() const
201205
-> std::string
202206
{
203-
assert (literal);
204-
auto ret = literal->to_string();
205-
if (user_defined_suffix) {
206-
ret += user_defined_suffix->to_string();
207+
assert(!pieces.empty());
208+
auto ret = std::string{};
209+
210+
for (bool first = true; auto p : pieces)
211+
{
212+
assert(p);
213+
214+
// Add a space to non-first pieces that start with " (i.e., not a UDL suffix)
215+
if (
216+
!std::exchange(first, false)
217+
&& p->as_string_view().starts_with("\"")
218+
)
219+
{
220+
ret += " ";
221+
}
222+
223+
ret += *p;
207224
}
225+
208226
return ret;
209227
}
210228

229+
auto has_user_defined_suffix()
230+
-> bool
231+
{
232+
return
233+
std::ssize(pieces) > 1
234+
&& !pieces[1]->as_string_view().starts_with("\"")
235+
;
236+
}
237+
211238
// Internals
212239
//
213240
auto position() const
214241
-> source_position
215242
{
216-
assert (literal);
217-
return literal->position();
243+
assert(!pieces.empty());
244+
return get_token()->position();
218245
}
219246

220247
auto visit(auto& v, int depth) -> void
221248
{
222249
v.start(*this, depth);
223-
assert (literal);
224-
literal->visit(v, depth+1);
225-
if (user_defined_suffix) {
226-
user_defined_suffix->visit(v, depth+1);
250+
for (auto p : pieces) {
251+
assert(p);
252+
p->visit(v, depth+1);
227253
}
228254
v.end(*this, depth);
229255
}
@@ -4617,18 +4643,7 @@ auto pretty_print_visualize(primary_expression_node const& n, int indent)
46174643
auto pretty_print_visualize(literal_node const& n, int)
46184644
-> std::string
46194645
{
4620-
// TODO: This is an initial visualizer implementation, and still
4621-
// skips a few rarer things (such as raw string literals)
4622-
4623-
assert(n.literal);
4624-
4625-
auto ret = n.literal->to_string();
4626-
4627-
if (n.user_defined_suffix) {
4628-
ret += n.user_defined_suffix->as_string_view();
4629-
}
4630-
4631-
return ret;
4646+
return n.to_string();
46324647
}
46334648

46344649

@@ -6908,12 +6923,26 @@ class parser
69086923
{
69096924
if (is_literal(curr().type())) {
69106925
auto n = std::make_unique<literal_node>();
6911-
n->literal = &curr();
6926+
n->pieces.push_back( &curr() );
69126927
next();
69136928
if (curr().type() == lexeme::UserDefinedLiteralSuffix) {
6914-
n->user_defined_suffix = &curr();
6929+
n->pieces.push_back(&curr());
69156930
next();
69166931
}
6932+
6933+
// String literals can have multiple chunks, such as "xyzzy" "plugh"
6934+
// (in Cpp1 these are merged in the preprocessor, in Cpp2 they're in the grammar)
6935+
if (n->pieces.front()->type() == lexeme::StringLiteral) {
6936+
while (curr().type() == lexeme::StringLiteral) {
6937+
n->pieces.push_back(&curr());
6938+
next();
6939+
if (curr().type() == lexeme::UserDefinedLiteralSuffix) {
6940+
n->pieces.push_back(&curr());
6941+
next();
6942+
}
6943+
}
6944+
}
6945+
69176946
return n;
69186947
}
69196948
return {};

source/to_cpp1.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,10 +1699,9 @@ class cppfront
16991699
pos = n.position();
17001700
}
17011701

1702-
assert(n.literal);
1703-
emit(*n.literal);
1704-
if (n.user_defined_suffix) {
1705-
emit(*n.user_defined_suffix);
1702+
for (auto p : n.pieces) {
1703+
assert(p);
1704+
emit(*p);
17061705
}
17071706
}
17081707

@@ -3458,7 +3457,7 @@ class cppfront
34583457
{
34593458
if (auto lit = i->expr_list->expressions.front().expr->get_literal();
34603459
lit
3461-
&& lit->literal->type() == lexeme::DecimalLiteral
3460+
&& lit->get_token()->type() == lexeme::DecimalLiteral
34623461
)
34633462
{
34643463
prefix.emplace_back( "CPP2_ASSERT_IN_BOUNDS_LITERAL(", i->op->position() );
@@ -3561,7 +3560,7 @@ class cppfront
35613560
&& is_literal(t->type())
35623561
&& t->type() != lexeme::StringLiteral
35633562
&& t->type() != lexeme::FloatLiteral
3564-
&& !std::get<primary_expression_node::literal>(p->expr)->user_defined_suffix
3563+
&& !std::get<primary_expression_node::literal>(p->expr)->has_user_defined_suffix()
35653564
&& std::ssize(n.ops) > 0
35663565
&& *n.ops[0].op == "as"
35673566
)

0 commit comments

Comments
 (0)