Skip to content

Commit ed31a0c

Browse files
Earlopain authored and matzbot committed
[ruby/prism] Correctly handle line continuations in %w/%i interrupted by heredocs
See https://bugs.ruby-lang.org/issues/21756. Ripper fails to parse this, but prism actually also doesn't handle it correctly. When heredocs are used, even in lowercase percent arrays there can be multiple `STRING_CONTENT` tokens. We need to concatenate them. Luckily we don't need to handle as many cases as in uppercase arrays where interpolation is allowed. ruby/prism@211677000e
1 parent f9cd94f commit ed31a0c

1 file changed

Lines changed: 62 additions & 9 deletions

File tree

prism/prism.c

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19299,18 +19299,52 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1929919299
parser_lex(parser);
1930019300
pm_token_t opening = parser->previous;
1930119301
pm_array_node_t *array = pm_array_node_create(parser, &opening);
19302+
pm_node_t *current = NULL;
1930219303

1930319304
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
1930419305
accept1(parser, PM_TOKEN_WORDS_SEP);
1930519306
if (match1(parser, PM_TOKEN_STRING_END)) break;
1930619307

19307-
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19308+
// Interpolation is not possible but nested heredocs can still lead to
19309+
// consecutive (disjoint) string tokens when the final newline is escaped.
19310+
while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
1930819311
pm_token_t opening = not_provided(parser);
1930919312
pm_token_t closing = not_provided(parser);
19310-
pm_array_node_elements_append(array, UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)));
19313+
19314+
// Record the string node, moving to interpolation if needed.
19315+
if (current == NULL) {
19316+
current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19317+
parser_lex(parser);
19318+
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19319+
pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19320+
parser_lex(parser);
19321+
pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19322+
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19323+
pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19324+
pm_token_t bounds = not_provided(parser);
19325+
19326+
pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19327+
pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19328+
pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19329+
parser_lex(parser);
19330+
19331+
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19332+
pm_interpolated_symbol_node_append(interpolated, first_string);
19333+
pm_interpolated_symbol_node_append(interpolated, second_string);
19334+
19335+
xfree(current);
19336+
current = UP(interpolated);
19337+
} else {
19338+
assert(false && "unreachable");
19339+
}
1931119340
}
1931219341

19313-
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19342+
if (current) {
19343+
pm_array_node_elements_append(array, current);
19344+
current = NULL;
19345+
} else {
19346+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19347+
}
1931419348
}
1931519349

1931619350
pm_token_t closing = parser->current;
@@ -19489,23 +19523,42 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1948919523
parser_lex(parser);
1949019524
pm_token_t opening = parser->previous;
1949119525
pm_array_node_t *array = pm_array_node_create(parser, &opening);
19492-
19493-
// skip all leading whitespaces
19494-
accept1(parser, PM_TOKEN_WORDS_SEP);
19526+
pm_node_t *current = NULL;
1949519527

1949619528
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
1949719529
accept1(parser, PM_TOKEN_WORDS_SEP);
1949819530
if (match1(parser, PM_TOKEN_STRING_END)) break;
1949919531

19500-
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19532+
// Interpolation is not possible but nested heredocs can still lead to
19533+
// consecutive (disjoint) string tokens when the final newline is escaped.
19534+
while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
1950119535
pm_token_t opening = not_provided(parser);
1950219536
pm_token_t closing = not_provided(parser);
1950319537

1950419538
pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19505-
pm_array_node_elements_append(array, string);
19539+
19540+
// Record the string node, moving to interpolation if needed.
19541+
if (current == NULL) {
19542+
current = string;
19543+
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19544+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19545+
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19546+
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19547+
pm_interpolated_string_node_append(interpolated, current);
19548+
pm_interpolated_string_node_append(interpolated, string);
19549+
current = UP(interpolated);
19550+
} else {
19551+
assert(false && "unreachable");
19552+
}
19553+
parser_lex(parser);
1950619554
}
1950719555

19508-
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19556+
if (current) {
19557+
pm_array_node_elements_append(array, current);
19558+
current = NULL;
19559+
} else {
19560+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19561+
}
1950919562
}
1951019563

1951119564
pm_token_t closing = parser->current;

0 commit comments

Comments
 (0)