Skip to content

Commit e7d85ea

Browse files
Special-case ## pasting to string/character constants (issue #168) (#255)
This enables the use of macros to append user-defined literal suffixes (operator "") to string and character constants.
1 parent e4cb748 commit e7d85ea

3 files changed

Lines changed: 193 additions & 43 deletions

File tree

run-tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def cleanup(out):
3838
'has_attribute.cpp',
3939
'header_lookup1.c', # missing include <stddef.h>
4040
'line-directive-output.c',
41-
'macro_paste_hashhash.c',
41+
# 'macro_paste_hashhash.c',
4242
'microsoft-ext.c',
4343
'normalize-3.c', # gcc has different output \uAC00 vs \U0000AC00 on cygwin/linux
4444
'pr63831-1.c', # __has_attribute => works differently on cygwin/linux

simplecpp.cpp

Lines changed: 72 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ static bool isOct(const std::string &s)
5858
return s.size()>1 && (s[0]=='0') && (s[1] >= '0') && (s[1] < '8');
5959
}
6060

61+
static bool isStringLiteral(const std::string &s)
62+
{
63+
return s.size() > 1 && (s[0]=='\"') && (*s.rbegin()=='\"');
64+
}
65+
66+
static bool isCharLiteral(const std::string &s)
67+
{
68+
// char literal patterns can include 'a', '\t', '\000', '\xff', 'abcd', and maybe ''
69+
// This only checks for the surrounding '' but doesn't parse the content.
70+
return s.size() > 1 && (s[0]=='\'') && (*s.rbegin()=='\'');
71+
}
6172

6273
static const simplecpp::TokenString DEFINE("define");
6374
static const simplecpp::TokenString UNDEF("undef");
@@ -1922,7 +1933,8 @@ namespace simplecpp {
19221933
throw invalidHashHash(tok->location, name());
19231934

19241935
bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>";
1925-
if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual)
1936+
bool canBeConcatenatedStringOrChar = isStringLiteral(A->str()) || isCharLiteral(A->str());
1937+
if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual && !canBeConcatenatedStringOrChar)
19261938
throw invalidHashHash(tok->location, name());
19271939

19281940
Token *B = tok->next->next;
@@ -1933,55 +1945,73 @@ namespace simplecpp {
19331945
(!canBeConcatenatedWithEqual && B->op == '='))
19341946
throw invalidHashHash(tok->location, name());
19351947

1936-
std::string strAB;
1937-
1938-
const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];
1948+
// Superficial check; more in-depth would in theory be possible _after_ expandArg
1949+
if (canBeConcatenatedStringOrChar && (B->number || !B->name))
1950+
throw invalidHashHash(tok->location, name());
19391951

19401952
TokenList tokensB(files);
1941-
if (expandArg(&tokensB, B, parametertokens)) {
1942-
if (tokensB.empty())
1943-
strAB = A->str();
1944-
else if (varargs && A->op == ',') {
1945-
strAB = ",";
1953+
const Token *nextTok = B->next;
1954+
1955+
if (canBeConcatenatedStringOrChar) {
1956+
// It seems clearer to handle this case separately even though the code is similar to the general case: here A and B must not be merged into a single token.
1957+
// TODO: Determine whether a following ## or varargs may still apply in this case, and how to provoke that in a test.
1958+
if (expandArg(&tokensB, B, parametertokens)) {
1959+
for (Token *b = tokensB.front(); b; b = b->next)
1960+
b->location = loc;
19461961
} else {
1947-
strAB = A->str() + tokensB.cfront()->str();
1948-
tokensB.deleteToken(tokensB.front());
1962+
tokensB.push_back(new Token(*B));
1963+
tokensB.back()->location = loc;
19491964
}
1950-
} else {
1951-
strAB = A->str() + B->str();
1952-
}
1953-
1954-
const Token *nextTok = B->next;
1955-
if (varargs && tokensB.empty() && tok->previous->str() == ",")
1956-
output->deleteToken(A);
1957-
else if (strAB != "," && macros.find(strAB) == macros.end()) {
1958-
A->setstr(strAB);
1959-
for (Token *b = tokensB.front(); b; b = b->next)
1960-
b->location = loc;
19611965
output->takeTokens(tokensB);
1962-
} else if (nextTok->op == '#' && nextTok->next->op == '#') {
1963-
TokenList output2(files);
1964-
output2.push_back(new Token(strAB, tok->location));
1965-
nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
1966-
output->deleteToken(A);
1967-
output->takeTokens(output2);
19681966
} else {
1969-
output->deleteToken(A);
1970-
TokenList tokens(files);
1971-
tokens.push_back(new Token(strAB, tok->location));
1972-
// for function like macros, push the (...)
1973-
if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
1974-
const MacroMap::const_iterator it = macros.find(strAB);
1975-
if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
1976-
const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
1977-
if (tok2)
1978-
nextTok = tok2->next;
1967+
std::string strAB;
1968+
1969+
const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];
1970+
1971+
if (expandArg(&tokensB, B, parametertokens)) {
1972+
if (tokensB.empty())
1973+
strAB = A->str();
1974+
else if (varargs && A->op == ',') {
1975+
strAB = ",";
1976+
} else {
1977+
strAB = A->str() + tokensB.cfront()->str();
1978+
tokensB.deleteToken(tokensB.front());
1979+
}
1980+
} else {
1981+
strAB = A->str() + B->str();
1982+
}
1983+
1984+
if (varargs && tokensB.empty() && tok->previous->str() == ",")
1985+
output->deleteToken(A);
1986+
else if (strAB != "," && macros.find(strAB) == macros.end()) {
1987+
A->setstr(strAB);
1988+
for (Token *b = tokensB.front(); b; b = b->next)
1989+
b->location = loc;
1990+
output->takeTokens(tokensB);
1991+
} else if (nextTok->op == '#' && nextTok->next->op == '#') {
1992+
TokenList output2(files);
1993+
output2.push_back(new Token(strAB, tok->location));
1994+
nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
1995+
output->deleteToken(A);
1996+
output->takeTokens(output2);
1997+
} else {
1998+
output->deleteToken(A);
1999+
TokenList tokens(files);
2000+
tokens.push_back(new Token(strAB, tok->location));
2001+
// for function like macros, push the (...)
2002+
if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
2003+
const MacroMap::const_iterator it = macros.find(strAB);
2004+
if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
2005+
const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
2006+
if (tok2)
2007+
nextTok = tok2->next;
2008+
}
19792009
}
2010+
expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
2011+
for (Token *b = tokensB.front(); b; b = b->next)
2012+
b->location = loc;
2013+
output->takeTokens(tokensB);
19802014
}
1981-
expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
1982-
for (Token *b = tokensB.front(); b; b = b->next)
1983-
b->location = loc;
1984-
output->takeTokens(tokensB);
19852015
}
19862016

19872017
return nextTok;

test.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,105 @@ static void hashhash13()
10711071
ASSERT_EQUALS("\n& ab", preprocess(code2));
10721072
}
10731073

1074+
static void hashhash_string_literal()
1075+
{
1076+
const char code[] =
1077+
"#define UL(x) x##_ul\n"
1078+
"\"ABC\"_ul;\n"
1079+
"UL(\"ABC\");";
1080+
1081+
ASSERT_EQUALS("\n\"ABC\" _ul ;\n\"ABC\" _ul ;", preprocess(code));
1082+
}
1083+
1084+
static void hashhash_string_wrapped()
1085+
{
1086+
const char code[] =
1087+
"#define CONCAT(a,b) a##b\n"
1088+
"#define STR(x) CONCAT(x,s)\n"
1089+
"STR(\"ABC\");";
1090+
1091+
ASSERT_EQUALS("\n\n\"ABC\" s ;", preprocess(code));
1092+
}
1093+
1094+
static void hashhash_char_literal()
1095+
{
1096+
const char code[] =
1097+
"#define CH(x) x##_ch\n"
1098+
"CH('a');";
1099+
1100+
ASSERT_EQUALS("\n'a' _ch ;", preprocess(code));
1101+
}
1102+
1103+
static void hashhash_multichar_literal()
1104+
{
1105+
const char code[] =
1106+
"#define CH(x) x##_ch\n"
1107+
"CH('abcd');";
1108+
1109+
ASSERT_EQUALS("\n'abcd' _ch ;", preprocess(code));
1110+
}
1111+
1112+
static void hashhash_char_escaped()
1113+
{
1114+
const char code[] =
1115+
"#define CH(x) x##_ch\n"
1116+
"CH('\\'');";
1117+
1118+
ASSERT_EQUALS("\n'\\'' _ch ;", preprocess(code));
1119+
}
1120+
1121+
static void hashhash_string_nothing()
1122+
{
1123+
const char code[] =
1124+
"#define CONCAT(a,b) a##b\n"
1125+
"CONCAT(\"ABC\",);";
1126+
1127+
ASSERT_EQUALS("\n\"ABC\" ;", preprocess(code));
1128+
}
1129+
1130+
static void hashhash_string_char()
1131+
{
1132+
const char code[] =
1133+
"#define CONCAT(a,b) a##b\n"
1134+
"CONCAT(\"ABC\", 'c');";
1135+
1136+
// This works, but maybe shouldn't since the result isn't useful.
1137+
ASSERT_EQUALS("\n\"ABC\" 'c' ;", preprocess(code));
1138+
}
1139+
1140+
static void hashhash_string_name()
1141+
{
1142+
const char code[] =
1143+
"#define CONCAT(a,b) a##b\n"
1144+
"#define LIT _literal\n"
1145+
"CONCAT(\"string\", LIT);";
1146+
1147+
// TODO is this correct? clang fails because that's not really a valid thing but gcc seems to accept it
1148+
// see https://gist.github.com/patrickdowling/877a25294f069bf059f3b07f9b5b7039
1149+
1150+
ASSERT_EQUALS("\n\n\"string\" LIT ;", preprocess(code));
1151+
}
1152+
1153+
static void hashhashhash_int_literal()
1154+
{
1155+
const char code[] =
1156+
"#define CONCAT(a,b,c) a##b##c\n"
1157+
"#define PASTER(a,b,c) CONCAT(a,b,c)\n"
1158+
"PASTER(\"123\",_i,ul);";
1159+
1160+
ASSERT_EQUALS("\n\n\"123\" _iul ;", preprocess(code));
1161+
}
1162+
1163+
static void hashhash_int_literal()
1164+
{
1165+
const char code[] =
1166+
"#define PASTE(a,b) a##b\n"
1167+
"PASTE(123,_i);\n"
1168+
"1234_i;\n";
1169+
1170+
ASSERT_EQUALS("\n123_i ;\n1234_i ;", preprocess(code));
1171+
}
1172+
10741173
static void hashhash_invalid_1()
10751174
{
10761175
const char code[] = "#define f(a) (##x)\nf(1)";
@@ -1087,6 +1186,16 @@ static void hashhash_invalid_2()
10871186
ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f'.\n", toString(outputList));
10881187
}
10891188

1189+
static void hashhash_invalid_3()
1190+
{
1191+
const char code[] =
1192+
"#define BAD(x) x##12345\nBAD(\"ABC\")";
1193+
1194+
simplecpp::OutputList outputList;
1195+
preprocess(code, simplecpp::DUI(), &outputList);
1196+
ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'BAD', Invalid ## usage when expanding 'BAD'.\n", toString(outputList));
1197+
}
1198+
10901199
static void has_include_1()
10911200
{
10921201
const char code[] = "#ifdef __has_include\n"
@@ -2306,8 +2415,19 @@ int main(int argc, char **argv)
23062415
TEST_CASE(hashhash11); // #60: #define x # # #
23072416
TEST_CASE(hashhash12);
23082417
TEST_CASE(hashhash13);
2418+
TEST_CASE(hashhash_string_literal);
2419+
TEST_CASE(hashhash_string_wrapped);
2420+
TEST_CASE(hashhash_char_literal);
2421+
TEST_CASE(hashhash_multichar_literal);
2422+
TEST_CASE(hashhash_char_escaped);
2423+
TEST_CASE(hashhash_string_nothing);
2424+
TEST_CASE(hashhash_string_char);
2425+
TEST_CASE(hashhash_string_name);
2426+
TEST_CASE(hashhashhash_int_literal);
2427+
TEST_CASE(hashhash_int_literal);
23092428
TEST_CASE(hashhash_invalid_1);
23102429
TEST_CASE(hashhash_invalid_2);
2430+
TEST_CASE(hashhash_invalid_3);
23112431

23122432
// c++17 __has_include
23132433
TEST_CASE(has_include_1);

0 commit comments

Comments
 (0)