Skip to content

Commit 62cde23

Browse files
committed
compile --rule-file pattern only once / extracted regular expressions code to separate file / improved error handling of --rule
1 parent e829297 commit 62cde23

10 files changed

Lines changed: 553 additions & 373 deletions

File tree

Makefile

Lines changed: 123 additions & 119 deletions
Large diffs are not rendered by default.

cli/cmdlineparser.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,18 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
10611061
#ifdef HAVE_RULES
10621062
Settings::Rule rule;
10631063
rule.pattern = 7 + argv[i];
1064+
1065+
if (rule.pattern.empty()) {
1066+
mLogger.printError("no rule pattern provided.");
1067+
return Result::Fail;
1068+
}
1069+
1070+
rule.regex = std::make_shared<Regex>(rule.pattern);
1071+
const std::string regex_err = rule.regex->compile();
1072+
if (!regex_err.empty()) {
1073+
mLogger.printError("failed to compile rule pattern '" + rule.pattern + "' (" + regex_err + ").");
1074+
return Result::Fail;
1075+
}
10641076
mSettings.rules.emplace_back(std::move(rule));
10651077
#else
10661078
mLogger.printError("Option --rule cannot be used as Cppcheck has not been built with rules support.");
@@ -1131,6 +1143,13 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a
11311143
return Result::Fail;
11321144
}
11331145

1146+
rule.regex = std::make_shared<Regex>(rule.pattern);
1147+
const std::string regex_err = rule.regex->compile();
1148+
if (!regex_err.empty()) {
1149+
mLogger.printError("unable to load rule-file '" + ruleFile + "' - pattern '" + rule.pattern + "' failed to compile (" + regex_err + ").");
1150+
return Result::Fail;
1151+
}
1152+
11341153
mSettings.rules.emplace_back(std::move(rule));
11351154
}
11361155
} else {

lib/cppcheck.cpp

Lines changed: 14 additions & 210 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,6 @@
7272

7373
#include "xml.h"
7474

75-
#ifdef HAVE_RULES
76-
#ifdef _WIN32
77-
#define PCRE_STATIC
78-
#endif
79-
#include <pcre.h>
80-
#endif
81-
8275
class SymbolDatabase;
8376

8477
static constexpr char Version[] = CPPCHECK_VERSION_STRING;
@@ -1162,135 +1155,6 @@ bool CppCheck::hasRule(const std::string &tokenlist) const
11621155
});
11631156
}
11641157

1165-
static const char * pcreErrorCodeToString(const int pcreExecRet)
1166-
{
1167-
switch (pcreExecRet) {
1168-
case PCRE_ERROR_NULL:
1169-
return "Either code or subject was passed as NULL, or ovector was NULL "
1170-
"and ovecsize was not zero (PCRE_ERROR_NULL)";
1171-
case PCRE_ERROR_BADOPTION:
1172-
return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)";
1173-
case PCRE_ERROR_BADMAGIC:
1174-
return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, "
1175-
"to catch the case when it is passed a junk pointer and to detect when a "
1176-
"pattern that was compiled in an environment of one endianness is run in "
1177-
"an environment with the other endianness. This is the error that PCRE "
1178-
"gives when the magic number is not present (PCRE_ERROR_BADMAGIC)";
1179-
case PCRE_ERROR_UNKNOWN_NODE:
1180-
return "While running the pattern match, an unknown item was encountered in the "
1181-
"compiled pattern. This error could be caused by a bug in PCRE or by "
1182-
"overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)";
1183-
case PCRE_ERROR_NOMEMORY:
1184-
return "If a pattern contains back references, but the ovector that is passed "
1185-
"to pcre_exec() is not big enough to remember the referenced substrings, "
1186-
"PCRE gets a block of memory at the start of matching to use for this purpose. "
1187-
"If the call via pcre_malloc() fails, this error is given. The memory is "
1188-
"automatically freed at the end of matching. This error is also given if "
1189-
"pcre_stack_malloc() fails in pcre_exec(). "
1190-
"This can happen only when PCRE has been compiled with "
1191-
"--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)";
1192-
case PCRE_ERROR_NOSUBSTRING:
1193-
return "This error is used by the pcre_copy_substring(), pcre_get_substring(), "
1194-
"and pcre_get_substring_list() functions (see below). "
1195-
"It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)";
1196-
case PCRE_ERROR_MATCHLIMIT:
1197-
return "The backtracking limit, as specified by the match_limit field in a pcre_extra "
1198-
"structure (or defaulted) was reached. "
1199-
"See the description above (PCRE_ERROR_MATCHLIMIT)";
1200-
case PCRE_ERROR_CALLOUT:
1201-
return "This error is never generated by pcre_exec() itself. "
1202-
"It is provided for use by callout functions that want to yield a distinctive "
1203-
"error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)";
1204-
case PCRE_ERROR_BADUTF8:
1205-
return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, "
1206-
"and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector "
1207-
"(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 "
1208-
"character is placed in the first element, and a reason code is placed in the "
1209-
"second element. The reason codes are listed in the following section. For "
1210-
"backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated "
1211-
"UTF-8 character at the end of the subject (reason codes 1 to 5), "
1212-
"PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8";
1213-
case PCRE_ERROR_BADUTF8_OFFSET:
1214-
return "The UTF-8 byte sequence that was passed as a subject was checked and found to "
1215-
"be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of "
1216-
"startoffset did not point to the beginning of a UTF-8 character or the end of "
1217-
"the subject (PCRE_ERROR_BADUTF8_OFFSET)";
1218-
case PCRE_ERROR_PARTIAL:
1219-
return "The subject string did not match, but it did match partially. See the "
1220-
"pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)";
1221-
case PCRE_ERROR_BADPARTIAL:
1222-
return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL "
1223-
"option was used with a compiled pattern containing items that were not supported "
1224-
"for partial matching. From release 8.00 onwards, there are no restrictions on "
1225-
"partial matching (PCRE_ERROR_BADPARTIAL)";
1226-
case PCRE_ERROR_INTERNAL:
1227-
return "An unexpected internal error has occurred. This error could be caused by a bug "
1228-
"in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)";
1229-
case PCRE_ERROR_BADCOUNT:
1230-
return "This error is given if the value of the ovecsize argument is negative "
1231-
"(PCRE_ERROR_BADCOUNT)";
1232-
case PCRE_ERROR_RECURSIONLIMIT:
1233-
return "The internal recursion limit, as specified by the match_limit_recursion "
1234-
"field in a pcre_extra structure (or defaulted) was reached. "
1235-
"See the description above (PCRE_ERROR_RECURSIONLIMIT)";
1236-
case PCRE_ERROR_DFA_UITEM:
1237-
return "PCRE_ERROR_DFA_UITEM";
1238-
case PCRE_ERROR_DFA_UCOND:
1239-
return "PCRE_ERROR_DFA_UCOND";
1240-
case PCRE_ERROR_DFA_WSSIZE:
1241-
return "PCRE_ERROR_DFA_WSSIZE";
1242-
case PCRE_ERROR_DFA_RECURSE:
1243-
return "PCRE_ERROR_DFA_RECURSE";
1244-
case PCRE_ERROR_NULLWSLIMIT:
1245-
return "PCRE_ERROR_NULLWSLIMIT";
1246-
case PCRE_ERROR_BADNEWLINE:
1247-
return "An invalid combination of PCRE_NEWLINE_xxx options was "
1248-
"given (PCRE_ERROR_BADNEWLINE)";
1249-
case PCRE_ERROR_BADOFFSET:
1250-
return "The value of startoffset was negative or greater than the length "
1251-
"of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)";
1252-
case PCRE_ERROR_SHORTUTF8:
1253-
return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject "
1254-
"string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. "
1255-
"Information about the failure is returned as for PCRE_ERROR_BADUTF8. "
1256-
"It is in fact sufficient to detect this case, but this special error code for "
1257-
"PCRE_PARTIAL_HARD precedes the implementation of returned information; "
1258-
"it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)";
1259-
case PCRE_ERROR_RECURSELOOP:
1260-
return "This error is returned when pcre_exec() detects a recursion loop "
1261-
"within the pattern. Specifically, it means that either the whole pattern "
1262-
"or a subpattern has been called recursively for the second time at the same "
1263-
"position in the subject string. Some simple patterns that might do this "
1264-
"are detected and faulted at compile time, but more complicated cases, "
1265-
"in particular mutual recursions between two different subpatterns, "
1266-
"cannot be detected until run time (PCRE_ERROR_RECURSELOOP)";
1267-
case PCRE_ERROR_JIT_STACKLIMIT:
1268-
return "This error is returned when a pattern that was successfully studied "
1269-
"using a JIT compile option is being matched, but the memory available "
1270-
"for the just-in-time processing stack is not large enough. See the pcrejit "
1271-
"documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)";
1272-
case PCRE_ERROR_BADMODE:
1273-
return "This error is given if a pattern that was compiled by the 8-bit library "
1274-
"is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)";
1275-
case PCRE_ERROR_BADENDIANNESS:
1276-
return "This error is given if a pattern that was compiled and saved is reloaded on a "
1277-
"host with different endianness. The utility function pcre_pattern_to_host_byte_order() "
1278-
"can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)";
1279-
case PCRE_ERROR_DFA_BADRESTART:
1280-
return "PCRE_ERROR_DFA_BADRESTART";
1281-
#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
1282-
case PCRE_ERROR_BADLENGTH:
1283-
return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)";
1284-
case PCRE_ERROR_JIT_BADOPTION:
1285-
return "This error is returned when a pattern that was successfully studied using a JIT compile "
1286-
"option is being matched, but the matching mode (partial or complete match) does not correspond "
1287-
"to any JIT compilation mode. When the JIT fast path function is used, this error may be "
1288-
"also given for invalid options. See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
1289-
#endif
1290-
}
1291-
return "";
1292-
}
1293-
12941158
void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
12951159
{
12961160
// There is no rule to execute
@@ -1311,73 +1175,7 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
13111175
reportOut("Processing rule: " + rule.pattern, Color::FgGreen);
13121176
}
13131177

1314-
const char *pcreCompileErrorStr = nullptr;
1315-
int erroffset = 0;
1316-
pcre * const re = pcre_compile(rule.pattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
1317-
if (!re) {
1318-
if (pcreCompileErrorStr) {
1319-
const std::string msg = "pcre_compile failed: " + std::string(pcreCompileErrorStr);
1320-
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
1321-
emptyString,
1322-
Severity::error,
1323-
msg,
1324-
"pcre_compile",
1325-
Certainty::normal);
1326-
1327-
reportErr(errmsg);
1328-
}
1329-
continue;
1330-
}
1331-
1332-
// Optimize the regex, but only if PCRE_CONFIG_JIT is available
1333-
#ifdef PCRE_CONFIG_JIT
1334-
const char *pcreStudyErrorStr = nullptr;
1335-
pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
1336-
// pcre_study() returns NULL for both errors and when it can not optimize the regex.
1337-
// The last argument is how one checks for errors.
1338-
// It is NULL if everything works, and points to an error string otherwise.
1339-
if (pcreStudyErrorStr) {
1340-
const std::string msg = "pcre_study failed: " + std::string(pcreStudyErrorStr);
1341-
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
1342-
emptyString,
1343-
Severity::error,
1344-
msg,
1345-
"pcre_study",
1346-
Certainty::normal);
1347-
1348-
reportErr(errmsg);
1349-
// pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
1350-
pcre_free(re);
1351-
continue;
1352-
}
1353-
#else
1354-
const pcre_extra * const pcreExtra = nullptr;
1355-
#endif
1356-
1357-
int pos = 0;
1358-
int ovector[30]= {0};
1359-
while (pos < (int)str.size()) {
1360-
const int pcreExecRet = pcre_exec(re, pcreExtra, str.c_str(), (int)str.size(), pos, 0, ovector, 30);
1361-
if (pcreExecRet < 0) {
1362-
const std::string errorMessage = pcreErrorCodeToString(pcreExecRet);
1363-
if (!errorMessage.empty()) {
1364-
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
1365-
emptyString,
1366-
Severity::error,
1367-
std::string("pcre_exec failed: ") + errorMessage,
1368-
"pcre_exec",
1369-
Certainty::normal);
1370-
1371-
reportErr(errmsg);
1372-
}
1373-
break;
1374-
}
1375-
const auto pos1 = (unsigned int)ovector[0];
1376-
const auto pos2 = (unsigned int)ovector[1];
1377-
1378-
// jump to the end of the match for the next pcre_exec
1379-
pos = (int)pos2;
1380-
1178+
auto f = [&](int pos1, int pos2) {
13811179
// determine location..
13821180
std::string file = list.getSourceFilePath();
13831181
int line = 0;
@@ -1404,15 +1202,21 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
14041202

14051203
// Report error
14061204
reportErr(errmsg);
1407-
}
1205+
};
1206+
1207+
assert(rule.regex);
14081208

1409-
pcre_free(re);
1410-
#ifdef PCRE_CONFIG_JIT
1411-
// Free up the EXTRA PCRE value (may be NULL at this point)
1412-
if (pcreExtra) {
1413-
pcre_free_study(pcreExtra);
1209+
const std::string err = rule.regex->match(str, f);
1210+
if (!err.empty()) {
1211+
const ErrorMessage errmsg(std::list<ErrorMessage::FileLocation>(),
1212+
emptyString,
1213+
Severity::error,
1214+
err,
1215+
"pcre_exec",
1216+
Certainty::normal);
1217+
1218+
reportErr(errmsg);
14141219
}
1415-
#endif
14161220
}
14171221
}
14181222
#endif

lib/cppcheck.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
<ClCompile Include="platform.cpp" />
7979
<ClCompile Include="preprocessor.cpp" />
8080
<ClCompile Include="programmemory.cpp" />
81+
<ClCompile Include="regex.cpp" />
8182
<ClCompile Include="reverseanalyzer.cpp" />
8283
<ClCompile Include="settings.cpp" />
8384
<ClCompile Include="summaries.cpp" />
@@ -149,6 +150,7 @@
149150
<ClInclude Include="precompiled.h" />
150151
<ClInclude Include="preprocessor.h" />
151152
<ClInclude Include="programmemory.h" />
153+
<ClInclude Include="regex.h" />
152154
<ClInclude Include="reverseanalyzer.h" />
153155
<ClInclude Include="settings.h" />
154156
<ClInclude Include="smallvector.h" />

lib/lib.pri

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ HEADERS += $${PWD}/addoninfo.h \
6060
$${PWD}/precompiled.h \
6161
$${PWD}/preprocessor.h \
6262
$${PWD}/programmemory.h \
63+
$${PWD}/regex.h \
6364
$${PWD}/reverseanalyzer.h \
6465
$${PWD}/settings.h \
6566
$${PWD}/smallvector.h \
@@ -133,6 +134,7 @@ SOURCES += $${PWD}/valueflow.cpp \
133134
$${PWD}/platform.cpp \
134135
$${PWD}/preprocessor.cpp \
135136
$${PWD}/programmemory.cpp \
137+
$${PWD}/regex.cpp \
136138
$${PWD}/reverseanalyzer.cpp \
137139
$${PWD}/settings.cpp \
138140
$${PWD}/summaries.cpp \

0 commit comments

Comments
 (0)