Skip to content

Commit df422b3

Browse files
refactor(iast): code safety and memory improvements (#18002)
## Description This PR updates the AppSec code to harden it against previously-unhandled errors: - Uninitialized `builtins_denylist` entries read by `str_in_list` - `strdup` failure returns -1 without `PyErr_NoMemory` call - `PyList_New returning` NULL but proceeding with `PyList_Append` - `PyUnicode_FromString` returning NULL being passed to `PyObject_CallMethodObjArgs` - `new_pyobject_id` returning NULL but being used for `set_ranges` - `PyTuple_New` returning NULL but being used for `PyTuple_SET_ITEM` - `safe_allocate_tainted_object` unchecked NULL values - `get_range_by_hash` unchecked nullptr dereference - `PyTuple_Size` error -1 cast to `SIZE_MAX`, hiding original exception - NULL `PyUnicode_AsUTF8` silently becomes default encoding - Duplicate call to `kwnames_to_kwargs` resulting in one leaked dict every call - `modules_list` and `strdup`'d tokens leaked on `strdup` error paths Co-authored-by: thomas.kowalski <thomas.kowalski@datadoghq.com>
1 parent 62f01e8 commit df422b3

8 files changed

Lines changed: 79 additions & 18 deletions

File tree

ddtrace/appsec/_iast/_ast/iastpatch.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ static int
250250
str_in_list(const char* needle, const char** list, size_t count)
251251
{
252252
for (size_t i = 0; i < count; i++) {
253-
if (strncmp(needle, list[i], strlen(list[i])) == 0) {
253+
if (list[i] && strncmp(needle, list[i], strlen(list[i])) == 0) {
254254
return 1;
255255
}
256256
}
@@ -352,13 +352,21 @@ get_list_from_env(const char* env_var_name, size_t* count)
352352
return NULL;
353353
}
354354
env_copy = strdup(env_value);
355-
if (!env_copy)
355+
if (!env_copy) {
356+
free(modules_list);
357+
modules_list = NULL;
356358
return NULL;
359+
}
357360
count_tmp = 0;
358361
token = strtok(env_copy, ",");
359362
while (token) {
360363
char* dup = strdup(token);
361364
if (!dup) {
365+
for (size_t j = 0; j < count_tmp; j++) {
366+
free(modules_list[j]);
367+
}
368+
free(modules_list);
369+
modules_list = NULL;
362370
free(env_copy);
363371
return NULL;
364372
}
@@ -408,19 +416,32 @@ init_globals(void)
408416
return -1;
409417
}
410418

411-
builtin_count = (size_t)PyTuple_Size(builtin_names);
419+
/* PyTuple_Size returns -1 on error; guard before casting to size_t */
420+
Py_ssize_t builtin_count_s = PyTuple_Size(builtin_names);
421+
if (builtin_count_s < 0) {
422+
return -1;
423+
}
424+
builtin_count = (size_t)builtin_count_s;
412425
builtins_denylist_count = static_stdlib_denylist_count + builtin_count;
413426

414427
builtins_denylist = (char**)malloc(builtins_denylist_count * sizeof(char*));
415428
if (!builtins_denylist) {
416429
PyErr_NoMemory();
417430
return -1;
418431
}
432+
/* Initialize all entries to NULL so str_in_list never reads garbage */
433+
memset(builtins_denylist, 0, builtins_denylist_count * sizeof(char*));
434+
419435
/* Copy static stdlib names */
420436
for (i = 0; i < static_stdlib_denylist_count; i++) {
421437
char* dup = strdup(static_stdlib_denylist[i]);
422-
if (!dup)
438+
if (!dup) {
439+
PyErr_NoMemory();
440+
free_list(builtins_denylist, builtins_denylist_count);
441+
builtins_denylist = NULL;
442+
builtins_denylist_count = 0;
423443
return -1;
444+
}
424445
for (char* p = dup; *p; p++) {
425446
*p = tolower(*p);
426447
}
@@ -430,17 +451,23 @@ init_globals(void)
430451
/* Copy built-in module names */
431452
for (size_t i = 0; i < builtin_count; i++) {
432453
PyObject* item = PyTuple_GetItem(builtin_names, i); /* borrowed reference */
433-
if (PyUnicode_Check(item)) {
454+
if (item && PyUnicode_Check(item)) {
434455
const char* s = PyUnicode_AsUTF8(item);
435456
if (s) {
436457
char* dup = strdup(s);
437-
if (!dup)
458+
if (!dup) {
459+
PyErr_NoMemory();
460+
free_list(builtins_denylist, builtins_denylist_count);
461+
builtins_denylist = NULL;
462+
builtins_denylist_count = 0;
438463
return -1;
464+
}
439465
for (char* p = dup; *p; p++) {
440466
*p = tolower(*p);
441467
}
442468
builtins_denylist[static_stdlib_denylist_count + i] = dup;
443469
}
470+
/* NULL entries left as NULL (from memset above) when s is NULL */
444471
}
445472
}
446473

ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,29 @@ api_extend_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs)
3232
auto ctx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text, to_add });
3333
if (not ctx_map or ctx_map->empty()) {
3434
auto method_name = PyUnicode_FromString("extend");
35+
if (method_name == nullptr) {
36+
return nullptr;
37+
}
3538
PyObject_CallMethodObjArgs(candidate_text, method_name, to_add, nullptr);
39+
Py_DecRef(method_name);
3640
if (has_pyerr()) {
37-
Py_DecRef(method_name);
3841
return nullptr;
3942
}
40-
Py_DecRef(method_name);
4143
} else {
4244
const auto& to_candidate = get_tainted_object(candidate_text, ctx_map);
4345
auto to_result = safe_allocate_tainted_object_copy(to_candidate);
4446
const auto& to_toadd = get_tainted_object(to_add, ctx_map);
4547

4648
// Ensure no returns are done before this method call
4749
auto method_name = PyUnicode_FromString("extend");
50+
if (method_name == nullptr) {
51+
return nullptr;
52+
}
4853
PyObject_CallMethodObjArgs(candidate_text, method_name, to_add, nullptr);
54+
Py_DecRef(method_name);
4955
if (has_pyerr()) {
50-
Py_DecRef(method_name);
5156
return nullptr;
5257
}
53-
Py_DecRef(method_name);
5458

5559
if (to_result == nullptr) {
5660
Py_RETURN_NONE;

ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ api_format_aspect(StrType& candidate_text,
5959
StrType result_text = get<0>(result);
6060
TaintRangeRefs result_ranges = get<1>(result);
6161
PyObject* new_result = new_pyobject_id(result_text.ptr());
62+
if (new_result == nullptr) {
63+
return result_text;
64+
}
6265
set_ranges(new_result, result_ranges, tx_map);
6366
return py::reinterpret_steal<StrType>(new_result);
6467
}

ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs)
154154
if (iterator != nullptr) {
155155
PyObject* item;
156156
PyObject* list_aux = PyList_New(0);
157+
if (list_aux == nullptr) {
158+
Py_DECREF(iterator);
159+
return nullptr;
160+
}
157161
while ((item = PyIter_Next(iterator))) {
158162
PyList_Append(list_aux, item);
159163
Py_DECREF(item);

ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ add_aspect(PyObject* result_o,
4848

4949
if (!to_candidate_text) {
5050
const auto tainted = safe_allocate_tainted_object();
51+
if (tainted == nullptr) {
52+
return result_o;
53+
}
5154
tainted->add_ranges_shifted(to_text_to_add, static_cast<RANGE_START>(len_candidate_text));
5255
set_tainted_object(result_o, tainted, tx_taint_map);
5356
return result_o;
@@ -56,6 +59,9 @@ add_aspect(PyObject* result_o,
5659
// At this point we have both to_candidate_text and to_text_to_add so we add the
5760
// ranges from both to result_o
5861
const auto tainted = safe_allocate_tainted_object_copy(to_candidate_text);
62+
if (tainted == nullptr) {
63+
return result_o;
64+
}
5965
tainted->add_ranges_shifted(to_text_to_add, static_cast<RANGE_START>(len_candidate_text));
6066
set_tainted_object(result_o, tainted, tx_taint_map);
6167

ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ call_original_function(PyObject* orig_function,
3535
py::args py_args(py_args_list);
3636

3737
PyObject* kwargs = kwnames_to_kwargs(args, nargs, kwnames);
38-
auto res = PyObject_Call(orig_function, py_args.ptr(), kwnames_to_kwargs(args, nargs, kwnames));
39-
Py_DECREF(kwargs);
38+
auto res = PyObject_Call(orig_function, py_args.ptr(), kwargs);
39+
Py_XDECREF(kwargs);
4040
return res;
4141
}
4242

@@ -166,7 +166,13 @@ api_str_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs, Py
166166
}
167167

168168
const char* encoding = has_encoding ? PyUnicode_AsUTF8(pyo_encoding) : "utf-8";
169+
if (has_encoding && encoding == nullptr) {
170+
return nullptr; // PyUnicode_AsUTF8 failed; exception already set
171+
}
169172
const char* errors = has_errors ? PyUnicode_AsUTF8(pyo_errors) : "strict";
173+
if (has_errors && errors == nullptr) {
174+
return nullptr; // PyUnicode_AsUTF8 failed; exception already set
175+
}
170176
result_o = PyUnicode_Decode(text_raw_bytes, text_raw_bytes_size, encoding, errors);
171177

172178
if (PyErr_Occurred()) {

ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ api_convert_escaped_text_to_taint_text(const py::bytearray& taint_escaped_text,
135135

136136
const std::tuple result = convert_escaped_text_to_taint_text<py::bytes>(bytes_text, ranges_orig);
137137
PyObject* new_result = new_pyobject_id((py::bytearray() + get<0>(result)).ptr());
138-
138+
if (new_result == nullptr) {
139+
return taint_escaped_text;
140+
}
139141
set_ranges(new_result, get<1>(result), tx_map);
140142

141143
return py::reinterpret_steal<py::bytearray>(new_result);
@@ -153,7 +155,9 @@ api_convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const
153155
}
154156
auto [result_text, result_ranges] = convert_escaped_text_to_taint_text<StrType>(taint_escaped_text, ranges_orig);
155157
PyObject* new_result = new_pyobject_id(result_text.ptr());
156-
158+
if (new_result == nullptr) {
159+
return taint_escaped_text;
160+
}
157161
set_ranges(new_result, result_ranges, tx_map);
158162
return py::reinterpret_steal<StrType>(new_result);
159163
}
@@ -256,8 +260,10 @@ convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const Tain
256260
id_evidence = get<0>(previous_context);
257261
const shared_ptr<TaintRange>& original_range =
258262
get_range_by_hash(getNum(id_evidence), optional_ranges_orig);
259-
ranges.emplace_back(
260-
safe_allocate_taint_range(start, length, original_range->source, original_range->secure_marks));
263+
if (original_range != nullptr) {
264+
ranges.emplace_back(safe_allocate_taint_range(
265+
start, length, original_range->source, original_range->secure_marks));
266+
}
261267
}
262268
latest_end = end;
263269
}
@@ -288,8 +294,10 @@ convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const Tain
288294
id_evidence = get<0>(context);
289295
const shared_ptr<TaintRange>& original_range =
290296
get_range_by_hash(getNum(id_evidence), optional_ranges_orig);
291-
ranges.emplace_back(
292-
safe_allocate_taint_range(start, end - start, original_range->source, original_range->secure_marks));
297+
if (original_range != nullptr) {
298+
ranges.emplace_back(safe_allocate_taint_range(
299+
start, end - start, original_range->source, original_range->secure_marks));
300+
}
293301
}
294302
latest_end = end;
295303
}

ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ process_flag_added_args(PyObject* orig_function, const int flag_added_args, PyOb
201201
if (flag_added_args > 0) {
202202
const Py_ssize_t num_args = PyTuple_Size(args);
203203
PyObject* sliced_args = PyTuple_New(num_args - flag_added_args);
204+
if (sliced_args == nullptr) {
205+
return nullptr;
206+
}
204207
for (Py_ssize_t i = 0; i < num_args - flag_added_args; ++i) {
205208
// PyTuple_SET_ITEM(sliced_args, i, PyTuple_GET_ITEM(args, i + flag_added_args));
206209
PyObject* item = PyTuple_GetItem(args, i + flag_added_args);

0 commit comments

Comments
 (0)