Skip to content

Commit c236bc6

Browse files
committed
fix: bound py_state and harden stream/log source builders
py_state gains an optional max_state_entries cap (default infinity, so existing behavior is unchanged) enforced with atomic admission (insert_new + update_counter, ets:take on remove) so Python-driven state_set can't exhaust node memory. A reserved sentinel row holds the count and is protected from caller corruption and hidden from keys/fetch. The py:stream and setup_logging helpers that build Python source now validate module/function/kwarg names as identifiers (rejecting injection at positions where quoting is meaningless) and escape string-literal values, including newlines and other control bytes. Adds py_state_SUITE (cap accounting + sentinel) and py_stream_SUITE:test_stream_rejects_injection.
1 parent fc11f1a commit c236bc6

5 files changed

Lines changed: 234 additions & 18 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44

55
### Security
66

7+
- **Bounded shared state + safe stream/log builders** - `py_state` gained an optional
8+
`max_state_entries` cap (default `infinity`, unchanged behavior) enforced with atomic
9+
admission so Python-driven `state_set` can't exhaust node memory, and its size counter
10+
is protected from corruption. The `py:stream` and logging helpers that build Python
11+
source now strictly validate module/function/kwarg names as identifiers (rejecting
12+
injection at positions where quoting is meaningless) and escape string-literal values
13+
including control characters.
714
- **Validated event-loop fd handles** - The asyncio reader/writer integration no longer
815
hands Python a raw `fd_resource` pointer as an integer key. Each handle is an opaque id
916
validated against a registry on every use, so a stale, duplicate, or fabricated id is a

src/py.erl

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -413,8 +413,8 @@ stream(Module, Func, Args, Kwargs) when map_size(Kwargs) == 0 ->
413413
stream(Module, Func, Args, Kwargs) ->
414414
%% With kwargs - use eval approach
415415
Ctx = py_context_router:get_context(),
416-
ModuleBin = ensure_binary(Module),
417-
FuncBin = ensure_binary(Func),
416+
ModuleBin = valid_py_module(ensure_binary(Module)),
417+
FuncBin = valid_py_ident(ensure_binary(Func)),
418418
KwargsCode = format_kwargs(Kwargs),
419419
ArgsCode = format_args(Args),
420420
Code = iolist_to_binary([
@@ -445,16 +445,16 @@ format_args(Args) ->
445445
%% @private Format a single argument
446446
format_arg(A) when is_integer(A) -> integer_to_binary(A);
447447
format_arg(A) when is_float(A) -> float_to_binary(A);
448-
format_arg(A) when is_binary(A) -> <<"'", A/binary, "'">>;
449-
format_arg(A) when is_atom(A) -> <<"'", (atom_to_binary(A))/binary, "'">>;
448+
format_arg(A) when is_binary(A) -> <<"'", (escape_py_literal(A))/binary, "'">>;
449+
format_arg(A) when is_atom(A) -> <<"'", (escape_py_literal(atom_to_binary(A)))/binary, "'">>;
450450
format_arg(A) when is_list(A) -> iolist_to_binary([<<"[">>, format_args(A), <<"]">>]);
451451
format_arg(_) -> <<"None">>.
452452

453453
%% @private Format kwargs for Python code
454454
format_kwargs(Kwargs) when map_size(Kwargs) == 0 -> <<>>;
455455
format_kwargs(Kwargs) ->
456456
KwList = maps:fold(fun(K, V, Acc) ->
457-
KB = if is_atom(K) -> atom_to_binary(K); is_binary(K) -> K end,
457+
KB = valid_py_ident(if is_atom(K) -> atom_to_binary(K); is_binary(K) -> K end),
458458
[<<KB/binary, "=", (format_arg(V))/binary>> | Acc]
459459
end, [], Kwargs),
460460
iolist_to_binary([<<", ">>, lists:join(<<", ">>, KwList)]).
@@ -543,7 +543,9 @@ stream_start(Module, Func, Args, Opts) ->
543543
{ok, Ref}.
544544

545545
%% @private Run the streaming via Python code
546-
stream_run_python(ModuleBin, FuncBin, RefHash) ->
546+
stream_run_python(ModuleBin0, FuncBin0, RefHash) ->
547+
ModuleBin = valid_py_module(ModuleBin0),
548+
FuncBin = valid_py_ident(FuncBin0),
547549
RefHashBin = integer_to_binary(RefHash),
548550
%% Build Python code that streams values using callbacks
549551
Code = iolist_to_binary([
@@ -1070,6 +1072,51 @@ escape_python_string(Str) ->
10701072
(C) -> [C]
10711073
end, Str).
10721074

1075+
%% @private Escape a binary for safe embedding inside a single-quoted Python
1076+
%% string literal: quote, backslash, and newline/CR/tab/other control bytes that
1077+
%% would otherwise break out of or corrupt the literal.
1078+
escape_py_literal(Bin) when is_binary(Bin) ->
1079+
<< <<(escape_py_byte(B))/binary>> || <<B>> <= Bin >>.
1080+
1081+
escape_py_byte($') -> <<"\\'">>;
1082+
escape_py_byte($\\) -> <<"\\\\">>;
1083+
escape_py_byte($\n) -> <<"\\n">>;
1084+
escape_py_byte($\r) -> <<"\\r">>;
1085+
escape_py_byte($\t) -> <<"\\t">>;
1086+
escape_py_byte(B) when B < 16#20; B =:= 16#7f ->
1087+
list_to_binary(io_lib:format("\\x~2.16.0b", [B]));
1088+
escape_py_byte(B) -> <<B>>.
1089+
1090+
%% @private Validate a Python identifier ([A-Za-z_][A-Za-z0-9_]*). Crashes on a
1091+
%% non-conforming value so an attacker-controlled module/func/kwarg name can't
1092+
%% inject code at an identifier position (where quoting is meaningless).
1093+
valid_py_ident(Bin) when is_binary(Bin), byte_size(Bin) > 0 ->
1094+
case ident_ok(Bin, first) of
1095+
true -> Bin;
1096+
false -> error({invalid_python_identifier, Bin})
1097+
end;
1098+
valid_py_ident(Other) ->
1099+
error({invalid_python_identifier, Other}).
1100+
1101+
%% @private Validate a dotted Python module path (each segment an identifier).
1102+
valid_py_module(Bin) when is_binary(Bin), byte_size(Bin) > 0 ->
1103+
Segments = binary:split(Bin, <<".">>, [global]),
1104+
lists:foreach(fun valid_py_ident/1, Segments),
1105+
Bin;
1106+
valid_py_module(Other) ->
1107+
error({invalid_python_identifier, Other}).
1108+
1109+
ident_ok(<<>>, first) -> false; %% empty segment (leading/trailing/double dot)
1110+
ident_ok(<<>>, rest) -> true;
1111+
ident_ok(<<C, Rest/binary>>, first)
1112+
when (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_ ->
1113+
ident_ok(Rest, rest);
1114+
ident_ok(<<C, Rest/binary>>, rest)
1115+
when (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z);
1116+
(C >= $0 andalso C =< $9); C =:= $_ ->
1117+
ident_ok(Rest, rest);
1118+
ident_ok(_, _) -> false.
1119+
10731120
%% @doc Deactivate the current virtual environment.
10741121
%% Restores sys.path to its original state.
10751122
-spec deactivate_venv() -> ok | {error, term()}.
@@ -1262,13 +1309,13 @@ configure_logging(Opts) ->
12621309
iolist_to_binary([
12631310
"__import__('erlang').setup_logging(",
12641311
integer_to_binary(LevelInt),
1265-
", '", F, "')"
1312+
", '", escape_py_literal(F), "')"
12661313
]);
12671314
F when is_list(F) ->
12681315
iolist_to_binary([
12691316
"__import__('erlang').setup_logging(",
12701317
integer_to_binary(LevelInt),
1271-
", '", F, "')"
1318+
", '", escape_py_literal(iolist_to_binary(F)), "')"
12721319
])
12731320
end,
12741321
case eval(Code) of

src/py_state.erl

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@
7272

7373
-define(TABLE, py_state).
7474

75+
%% Reserved sentinel row holding the live entry count, used by the optional size
76+
%% cap. User keys are rejected from this slot so callers (Erlang or Python) can't
77+
%% corrupt the accounting.
78+
-define(SIZE_KEY, '$py_state_size$').
79+
7580
%%% ============================================================================
7681
%%% API
7782
%%% ============================================================================
@@ -112,28 +117,70 @@ register_callbacks() ->
112117

113118
%% @doc Fetch a value from the shared state.
114119
-spec fetch(Key :: term()) -> {ok, term()} | {error, not_found}.
120+
fetch(?SIZE_KEY) -> {error, not_found};
115121
fetch(Key) ->
116122
case ets:lookup(?TABLE, Key) of
117123
[{_, Value}] -> {ok, Value};
118124
[] -> {error, not_found}
119125
end.
120126

121127
%% @doc Store a value in the shared state.
122-
-spec store(Key :: term(), Value :: term()) -> ok.
128+
-spec store(Key :: term(), Value :: term()) -> ok | {error, full | reserved_key}.
129+
store(?SIZE_KEY, _Value) ->
130+
{error, reserved_key};
123131
store(Key, Value) ->
124-
ets:insert(?TABLE, {Key, Value}),
125-
ok.
132+
case max_entries() of
133+
infinity ->
134+
ets:insert(?TABLE, {Key, Value}),
135+
ok;
136+
Max ->
137+
%% Atomic admission: only genuinely new keys consume capacity, and the
138+
%% reserve/rollback uses ets:update_counter so there is no TOCTOU race
139+
%% on the public, write-concurrent table. Overwrites don't change count.
140+
case ets:insert_new(?TABLE, {Key, Value}) of
141+
true ->
142+
Count = ets:update_counter(?TABLE, ?SIZE_KEY, {2, 1}, {?SIZE_KEY, 0}),
143+
case Count > Max of
144+
true ->
145+
ets:delete(?TABLE, Key),
146+
ets:update_counter(?TABLE, ?SIZE_KEY, {2, -1}, {?SIZE_KEY, 0}),
147+
{error, full};
148+
false ->
149+
ok
150+
end;
151+
false ->
152+
ets:insert(?TABLE, {Key, Value}),
153+
ok
154+
end
155+
end.
126156

127157
%% @doc Remove a key from the shared state.
128158
-spec remove(Key :: term()) -> ok.
159+
remove(?SIZE_KEY) ->
160+
ok;
129161
remove(Key) ->
130-
ets:delete(?TABLE, Key),
131-
ok.
162+
case max_entries() of
163+
infinity ->
164+
ets:delete(?TABLE, Key),
165+
ok;
166+
_Max ->
167+
%% Decrement only when a real user key was actually present (ets:take so
168+
%% a missing-key remove can't drift the counter negative).
169+
case ets:take(?TABLE, Key) of
170+
[_] ->
171+
ets:update_counter(?TABLE, ?SIZE_KEY, {2, -1}, {?SIZE_KEY, 0}),
172+
ok;
173+
[] ->
174+
ok
175+
end
176+
end.
132177

133178
%% @doc Get all keys in the shared state.
134179
-spec keys() -> [term()].
135180
keys() ->
136-
ets:foldl(fun({K, _}, Acc) -> [K | Acc] end, [], ?TABLE).
181+
ets:foldl(fun({?SIZE_KEY, _}, Acc) -> Acc;
182+
({K, _}, Acc) -> [K | Acc]
183+
end, [], ?TABLE).
137184

138185
%% @doc Clear all entries from the shared state.
139186
-spec clear() -> ok.
@@ -148,6 +195,8 @@ incr(Key) ->
148195

149196
%% @doc Atomically increment a counter by Amount. Initializes to Amount if not exists.
150197
-spec incr(Key :: term(), Amount :: integer()) -> integer().
198+
incr(?SIZE_KEY, _Amount) ->
199+
error(reserved_key);
151200
incr(Key, Amount) ->
152201
try
153202
ets:update_counter(?TABLE, Key, {2, Amount})
@@ -168,6 +217,12 @@ decr(Key) ->
168217
decr(Key, Amount) ->
169218
incr(Key, -Amount).
170219

220+
%% @private Configured entry cap. `infinity' (the default) preserves the previous
221+
%% unbounded behavior; set application env `max_state_entries' to a positive
222+
%% integer to bound memory growth from Python-driven state_set calls.
223+
max_entries() ->
224+
application:get_env(erlang_python, max_state_entries, infinity).
225+
171226
%%% ============================================================================
172227
%%% Callback wrappers (for Python access)
173228
%%% ============================================================================
@@ -181,8 +236,10 @@ state_get_callback([Key]) ->
181236

182237
%% @private
183238
state_set_callback([Key, Value]) ->
184-
store(Key, Value),
185-
none.
239+
case store(Key, Value) of
240+
ok -> none;
241+
{error, Reason} -> {error, Reason}
242+
end.
186243

187244
%% @private
188245
state_delete_callback([Key]) ->

test/py_state_SUITE.erl

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
%%% @doc Common Test suite for py_state shared-state store, focused on the
2+
%%% optional entry cap and its accounting (memory-exhaustion resistance).
3+
-module(py_state_SUITE).
4+
5+
-include_lib("common_test/include/ct.hrl").
6+
7+
-export([
8+
all/0,
9+
init_per_suite/1,
10+
end_per_suite/1,
11+
init_per_testcase/2,
12+
end_per_testcase/2
13+
]).
14+
15+
-export([
16+
cap_disabled_test/1,
17+
cap_enforced_test/1,
18+
cap_accounting_test/1,
19+
sentinel_protected_test/1
20+
]).
21+
22+
-define(SIZE_KEY, '$py_state_size$').
23+
24+
all() ->
25+
[cap_disabled_test, cap_enforced_test, cap_accounting_test, sentinel_protected_test].
26+
27+
init_per_suite(Config) ->
28+
{ok, _} = application:ensure_all_started(erlang_python),
29+
Config.
30+
31+
end_per_suite(_Config) ->
32+
application:stop(erlang_python),
33+
ok.
34+
35+
init_per_testcase(_TC, Config) ->
36+
application:unset_env(erlang_python, max_state_entries),
37+
py_state:clear(),
38+
Config.
39+
40+
end_per_testcase(_TC, _Config) ->
41+
application:unset_env(erlang_python, max_state_entries),
42+
py_state:clear(),
43+
ok.
44+
45+
%% @doc Default (infinity) preserves the previous unbounded behavior.
46+
cap_disabled_test(_Config) ->
47+
[ok = py_state:store(N, N) || N <- lists:seq(1, 1000)],
48+
{ok, 500} = py_state:fetch(500),
49+
ok.
50+
51+
%% @doc A finite cap rejects new keys beyond the limit; overwrites don't consume
52+
%% capacity.
53+
cap_enforced_test(_Config) ->
54+
application:set_env(erlang_python, max_state_entries, 5),
55+
[ok = py_state:store({k, N}, N) || N <- lists:seq(1, 5)],
56+
{error, full} = py_state:store({k, 6}, 6),
57+
%% Overwriting an existing key does NOT consume capacity.
58+
ok = py_state:store({k, 3}, 30),
59+
{ok, 30} = py_state:fetch({k, 3}),
60+
{error, full} = py_state:store({k, 7}, 7),
61+
ok.
62+
63+
%% @doc Removing frees capacity; removing a missing key must not drift the count
64+
%% (which would let the cap be bypassed or never trip).
65+
cap_accounting_test(_Config) ->
66+
application:set_env(erlang_python, max_state_entries, 3),
67+
ok = py_state:store(a, 1),
68+
ok = py_state:store(b, 2),
69+
ok = py_state:store(c, 3),
70+
{error, full} = py_state:store(d, 4),
71+
%% Remove a missing key repeatedly: no phantom capacity freed.
72+
[py_state:remove(missing) || _ <- lists:seq(1, 10)],
73+
{error, full} = py_state:store(d, 4),
74+
%% Remove a real key: frees exactly one slot.
75+
ok = py_state:remove(a),
76+
ok = py_state:store(d, 4),
77+
{error, full} = py_state:store(e, 5),
78+
ok.
79+
80+
%% @doc The internal size sentinel can't be written or counter-corrupted by a
81+
%% caller, and is hidden from keys/0 and fetch/1.
82+
sentinel_protected_test(_Config) ->
83+
application:set_env(erlang_python, max_state_entries, 2),
84+
{error, reserved_key} = py_state:store(?SIZE_KEY, 9999),
85+
{'EXIT', _} = (catch py_state:incr(?SIZE_KEY, 100)),
86+
ok = py_state:store(x, 1),
87+
ok = py_state:store(y, 2),
88+
{error, full} = py_state:store(z, 3),
89+
{error, not_found} = py_state:fetch(?SIZE_KEY),
90+
false = lists:member(?SIZE_KEY, py_state:keys()),
91+
ok.

test/py_stream_SUITE.erl

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
test_stream_cancel/1,
1818
test_stream_error/1,
1919
test_stream_empty/1,
20-
test_stream_large/1
20+
test_stream_large/1,
21+
test_stream_rejects_injection/1
2122
]).
2223

2324
all() ->
@@ -29,7 +30,8 @@ all() ->
2930
test_stream_cancel,
3031
test_stream_error,
3132
test_stream_empty,
32-
test_stream_large
33+
test_stream_large,
34+
test_stream_rejects_injection
3335
].
3436

3537
init_per_suite(Config) ->
@@ -40,6 +42,18 @@ end_per_suite(_Config) ->
4042
ok = application:stop(erlang_python),
4143
ok.
4244

45+
%% @doc A module/func name (or kwarg key) that isn't a valid Python identifier
46+
%% must be rejected, not interpolated into the generated source where it could
47+
%% inject code. Regression for the stream source-builder hardening.
48+
test_stream_rejects_injection(_Config) ->
49+
{'EXIT', {{invalid_python_identifier, _}, _}} =
50+
(catch py:stream(<<"os'); __import__('os').system('x">>, <<"walk">>, [], #{k => 1})),
51+
{'EXIT', {{invalid_python_identifier, _}, _}} =
52+
(catch py:stream(<<"math">>, <<"sqrt'); evil(">>, [], #{k => 1})),
53+
{'EXIT', {{invalid_python_identifier, _}, _}} =
54+
(catch py:stream(<<"math">>, <<"sqrt">>, [], #{<<"bad key)">> => 1})),
55+
ok.
56+
4357
%% Helper to collect all stream events
4458
collect_stream(Ref) ->
4559
collect_stream(Ref, [], 5000).

0 commit comments

Comments
 (0)