Skip to content

Commit 99c5739

Browse files
committed
addons: add unionzeroinit
This has bitten me before and is definitely a foot gun. GCC 15[1] changed their semantics related to zero initialization of unions; from initializing the complete union (sizeof union) to only zero initializing the first member. If the same first member is not the largest one, the state of the remaining storage is considered undefined and in practice most likely stack garbage. The unionzeroinit addon can detect such scenarios and emit a warning. It does not cover the designated initializers as I would interpret those as being intentional. Example output from one of my projects: ``` x86-decoder.c:294:7: warning: Zero initializing union Evex does not guarantee its complete storage to be zero initialized as its largest member is not declared as the first member. Consider making u32 the first member or favor memset(). [unionzeroinit-unionzeroinit] Evex evex = {0}; ^ ``` [1] https://trofi.github.io/posts/328-c-union-init-and-gcc-15.html
1 parent 4d04f41 commit 99c5739

9 files changed

Lines changed: 337 additions & 0 deletions

File tree

addons/test/unionzeroinit.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Running the test with Python 3:
2+
# Command in cppcheck directory:
3+
# PYTHONPATH=./addons python3 -m pytest addons/test/unionzeroinit.py
4+
5+
import contextlib
6+
import sys
7+
8+
from addons import cppcheckdata
9+
from addons import unionzeroinit
10+
from . import util
11+
12+
13+
@contextlib.contextmanager
14+
def run(path):
15+
sys.argv.append("--cli")
16+
util.dump_create(path)
17+
data = cppcheckdata.CppcheckData(f"{path}.dump")
18+
for cfg in data.iterconfigurations():
19+
yield cfg, data
20+
sys.argv.remove("--cli")
21+
util.dump_remove(path)
22+
23+
24+
def test_basic(capsys):
25+
with run("./addons/test/unionzeroinit/basic.c") as (cfg, data):
26+
unionzeroinit.union_zero_init(cfg, data)
27+
captured = capsys.readouterr()
28+
captured = captured.out.splitlines()
29+
json_output = util.convert_json_output(captured)
30+
assert len(json_output["unionzeroinit"]) == 3
31+
assert json_output["unionzeroinit"][0]["linenr"] == 22
32+
assert json_output["unionzeroinit"][1]["linenr"] == 23
33+
assert json_output["unionzeroinit"][2]["linenr"] == 25
34+
35+
36+
def test_array_member(capsys):
37+
with run("./addons/test/unionzeroinit/array.c") as (cfg, data):
38+
unionzeroinit.union_zero_init(cfg, data)
39+
captured = capsys.readouterr()
40+
captured = captured.out.splitlines()
41+
json_output = util.convert_json_output(captured)
42+
assert len(json_output) == 0
43+
44+
45+
def test_struct_member(capsys):
46+
with run("./addons/test/unionzeroinit/struct.c") as (cfg, data):
47+
unionzeroinit.union_zero_init(cfg, data, True)
48+
captured = capsys.readouterr()
49+
captured = captured.out.splitlines()
50+
json_output = util.convert_json_output(captured)
51+
assert len(json_output) == 0
52+
53+
54+
def test_struct_cyclic_member(capsys):
55+
with run("./addons/test/unionzeroinit/struct-cyclic.c") as (cfg, data):
56+
unionzeroinit.union_zero_init(cfg, data, True)
57+
captured = capsys.readouterr()
58+
captured = captured.out.splitlines()
59+
json_output = util.convert_json_output(captured)
60+
assert len(json_output) == 0
61+
62+
63+
def test_unknown_type(capsys):
64+
with run("./addons/test/unionzeroinit/unknown-type.c") as (cfg, data):
65+
unionzeroinit.union_zero_init(cfg, data, True)
66+
captured = capsys.readouterr()
67+
captured = captured.out.splitlines()
68+
json_output = util.convert_json_output(captured)
69+
assert len(json_output) == 0
70+
71+
72+
def test_long_long(capsys):
73+
with run("./addons/test/unionzeroinit/long-long.c") as (cfg, data):
74+
unionzeroinit.union_zero_init(cfg, data, True)
75+
captured = capsys.readouterr()
76+
captured = captured.out.splitlines()
77+
json_output = util.convert_json_output(captured)
78+
assert len(json_output) == 0
79+
80+
81+
def test_bitfields(capsys):
82+
with run("./addons/test/unionzeroinit/bitfields.c") as (cfg, data):
83+
unionzeroinit.union_zero_init(cfg, data, True)
84+
captured = capsys.readouterr()
85+
captured = captured.out.splitlines()
86+
json_output = util.convert_json_output(captured)
87+
assert len(json_output) == 0

addons/test/unionzeroinit/array.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
void foo(void) {
2+
union { int c; char s8[2]; } u = {0};
3+
}

addons/test/unionzeroinit/basic.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include <stdint.h>
2+
3+
union bad_union_0 {
4+
char c;
5+
int64_t i64;
6+
void *p;
7+
};
8+
9+
typedef union {
10+
char c;
11+
int i;
12+
} bad_union_1;
13+
14+
extern void e(union bad_union_0 *);
15+
16+
void
17+
foo(void)
18+
{
19+
union { int i; char c; } good0 = {0};
20+
union { int i; char c; } good1 = {};
21+
22+
union { char c; int i; } bad0 = {0};
23+
union bad_union_0 bad1 = {0};
24+
e(&bad1);
25+
bad_union_1 bad2 = {0};
26+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
typedef union Evex {
2+
int u32;
3+
struct {
4+
char mmm:3,
5+
b4:1,
6+
r4:1,
7+
b3:1,
8+
x3:1,
9+
r3:1;
10+
} extended;
11+
} Evex;
12+
13+
void foo(void) {
14+
Evex evex = {0};
15+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
union u {
2+
long long x;
3+
};
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
oid foo(void) {
2+
struct s { struct s *s; }
3+
union { struct s s; } u = {0};
4+
}

addons/test/unionzeroinit/struct.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
void foo(void) {
2+
union {
3+
int c;
4+
struct {
5+
char x;
6+
struct {
7+
char y;
8+
} s1;
9+
} s0;
10+
} u = {0};
11+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
union u {
2+
Unknown x;
3+
};

addons/unionzeroinit.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Detect error-prone zero initialization of unions.
4+
5+
import dataclasses
6+
import cppcheck
7+
import cppcheckdata
8+
from typing import List, Tuple
9+
10+
11+
@dataclasses.dataclass
12+
class Member:
13+
name: str
14+
size: int
15+
16+
17+
@dataclasses.dataclass
18+
class Union:
19+
name: str
20+
members: List[Member] = dataclasses.field(default_factory=list)
21+
22+
def largest_member(self):
23+
return sorted(self.members, key=lambda m: -m.size)[0]
24+
25+
def is_largest_member_first(self):
26+
sizes = [m.size for m in self.members]
27+
28+
has_unknown_sizes = 0 in sizes
29+
if has_unknown_sizes:
30+
return True
31+
32+
return sizes[0] == sorted(sizes, key=lambda s: -s)[0]
33+
34+
35+
def estimate_size_of_type(
36+
platform: cppcheckdata.Platform, type: str, pointer: bool
37+
) -> int:
38+
bits = 0
39+
if pointer:
40+
bits = platform.pointer_bit
41+
elif type == "char":
42+
bits = platform.char_bit
43+
elif type == "short":
44+
bits = platform.short_bit
45+
elif type == "int":
46+
bits = platform.int_bit
47+
elif type == "long":
48+
bits = platform.long_bit
49+
elif type == "long_long":
50+
bits = platform.long_long_bit
51+
else:
52+
# Fair estimate...
53+
bits = platform.int_bit
54+
return bits
55+
56+
57+
def tokat(token: cppcheckdata.Token, offset) -> cppcheckdata.Token:
58+
at = token.tokAt(offset)
59+
if at:
60+
return at
61+
62+
empty = {"str": ""}
63+
return cppcheckdata.Token(empty)
64+
65+
66+
def parse_array_length(token) -> int:
67+
if not tokat(token, 1).str == "[":
68+
return 1
69+
70+
nelements = 0
71+
try:
72+
nelements = int(tokat(token, 2).str)
73+
except ValueError:
74+
return 1
75+
76+
if not tokat(token, 3).str == "]":
77+
return 1
78+
79+
return nelements
80+
81+
82+
def is_zero_initialized(token):
83+
return (
84+
tokat(token, 1).str == "="
85+
and tokat(token, 2).str == "{"
86+
and (
87+
tokat(token, 3).str == "}"
88+
or (tokat(token, 3).str == "0" and tokat(token, 4).str == "}")
89+
)
90+
)
91+
92+
93+
def is_pointer(variable: cppcheckdata.Variable) -> bool:
94+
return variable.nameToken.valueType.pointer and not variable.isArray
95+
96+
97+
def accumulated_member_size(
98+
data: cppcheckdata.CppcheckData, variable: cppcheckdata.Variable
99+
) -> Tuple[str, int]:
100+
# Note that cppcheck might not be able to observe all types due to
101+
# inaccessible include(s).
102+
if not variable.nameToken.valueType:
103+
return (None, 0)
104+
105+
if variable.nameToken.valueType.type == "record":
106+
if not variable.nameToken.valueType.typeScope:
107+
return (None, 0)
108+
109+
nested_variables = variable.nameToken.valueType.typeScope.varlist
110+
111+
# Circumvent what seems to be a bug in which only the last bitfield has
112+
# its bits properly assigned.
113+
has_bitfields = any([v.nameToken.valueType.bits for v in nested_variables])
114+
if has_bitfields:
115+
return (variable.nameToken.str, len(nested_variables))
116+
117+
total_size = 0
118+
for nested in nested_variables:
119+
# Avoid potential cyclic members referring to the type currently
120+
# being traversed.
121+
if is_pointer(nested):
122+
total_size += data.platform.pointer_bit
123+
else:
124+
_, size = accumulated_member_size(data, nested.nameToken.variable)
125+
total_size += size
126+
return (variable.nameToken.str, total_size)
127+
128+
vt = variable.nameToken.valueType
129+
if vt.bits:
130+
size = vt.bits
131+
else:
132+
size = estimate_size_of_type(
133+
data.platform,
134+
variable.nameToken.valueType.type,
135+
is_pointer(variable),
136+
)
137+
if variable.isArray:
138+
size *= parse_array_length(variable.nameToken)
139+
return (variable.nameToken.str, size)
140+
141+
142+
def error_message(u: Union):
143+
return (
144+
f"Zero initializing union {u.name} does not guarantee its complete "
145+
"storage to be zero initialized as its largest member is not declared "
146+
f"as the first member. Consider making {u.largest_member().name} the "
147+
"first member or favor memset()."
148+
)
149+
150+
151+
@cppcheck.checker
152+
def union_zero_init(cfg, data, debug=False):
153+
unions_by_id = {}
154+
155+
# Detect union declarations.
156+
for scope in cfg.scopes:
157+
if not scope.type == "Union":
158+
continue
159+
160+
union = Union(name=scope.className)
161+
for variable in scope.varlist:
162+
name, size = accumulated_member_size(data, variable)
163+
union.members.append(Member(name=name, size=size))
164+
unions_by_id[scope.Id] = union
165+
166+
if debug:
167+
for id, u in unions_by_id.items():
168+
print(id, u, u.is_largest_member_first(), u.largest_member())
169+
170+
# Detect problematic union variables.
171+
for token in cfg.tokenlist:
172+
if (
173+
token.valueType
174+
and token.valueType.typeScopeId in unions_by_id
175+
and token.isName
176+
and is_zero_initialized(token)
177+
):
178+
id = token.valueType.typeScopeId
179+
if not unions_by_id[id].is_largest_member_first():
180+
cppcheck.reportError(
181+
token,
182+
"warning",
183+
error_message(unions_by_id[id]),
184+
"unionzeroinit",
185+
)

0 commit comments

Comments
 (0)