Skip to content

Commit b3f504b

Browse files
committed
loader: handle missing basic blocks in compute_static_layout (#2734)
Extractors such as Binary Ninja may discover basic blocks during feature extraction that are no longer present when re-enumerating basic blocks during layout computation, because the IL can be recomputed between the two passes, shifting basic block boundaries. Replace the hard assertion with a defensive check that logs a warning and skips the missing address. This prevents the AssertionError crash while preserving all valid layout data. Add a unit test that reproduces the scenario with NullStaticFeatureExtractor by removing a basic block between find_capabilities() and compute_static_layout(). Fixes #2734.
1 parent 10dfd28 commit b3f504b

2 files changed

Lines changed: 105 additions & 1 deletion

File tree

capa/loader.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,14 @@ def compute_static_layout(rules: RuleSet, extractor: StaticFeatureExtractor, cap
722722
rule = rules[rule_name]
723723
if capa.rules.Scope.BASIC_BLOCK in rule.scopes:
724724
for addr, _ in matches:
725-
assert addr in functions_by_bb
725+
if addr not in functions_by_bb:
726+
# extractors may discover basic blocks during feature extraction
727+
# that are no longer present when re-enumerating basic blocks
728+
# during layout computation. for example, Binary Ninja may
729+
# recompute IL and shift basic block boundaries between the
730+
# two passes. see #2734.
731+
logger.warning("matched basic block 0x%x not found in any function during layout computation", addr)
732+
continue
726733
matched_bbs.add(addr)
727734

728735
layout = rdoc.StaticLayout(

tests/test_layout.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import textwrap
16+
17+
import capa.main
18+
import capa.rules
19+
import capa.loader
20+
import capa.features.common
21+
import capa.features.basicblock
22+
import capa.features.extractors.null
23+
from capa.features.address import AbsoluteVirtualAddress
24+
from capa.features.extractors.base_extractor import BBHandle, SampleHashes, FunctionHandle
25+
26+
27+
def test_compute_static_layout_with_missing_basic_block():
    """
    Test that compute_static_layout tolerates matched basic blocks that are
    no longer present when the extractor's basic blocks are re-enumerated.

    This can happen with extractors like Binary Ninja where the analysis
    state may change between feature extraction and layout computation,
    causing basic block boundaries to shift.

    Two scenarios are exercised against the same extractor:
      1. one of two matched basic blocks disappears: the layout must still
         report the function, because it keeps a valid matched block.
      2. every matched basic block disappears: the layout must be empty.

    See #2734.
    """
    function_va = AbsoluteVirtualAddress(0x401000)
    vanishing_bb_va = AbsoluteVirtualAddress(0x401000)
    surviving_bb_va = AbsoluteVirtualAddress(0x401010)

    def make_tight_loop_bb(va):
        # a basic block whose only feature is the one the test rule looks for
        return capa.features.extractors.null.BasicBlockFeatures(
            features=[
                (va, capa.features.common.Characteristic("tight loop")),
            ],
            instructions={},
        )

    # Create an extractor with one function containing two basic blocks,
    # at 0x401000 and 0x401010, both of which match the rule below.
    extractor = capa.features.extractors.null.NullStaticFeatureExtractor(
        base_address=AbsoluteVirtualAddress(0x401000),
        sample_hashes=SampleHashes(
            md5="6eb7ee7babf913d75df3f86c229df9e7",
            sha1="2a082494519acd5130d5120fa48786df7275fdd7",
            sha256="0c7d1a34eb9fd55bedbf37ba16e3d5dd8c1dd1d002479cc4af27ef0f82bb4792",
        ),
        global_features=[],
        file_features=[],
        functions={
            function_va: capa.features.extractors.null.FunctionFeatures(
                features=[],
                basic_blocks={
                    vanishing_bb_va: make_tight_loop_bb(vanishing_bb_va),
                    surviving_bb_va: make_tight_loop_bb(surviving_bb_va),
                },
            ),
        },
    )

    rules = capa.rules.RuleSet(
        [
            capa.rules.Rule.from_yaml(
                textwrap.dedent(
                    """
                    rule:
                        meta:
                            name: test rule
                            scopes:
                                static: basic block
                                dynamic: process
                        features:
                            - characteristic: tight loop
                    """
                )
            ),
        ]
    )

    # Find capabilities: the rule should match at both basic blocks.
    capabilities = capa.main.find_capabilities(rules, extractor)
    assert "test rule" in capabilities.matches
    matched_addresses = {addr for addr, _ in capabilities.matches["test rule"]}
    assert matched_addresses == {vanishing_bb_va, surviving_bb_va}

    basic_blocks = extractor.functions[function_va].basic_blocks

    # Simulate the regression: remove one matched BB from the extractor so
    # that compute_static_layout does not find it when re-enumerating BBs.
    # This mimics Binary Ninja recomputing IL and shifting basic block
    # boundaries between the two passes.
    del basic_blocks[vanishing_bb_va]

    # Before the fix, this would raise AssertionError.
    # After the fix, the missing address is skipped (with a warning) and the
    # remaining valid match must still be laid out.
    layout = capa.loader.compute_static_layout(rules, extractor, capabilities.matches)
    assert len(layout.functions) == 1

    # Now remove the last matched BB as well: nothing valid remains, so the
    # layout should be valid but empty.
    del basic_blocks[surviving_bb_va]
    layout = capa.loader.compute_static_layout(rules, extractor, capabilities.matches)
    assert len(layout.functions) == 0

0 commit comments

Comments
 (0)