Skip to content

Commit 6d1ba2e

Browse files
fix: harden mixed Keras H5 Lambda analysis (#1422)
* fix: inspect mixed keras h5 lambda bytecode
* fix: evaluate mixed Keras H5 Lambda indicators
* fix: harden mixed Keras Lambda analysis
* fix: inspect H5 list lambda bytecode
* fix: harden mixed Keras Lambda analysis
1 parent 64e643f commit 6d1ba2e

5 files changed

Lines changed: 777 additions & 69 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5252
- treat prereleases of fixed Keras ZIP CVE-2026-1669 versions as vulnerable
5353
- detect external references in weights-only Keras HDF5 layouts without Keras metadata
5454
- bound standalone Keras HDF5 layout and external-reference analysis, and distrust artifact-controlled versions
55+
- inspect mixed dict/list Keras HDF5 Lambda bytecode with bounded, marshal-aware analysis
5556
- restrict JFrog credential forwarding to explicitly trusted HTTPS hosts
5657
- classify unavailable metadata document reads and timed-out metadata scans as operationally incomplete rather than security findings
5758
- route renamed structured JAX/Orbax JSON checkpoints, conservatively report observable bounded-prefix threats, and fail closed for oversized identified metadata

modelaudit/scanners/keras_h5_scanner.py

Lines changed: 92 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
check_custom_loss_config,
2929
check_custom_metric_config,
3030
check_lambda_dict_function,
31+
check_lambda_list_function,
3132
check_subclassed_model,
3233
is_known_safe_keras_layer_class,
3334
)
@@ -1017,6 +1018,23 @@ def _check_lambda_layer(self, layer_config: dict[str, Any], result: ScanResult)
10171018
module_name = layer_config.get("module")
10181019
function_name = layer_config.get("function_name")
10191020

1021+
layer_name = layer_config.get("name", "lambda")
1022+
encoded_function_handled = False
1023+
if isinstance(function_str, dict):
1024+
encoded_function_handled = check_lambda_dict_function(
1025+
function_str,
1026+
result,
1027+
self.current_file_path,
1028+
layer_name,
1029+
)
1030+
elif isinstance(function_str, list):
1031+
encoded_function_handled = check_lambda_list_function(
1032+
function_str,
1033+
result,
1034+
self.current_file_path,
1035+
layer_name,
1036+
)
1037+
10201038
# Check if there's actual Python code to validate
10211039
if function_str and isinstance(function_str, str):
10221040
# First check if it matches safe patterns
@@ -1034,63 +1052,67 @@ def _check_lambda_layer(self, layer_config: dict[str, Any], result: ScanResult)
10341052
},
10351053
rule_code=None, # Passing check
10361054
)
1037-
return
1038-
1039-
# This might be serialized Python code
1040-
is_valid, error = validate_python_syntax(function_str)
1055+
else:
1056+
# This might be serialized Python code
1057+
is_valid, error = validate_python_syntax(function_str)
10411058

1042-
if is_valid:
1043-
# It's valid Python! Check if it's dangerous
1044-
is_dangerous, risk_desc = is_code_potentially_dangerous(function_str, "low")
1059+
if is_valid:
1060+
# It's valid Python! Check if it's dangerous
1061+
is_dangerous, risk_desc = is_code_potentially_dangerous(function_str, "low")
10451062

1046-
# Check if code is dangerous
1047-
if is_dangerous:
1048-
result.add_check(
1049-
name="Lambda Layer Code Analysis",
1050-
passed=False,
1051-
message="Lambda layer contains dangerous Python code",
1052-
severity=IssueSeverity.CRITICAL,
1053-
location=self.current_file_path,
1054-
details={
1055-
"layer_class": "Lambda",
1056-
"code_analysis": risk_desc,
1057-
"code_preview": function_str[:200] + "..." if len(function_str) > 200 else function_str,
1058-
},
1059-
rule_code="S507", # Python embedded code
1060-
)
1063+
# Check if code is dangerous
1064+
if is_dangerous:
1065+
result.add_check(
1066+
name="Lambda Layer Code Analysis",
1067+
passed=False,
1068+
message="Lambda layer contains dangerous Python code",
1069+
severity=IssueSeverity.CRITICAL,
1070+
location=self.current_file_path,
1071+
details={
1072+
"layer_class": "Lambda",
1073+
"code_analysis": risk_desc,
1074+
"code_preview": function_str[:200] + "..." if len(function_str) > 200 else function_str,
1075+
},
1076+
rule_code="S507", # Python embedded code
1077+
)
1078+
else:
1079+
# Valid Python but not dangerous - record as passed
1080+
result.add_check(
1081+
name="Lambda Layer Code Analysis",
1082+
passed=True,
1083+
message="Lambda layer contains safe Python code",
1084+
location=self.current_file_path,
1085+
details={
1086+
"layer_class": "Lambda",
1087+
"validation_status": "valid_python",
1088+
},
1089+
rule_code=None, # Passing check
1090+
)
10611091
else:
1062-
# Valid Python but not dangerous - record as passed
1063-
result.add_check(
1064-
name="Lambda Layer Code Analysis",
1065-
passed=True,
1066-
message="Lambda layer contains safe Python code",
1067-
location=self.current_file_path,
1068-
details={
1069-
"layer_class": "Lambda",
1070-
"validation_status": "valid_python",
1071-
},
1072-
rule_code=None, # Passing check
1073-
)
1074-
else:
1075-
# Not valid Python syntax - might be a configuration issue
1076-
# Only flag if it looks like attempted code execution
1077-
if any(keyword in str(layer_config) for keyword in ["eval", "exec", "compile", "__import__"]):
1078-
result.add_check(
1079-
name="Lambda Layer Suspicious Keywords Check",
1080-
passed=False,
1081-
message="Lambda layer contains suspicious configuration",
1082-
severity=IssueSeverity.WARNING,
1083-
location=self.current_file_path,
1084-
details={
1085-
"layer_class": "Lambda",
1086-
"description": self.suspicious_layer_types["Lambda"],
1087-
"layer_config": layer_config,
1088-
"validation_error": error,
1089-
},
1090-
why=get_pattern_explanation("lambda_layer"),
1091-
rule_code="S1103",
1092-
)
1093-
elif module_name or function_name:
1092+
# Not valid Python syntax - might be a configuration issue
1093+
# Only flag if it looks like attempted code execution
1094+
if any(keyword in str(layer_config) for keyword in ["eval", "exec", "compile", "__import__"]):
1095+
result.add_check(
1096+
name="Lambda Layer Suspicious Keywords Check",
1097+
passed=False,
1098+
message="Lambda layer contains suspicious configuration",
1099+
severity=IssueSeverity.WARNING,
1100+
location=self.current_file_path,
1101+
details={
1102+
"layer_class": "Lambda",
1103+
"description": self.suspicious_layer_types["Lambda"],
1104+
"layer_config": layer_config,
1105+
"validation_error": error,
1106+
},
1107+
why=get_pattern_explanation("lambda_layer"),
1108+
rule_code="S1103",
1109+
)
1110+
module_reference_values = (module_name, function_name)
1111+
has_invalid_module_reference = any(
1112+
value is not None and not isinstance(value, str) for value in module_reference_values
1113+
)
1114+
has_module_reference = any(isinstance(value, str) and bool(value.strip()) for value in module_reference_values)
1115+
if has_module_reference or has_invalid_module_reference:
10941116
# Module/function reference - check for dangerous imports
10951117
if self._is_lambda_module_reference_dangerous(module_name, function_name):
10961118
result.add_check(
@@ -1107,7 +1129,22 @@ def _check_lambda_layer(self, layer_config: dict[str, Any], result: ScanResult)
11071129
why=get_pattern_explanation("lambda_layer"),
11081130
rule_code="S1103",
11091131
)
1110-
else:
1132+
elif has_invalid_module_reference:
1133+
result.add_check(
1134+
name="Lambda Layer Module Reference Check",
1135+
passed=False,
1136+
message="Lambda layer uses malformed module/function reference metadata",
1137+
severity=IssueSeverity.WARNING,
1138+
location=self.current_file_path,
1139+
details={
1140+
"layer_class": "Lambda",
1141+
"module_type": type(module_name).__name__,
1142+
"function_type": type(function_name).__name__,
1143+
},
1144+
why="Malformed Lambda module references cannot be safely classified.",
1145+
rule_code="S1103",
1146+
)
1147+
elif not encoded_function_handled:
11111148
# Safe module reference - record as passed
11121149
result.add_check(
11131150
name="Lambda Layer Module Reference Check",
@@ -1121,9 +1158,6 @@ def _check_lambda_layer(self, layer_config: dict[str, Any], result: ScanResult)
11211158
},
11221159
rule_code=None, # Passing check
11231160
)
1124-
elif isinstance(function_str, dict):
1125-
# Keras 3.x dict-format Lambda: {"class_name": "__lambda__", "config": {"code": ...}}
1126-
check_lambda_dict_function(function_str, result, self.current_file_path, layer_config.get("name", "lambda"))
11271161
# Don't flag Lambda layers without code - they might just be placeholders
11281162

11291163
@staticmethod

0 commit comments

Comments
 (0)