Skip to content

Commit 70dc6bf

Browse files
authored
fix(spanner): catch recursion and decode errors in proto parsing to p… (#16561)
This PR fixes a Persistent Stored Denial of Service (DoS) vulnerability in the google-cloud-spanner Python SDK (Issue 479858035). **The Problem** When the SDK attempts to deserialize a Protobuf-encoded row (via _parse_proto() in _helpers.py) that contains a maliciously crafted "recursion bomb" (e.g., a ListValue nested 1,000+ times), it triggers a DecodeError or RecursionError. This unhandled exception crashes the consumer thread and blocks the entire result set stream ("pipeline blackhole"). **The Solution** We modify _parse_proto to wrap the ParseFromString() call in a defensive try...except block: Catch RecursionError (triggered if Python hits its stack limit first in pure Python implementations). Catch google.protobuf.message.DecodeError (triggered by the C++ extension's internal limits). If an error is caught: A warning is logged. The original raw bytes_value is returned as a fallback (consistent with existing behavior when no prototype is found). This allows the stream iterator to continue processing subsequent rows.
1 parent c5728b2 commit 70dc6bf

File tree

2 files changed

+64
-5
lines changed

2 files changed

+64
-5
lines changed

packages/google-cloud-spanner/google/cloud/spanner_v1/_helpers.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from google.api_core.exceptions import Aborted
2929
from google.cloud._helpers import _date_from_iso8601_date
3030
from google.protobuf.internal.enum_type_wrapper import EnumTypeWrapper
31-
from google.protobuf.message import Message
31+
from google.protobuf.message import DecodeError, Message
3232
from google.protobuf.struct_pb2 import ListValue, Value
3333
from google.rpc.error_details_pb2 import RetryInfo
3434

@@ -76,7 +76,7 @@
7676

7777
GOOGLE_CLOUD_REGION_GLOBAL = "global"
7878

79-
log = logging.getLogger(__name__)
79+
_LOGGER = logging.getLogger(__name__)
8080

8181
_cloud_region: str = None
8282

@@ -122,7 +122,7 @@ def _get_cloud_region() -> str:
122122
else:
123123
_cloud_region = GOOGLE_CLOUD_REGION_GLOBAL
124124
except Exception as e:
125-
log.warning(
125+
_LOGGER.warning(
126126
"Failed to detect GCP resource location for Spanner metrics, defaulting to 'global'. Error: %s",
127127
e,
128128
)
@@ -603,8 +603,14 @@ def _parse_proto(value_pb, column_info, field_name):
603603
default_proto_message = column_info.get(field_name)
604604
if isinstance(default_proto_message, Message):
605605
proto_message = type(default_proto_message)()
606-
proto_message.ParseFromString(bytes_value)
607-
return proto_message
606+
try:
607+
proto_message.ParseFromString(bytes_value)
608+
return proto_message
609+
except (DecodeError, RecursionError):
610+
_LOGGER.warning(
611+
"Field could not be parsed as Proto due to excessive nesting/corruption. Returning raw bytes."
612+
)
613+
return bytes_value
608614
return bytes_value
609615

610616

packages/google-cloud-spanner/tests/unit/test__helpers.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,59 @@ def test_w_proto_message(self):
771771
self._callFUT(value_pb, field_type, field_name, column_info), VALUE
772772
)
773773

774+
def test_w_proto_message_decode_error(self):
775+
import base64
776+
from unittest import mock
777+
778+
from google.protobuf.message import DecodeError
779+
from google.protobuf.struct_pb2 import Value
780+
781+
from google.cloud.spanner_v1 import Type, TypeCode
782+
783+
from .testdata import singer_pb2
784+
785+
VALUE = singer_pb2.SingerInfo(singer_id=1, nationality="Canadian")
786+
field_type = Type(code=TypeCode.PROTO)
787+
field_name = "proto_message_column"
788+
raw_bytes = VALUE.SerializeToString()
789+
value_pb = Value(string_value=base64.b64encode(raw_bytes).decode("utf-8"))
790+
column_info = {"proto_message_column": singer_pb2.SingerInfo()}
791+
792+
# Mock ParseFromString to raise DecodeError
793+
with mock.patch.object(
794+
singer_pb2.SingerInfo,
795+
"ParseFromString",
796+
side_effect=DecodeError("Mock Decode Error"),
797+
):
798+
result = self._callFUT(value_pb, field_type, field_name, column_info)
799+
# Should return raw bytes
800+
self.assertEqual(result, raw_bytes)
801+
802+
def test_w_proto_message_recursion_error(self):
803+
import base64
804+
from unittest import mock
805+
806+
from google.protobuf.struct_pb2 import Value
807+
808+
from google.cloud.spanner_v1 import Type, TypeCode
809+
810+
from .testdata import singer_pb2
811+
812+
VALUE = singer_pb2.SingerInfo(singer_id=1, nationality="Canadian")
813+
field_type = Type(code=TypeCode.PROTO)
814+
field_name = "proto_message_column"
815+
raw_bytes = VALUE.SerializeToString()
816+
value_pb = Value(string_value=base64.b64encode(raw_bytes).decode("utf-8"))
817+
column_info = {"proto_message_column": singer_pb2.SingerInfo()}
818+
819+
with mock.patch.object(
820+
singer_pb2.SingerInfo,
821+
"ParseFromString",
822+
side_effect=RecursionError("Mock Recursion Error"),
823+
):
824+
result = self._callFUT(value_pb, field_type, field_name, column_info)
825+
self.assertEqual(result, raw_bytes)
826+
774827
def test_w_proto_enum(self):
775828
from google.protobuf.struct_pb2 import Value
776829

0 commit comments

Comments
 (0)