Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.
Merged
59 changes: 54 additions & 5 deletions google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,20 @@ def entity_id(self) -> Optional[Union[Dict[str, Any], str]]:
def __eq__(self, other):
if not isinstance(other, AccessEntry):
return NotImplemented
return self._key() == other._key()

return (
self.role == other.role
and self.entity_type == other.entity_type
and self._normalize_entity_id(self.entity_id) == self._normalize_entity_id(other.entity_id)
and self.condition == other.condition
)

@staticmethod
def _normalize_entity_id(value):
"""Ensure consistent equality for dicts like 'view'."""
if isinstance(value, dict):
return dict(sorted(value.items()))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The normalization here may not work if there's nested dictionaries, and it might happen, for example here. The most reliable way might be using json.dumps() with sort_keys=True.

@drokeye drokeye Jun 19, 2025

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @Linchin — thanks for pointing that out.

You're absolutely right that dict(sorted(...)) won't handle deeply nested dictionaries reliably. In this case though, I tested equality on AccessEntry objects with view, routine, and dataset entity types — including nested structures like the one in dataset. The current approach passed all assertions in the test, and it seems to hold up.

Totally agree that json.dumps(..., sort_keys=True) would be the most robust and reliable method. I will change it right now👍

For reference, here’s the test case I used:

def test_access_entry_view_equality(self):
    from google.cloud import bigquery

    entry1 = bigquery.dataset.AccessEntry(
        entity_type="view",
        entity_id={
            "projectId": "my_project",
            "datasetId": "my_dataset",
            "tableId": "my_table",
        },
    )

    entry2 = bigquery.dataset.AccessEntry.from_api_repr({
        "view": {
            "projectId": "my_project",
            "datasetId": "my_dataset",
            "tableId": "my_table",
        }
    })

    entry3 = bigquery.dataset.AccessEntry(
        entity_type="routine",
        entity_id={
            "projectId": "my_project",
            "datasetId": "my_dataset",
            "routineId": "my_routine",
        },
    )

    entry4 = bigquery.dataset.AccessEntry.from_api_repr({
        "routine": {
            "projectId": "my_project",
            "datasetId": "my_dataset",
            "routineId": "my_routine",
        }
    })

    entry5 = bigquery.dataset.AccessEntry(
        entity_type="dataset",
        entity_id={
            "dataset": {
                "projectId": "my_project",
                "datasetId": "my_dataset",
            },
            "target_types": "VIEWS",
        },
    )

    entry6 = bigquery.dataset.AccessEntry.from_api_repr({
        "dataset": {
            "dataset": {
                "projectId": "my_project",
                "datasetId": "my_dataset",
            },
            "target_types": "VIEWS",
        }
    })

    entry7 = bigquery.dataset.AccessEntry(
            entity_type="dataset",
            entity_id={ 
                "dataset": {
                    "location": {
                        "region": {
                            "zone": "asia-south1-a",
                            "subzone": "primary"
                        },
                        "country": "IN"
                    },
                    "datasetId": "my_dataset"
                },
                "target_types": "VIEWS"
            },
        )
        
        entry8 = bigquery.dataset.AccessEntry.from_api_repr({
            "dataset": {
                "target_types": "VIEWS",
                "dataset": {
                    "datasetId": "my_dataset",
                    "location": {
                        "country": "IN",
                        "region": {
                            "subzone": "primary",
                            "zone": "asia-south1-a"
                        }
                    }
                }
            }
        })

    assert entry1 == entry2
    assert entry3 == entry4
    assert entry5 == entry6
    assert entry7 == entry8

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for making the change, I guess all test cases passed because we didn't juggle the order of items in the nested dictionary :)

return value

def __ne__(self, other):
return not self == other
Expand Down Expand Up @@ -542,8 +555,22 @@ def to_api_repr(self):
Returns:
Dict[str, object]: Access entry represented as an API resource
"""
resource = copy.deepcopy(self._properties)
resource = {}

if self.role is not None:

@Linchin Linchin Jun 23, 2025

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we continue using copy.deepcopy() here? I suppose it should take care of any object that needs to be duplicated recursively.

Sorry that I am only commenting on this now, somehow I missed it the last time :/

resource["role"] = self.role
if self.entity_type is not None:
resource[self.entity_type] = self.entity_id
if self.condition is not None:
resource["condition"] = self.condition.to_api_repr()

# Include any extra items preserved in _properties
for k, v in self._properties.items():
if k not in resource:
resource[k] = v

return resource


@classmethod
def from_api_repr(cls, resource: dict) -> "AccessEntry":
Expand All @@ -557,10 +584,32 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry":
google.cloud.bigquery.dataset.AccessEntry:
Access entry parsed from ``resource``.
"""

role = resource.get("role")
condition = None
if "condition" in resource:
condition = Condition.from_api_repr(resource["condition"])

entity_type = None
entity_id = None

for key, value in resource.items():

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to https://github.com/googleapis/python-bigquery/pull/2218/files#r2162644585, could you explain why we need to manually copy the resource dict? It doesn't seem very future-proof if we add any new fields in the future. Also, we deliberately used resource.copy() to make a shallow copy, mostly in order to reduce complexity and increase speed. Is deepcopy necessary here?

@drokeye drokeye Jun 24, 2025

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally switched to a manual copy/deepcopy mainly to help isolate issues during tests, especially with view/routine equality, where we needed full round-trip consistency via _properties. But I agree, it's my bad it's not ideal. Reverted this back to the original code.

if key in ("role", "condition"):
continue
entity_type = key
entity_id = value
break

entry = cls(
role=role,
entity_type=entity_type,
entity_id=entity_id,
condition=condition,
)

access_entry = cls()
access_entry._properties = resource.copy()
return access_entry
# Preserve additional _properties for roundtrip consistency:
entry._properties = dict(resource)
return entry


class Dataset(object):
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,3 +1767,26 @@ def test__hash__with_minimal_inputs(self):
description=None,
)
assert hash(cond1) is not None

def test_access_entry_view_equality(self):

from google.cloud import bigquery

entry1 = bigquery.dataset.AccessEntry(
entity_type="view",
entity_id={
"projectId": "my_project",
"datasetId": "my_dataset",
"tableId": "my_table",
},
)
entry2 = bigquery.dataset.AccessEntry.from_api_repr({
"view": {
"projectId": "my_project",
"datasetId": "my_dataset",
"tableId": "my_table",
}
})

assert entry1 == entry2

Loading