Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.
19 changes: 19 additions & 0 deletions scripts/microgenerator/templates/__init__.py.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from .client import BigQueryClient

__all__ = ("BigQueryClient",)
95 changes: 95 additions & 0 deletions scripts/microgenerator/templates/_helpers.py.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Any, Dict, List, Optional, Type


def _create_request(
request_class: Type,
path_identifier: str,
expected_args: List[str],
default_project_id: Optional[str] = None,
) -> Any:
"""
Constructs a *Request object from a class, path_identifier, and expected args.

Args:
request_class: The class of the request object to create (e.g., GetDatasetRequest).
path_identifier: The dot-separated string of resource IDs.
expected_args: An ordered list of the argument names the request object
expects (e.g., ['project_id', 'dataset_id', 'table_id']).
default_project_id: The default project ID to use if needed.

Returns:
An instantiated request object.

Examples:
>>> # Example with project_id provided in path_identifier
>>> request = _create_request(
... request_class=GetDatasetRequest,
... path_identifier="my-project.my-dataset",
... expected_args=["project_id", "dataset_id"]
... )
>>> request.project_id
'my-project'
>>> request.dataset_id
'my-dataset'

>>> # Example with project_id omitted from path_identifier, using default_project_id
>>> request = _create_request(
... request_class=GetDatasetRequest,
... path_identifier="my-dataset",
... expected_args=["project_id", "dataset_id"],
... default_project_id="my-default-project"
... )
>>> request.project_id
'my-default-project'
>>> request.dataset_id
'my-dataset'

"""
# Start of inlined parse_path_to_request_inputs
segments = path_identifier.split(".")
num_segments = len(segments)
num_expected = len(expected_args)
project_id_is_expected = "project_id" in expected_args

# Validate the number of segments.
if not (
num_segments == num_expected
or (project_id_is_expected and num_segments == num_expected - 1)
):
raise ValueError(
f"Invalid path identifier '{path_identifier}'. Expected "
f"{num_expected} parts (or {num_expected - 1} if project_id is "
f"omitted), but got {num_segments}."
)

# If project_id is implicitly expected, use the default.
if project_id_is_expected and num_segments == num_expected - 1:
if not default_project_id:
raise ValueError(
f"Missing project_id in path '{path_identifier}' and no "
"default_project_id was provided."
)
# Prepend the default project_id to the segments.
segments.insert(0, default_project_id)

request_inputs = dict(zip(expected_args, segments))
# End of inlined parse_path_to_request_inputs

# Instantiate the request object.
return request_class(**request_inputs)
29 changes: 29 additions & 0 deletions scripts/microgenerator/templates/post-processing/init.py.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from google.cloud.bigquery_v2 import gapic_version as package_version

__version__ = package_version.__version__

{% for import in imports %}
{{ import }}
{%- endfor %}

__all__ = (
{%- for item in all_list %}
"{{ item }}",
Comment on lines +22 to +27
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without context for how this is used, I wonder if this is the right level of abstraction? Seems to me that this requires whoever uses this to keep two separate lists in sync. I might prefer to see something like this:

{% for module, obj, alias in imports %}
from {{ module }} import {{ obj }} as {{ alias }}
{%- endfor %}

__all__ = (
{%- for module, obj, alias in imports %}
"{{ alias }}",

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tswast

This is a good question. In the absence of context, it makes total sense to suggest this.

It presupposes that we have discovered all the needed imports and then generate the file from scratch.
That isn't what is happening here. This is intended to quickly overwrite one GAPIC-generated file so that we can include two new lines. And to do so simply and easily.

We start with a file that looks similar to this:

from .services.dataset_service import DatasetServiceClient
from .services.job_service import JobServiceClient
...
from .types.biglake_config import BigLakeConfiguration
from .types.clustering import Clustering
...
from .types.dataset import Access
from .types.dataset import Dataset
...
from .types.dataset import DatasetAccessEntry
from .types.dataset import DatasetList
...
from .types.time_partitioning import TimePartitioning
from .types.udf_resource import UserDefinedFunctionResource

__all__ = (
    "Access",
    "AggregationThresholdPolicy",
    ...
    "BigtableColumn",
    "BigtableColumnFamily",
    ...
    "ColumnReference",
    "ConnectionProperty",
    ...
    "GetServiceAccountRequest",
    "GetServiceAccountResponse",
    ...
    "VectorSearchStatistics",
    "ViewDefinition",
)

And try to create a file that looks like this:

from .services.centralized_service import BigQueryClient  # NEW first line in this alphabetical list
from .services.dataset_service import DatasetServiceClient
from .services.job_service import JobServiceClient
...
from .types.biglake_config import BigLakeConfiguration
from .types.clustering import Clustering
...
from .types.dataset import Access
from .types.dataset import Dataset
...
from .types.dataset import DatasetAccessEntry
from .types.dataset import DatasetList
...
from .types.time_partitioning import TimePartitioning
from .types.udf_resource import UserDefinedFunctionResource

__all__ = (
    "Access",
    "AggregationThresholdPolicy",
    ...
    "BigQueryClient", # NEW line in the middle of this alphabetical list
    "BigtableColumn",
    "BigtableColumnFamily",
    ...
    "ColumnReference",
    "ConnectionProperty",
    ...
    "GetServiceAccountRequest",
    "GetServiceAccountResponse",
    ...
    "VectorSearchStatistics",
    "ViewDefinition",
)

Our process for making this file is simply:

  • read the lines in their entirety from the GAPIC generated file for the import section into a list
  • read the lines in their entirety from the GAPIC generated file for the all section into a list

Add the two lines we care about (e.g. the NEW lines that reference BigQueryClient) to their respective lists and sort the list alphabetically so the sections will come out alphabetically.

We then use those lists as inputs to the template in each section.

Trying to break the lines into component parts (module, object, alias) just complicates what is basically a read a line and then write a line operation.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tswast Following up. How do you feel about this PR. Approve? Needs more work?

{%- endfor %}
)
Loading