Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 79986dd

Browse files
fix tests
1 parent e4133f0 commit 79986dd

File tree

8 files changed

+56
-70
lines changed

8 files changed

+56
-70
lines changed

bigframes/core/sql/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949

5050

5151
def identifier(name: str) -> str:
    """Return ``name`` wrapped in backticks after passing it through ``escape_chars``.

    Args:
        name: The raw identifier text.

    Returns:
        The backtick-quoted identifier string.

    Raises:
        ValueError: If ``name`` is longer than 256 characters.
    """
    # The guard accepts names of exactly 256 characters, so the message must
    # say "at most 256" rather than "less than 256".
    if len(name) > 256:
        raise ValueError("Identifier must be at most 256 characters")
    return f"`{escape_chars(name)}`"
5555

5656

bigframes/functions/_function_client.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import logging
2020
import os
2121
import random
22+
import re
2223
import shutil
2324
import string
2425
import tempfile
@@ -196,10 +197,8 @@ def create_bq_remote_function(
196197
import bigframes.core.utils
197198

198199
# reject any name that contains something other than letters, numbers or underscores
199-
sql_func_legal_name = bigframes.core.utils.label_to_identifier(
200-
name, strict=True
201-
)
202-
bq_function_name_escaped = bigframes.core.sql.identifier(sql_func_legal_name)
200+
_validate_routine_name(name)
201+
bq_function_name_escaped = bigframes.core.sql.identifier(name)
203202
create_function_ddl = f"""
204203
CREATE OR REPLACE FUNCTION `{self._gcp_project_id}.{self._bq_dataset}`.{bq_function_name_escaped}({udf_def.signature.to_sql_input_signature()})
205204
RETURNS {udf_def.signature.with_devirtualize().output.sql_type}
@@ -263,7 +262,10 @@ def provision_bq_managed_function(
263262
# Augment user package requirements with any internal package
264263
# requirements.
265264
packages = _utils.get_updated_package_requirements(
266-
packages, is_row_processor, capture_references, ignore_package_version=True
265+
packages or [],
266+
is_row_processor,
267+
capture_references,
268+
ignore_package_version=True,
267269
)
268270
if packages:
269271
managed_function_options["packages"] = packages
@@ -579,7 +581,7 @@ def provision_bq_remote_function(
579581
reuse: bool,
580582
name: str | None,
581583
package_requirements: tuple[str, ...],
582-
max_batching_rows: int,
584+
max_batching_rows: int | None,
583585
cloud_function_timeout: int | None,
584586
cloud_function_max_instance_count: int | None,
585587
cloud_function_vpc_connector: str | None,
@@ -591,7 +593,7 @@ def provision_bq_remote_function(
591593
"""Provision a BigQuery remote function."""
592594
# Augment user package requirements with any internal package
593595
# requirements
594-
package_requirements = _utils.get_updated_package_requirements(
596+
full_package_requirements = _utils.get_updated_package_requirements(
595597
package_requirements, func_signature.is_row_processor
596598
)
597599

@@ -611,7 +613,7 @@ def provision_bq_remote_function(
611613
concurrency = (workers * threads) if (expected_milli_cpus >= 1000) else 1
612614

613615
cloud_func_spec = udf_def.CloudRunFunctionConfig(
614-
code=udf_def.CodeDef.from_func(def_, package_requirements),
616+
code=udf_def.CodeDef.from_func(def_, full_package_requirements),
615617
signature=func_signature,
616618
timeout_seconds=cloud_function_timeout,
617619
max_instance_count=cloud_function_max_instance_count,
@@ -655,7 +657,7 @@ def provision_bq_remote_function(
655657
intended_rf_spec = udf_def.RemoteFunctionConfig(
656658
endpoint=cf_endpoint,
657659
connection_id=self._bq_connection_id,
658-
max_batching_rows=max_batching_rows,
660+
max_batching_rows=max_batching_rows or 1000,
659661
signature=func_signature,
660662
bq_metadata=func_signature.protocol_metadata,
661663
)
@@ -728,6 +730,15 @@ def get_bigframes_function_name(
728730
return _BQ_FUNCTION_NAME_SEPERATOR.join(parts)
729731

730732

733+
def _validate_routine_name(name: str) -> None:
734+
"""Validate that the given name is a valid BigQuery routine name."""
735+
# Routine IDs can contain only letters (a-z, A-Z), numbers (0-9), or underscores (_)
736+
if not re.match(r"^[a-zA-Z0-9_]+$", name):
737+
raise ValueError(
738+
"Routine ID can contain only letters (a-z, A-Z), numbers (0-9), or underscores (_)"
739+
)
740+
741+
731742
def _infer_milli_cpus_from_memory(memory_mib: int) -> int:
732743
# observed values, not formally documented by cloud run functions
733744
if memory_mib < 128:

bigframes/functions/_utils.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,12 @@ def _package_existed(package_requirements: list[str], package: str) -> bool:
7575

7676

7777
def get_updated_package_requirements(
78-
package_requirements=None,
79-
is_row_processor=False,
80-
capture_references=True,
81-
ignore_package_version=False,
82-
):
83-
requirements = []
78+
package_requirements: Sequence[str] = (),
79+
is_row_processor: bool = False,
80+
capture_references: bool = True,
81+
ignore_package_version: bool = False,
82+
) -> Sequence[str]:
83+
requirements: list[str] = []
8484
if capture_references:
8585
requirements.append(f"cloudpickle=={cloudpickle.__version__}")
8686

@@ -110,13 +110,12 @@ def get_updated_package_requirements(
110110
if not requirements:
111111
return package_requirements
112112

113-
if not package_requirements:
114-
package_requirements = []
113+
result = list(package_requirements)
115114
for package in requirements:
116-
if not _package_existed(package_requirements, package):
117-
package_requirements.append(package)
115+
if not _package_existed(result, package):
116+
result.append(package)
118117

119-
return sorted(package_requirements)
118+
return sorted(result)
120119

121120

122121
def clean_up_by_session_id(

bigframes/functions/function.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ def output_dtype(self):
222222

223223
@property
224224
def bigframes_bigquery_function_output_dtype(self):
225-
return self.output_dtype
225+
return self.udf_def.signature.output.emulating_type.bf_type
226226

227227

228228
class BigqueryCallableRowRoutine:
@@ -296,4 +296,4 @@ def output_dtype(self):
296296

297297
@property
298298
def bigframes_bigquery_function_output_dtype(self):
299-
return self.output_dtype
299+
return self.udf_def.signature.output.emulating_type.bf_type

bigframes/functions/udf_def.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -295,19 +295,14 @@ def from_routine(
295295
## Handle input types
296296
udf_fields = []
297297

298-
if is_row_processor:
299-
if len(routine.arguments) != 1:
300-
raise ValueError(
301-
"Row processor functions must have exactly one input argument."
302-
)
303-
304-
for argument in routine.arguments:
305-
if is_row_processor:
306-
if argument.data_type.type_kind != "STRING":
298+
for i, argument in enumerate(routine.arguments):
299+
if is_row_processor and i == 0:
300+
if argument.data_type.type_kind == "STRING":
301+
udf_fields.append(UdfArg(argument.name, RowSeriesInputFieldV1()))
302+
else:
307303
raise ValueError(
308-
"Row processor functions must have STRING input type."
304+
"Row processor functions must have STRING input type as first argument."
309305
)
310-
udf_fields.append(UdfArg(argument.name, RowSeriesInputFieldV1()))
311306
udf_fields.append(UdfArg.from_sdk(argument))
312307

313308
return cls(
@@ -317,6 +312,8 @@ def from_routine(
317312

318313
@classmethod
319314
def from_py_signature(cls, signature: inspect.Signature):
315+
import bigframes.series
316+
320317
input_types: list[UdfArg] = []
321318
for parameter in signature.parameters.values():
322319
if parameter.annotation is inspect.Signature.empty:
@@ -326,6 +323,10 @@ def from_py_signature(cls, signature: inspect.Signature):
326323
f"'{parameter.name}' is missing a type annotation. "
327324
"Types are required to use udfs.",
328325
)
326+
if parameter.annotation is bigframes.series.Series:
327+
raise TypeError(
328+
"Argument type hint must be Pandas Series, not BigFrames Series."
329+
)
329330

330331
input_types.append(UdfArg.from_py_param(parameter))
331332

bigframes/testing/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ def get_function_name(func, package_requirements=None, is_row_processor=False):
513513
# Augment user package requirements with any internal package
514514
# requirements.
515515
package_requirements = bff_utils.get_updated_package_requirements(
516-
package_requirements, is_row_processor
516+
package_requirements or [], is_row_processor
517517
)
518518

519519
# Compute a unique hash representing the user code.

tests/system/large/functions/test_remote_function.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import bigframes.dataframe
3434
import bigframes.dtypes
3535
import bigframes.exceptions
36-
import bigframes.functions._utils as bff_utils
3736
import bigframes.pandas as bpd
3837
import bigframes.series
3938
from bigframes.testing.utils import (
@@ -527,22 +526,6 @@ def add_one(x):
527526
# Make a unique udf
528527
add_one_uniq, add_one_uniq_dir = make_uniq_udf(add_one)
529528

530-
# Expected cloud function name for the unique udf
531-
package_requirements = bff_utils.get_updated_package_requirements()
532-
add_one_uniq_hash = bff_utils.get_hash(add_one_uniq, package_requirements)
533-
add_one_uniq_cf_name = f"bff_{add_one_uniq_hash}_{session.session_id}"
534-
535-
# There should be no cloud function yet for the unique udf
536-
cloud_functions = list(
537-
get_cloud_functions(
538-
session.cloudfunctionsclient,
539-
session.bqclient.project,
540-
session.bqclient.location,
541-
name=add_one_uniq_cf_name,
542-
)
543-
)
544-
assert len(cloud_functions) == 0
545-
546529
# The first time both the cloud function and the bq remote function don't
547530
# exist and would be created
548531
remote_add_one = session.remote_function(
@@ -554,6 +537,9 @@ def add_one(x):
554537
cloud_function_service_account="default",
555538
)(add_one_uniq)
556539

540+
assert remote_add_one.bigframes_cloud_function is not None
541+
add_one_uniq_cf_name = remote_add_one.bigframes_cloud_function.split("/")[-1]
542+
557543
# There should have been exactly one cloud function created at this point
558544
cloud_functions = list(
559545
get_cloud_functions(
@@ -1561,7 +1547,9 @@ def square(x):
15611547
bq_routine = session.bqclient.get_routine(
15621548
square_remote.bigframes_bigquery_function
15631549
)
1564-
assert bq_routine.remote_function_options.max_batching_rows == max_batching_rows
1550+
assert bq_routine.remote_function_options.max_batching_rows == (
1551+
max_batching_rows or 1000
1552+
)
15651553

15661554
scalars_df, scalars_pandas_df = scalars_dfs
15671555

@@ -1690,7 +1678,8 @@ def test_remote_function_reflects_config_change_with_reuse(session):
16901678
def square(x):
16911679
return x * x
16921680

1693-
deploy_name = str(uuid.uuid4())
1681+
# random alphanumeric name
1682+
deploy_name = str(uuid.uuid4().hex)
16941683
square_remote = session.remote_function(
16951684
input_types=[int],
16961685
name=deploy_name,

tests/unit/functions/test_remote_function_utils.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,6 @@ def test_get_remote_function_locations(
4141
assert cf_region == expected_cf_region
4242

4343

44-
def test_get_updated_package_requirements_no_extra_package():
45-
"""Tests with no extra package."""
46-
result = _utils.get_updated_package_requirements(capture_references=False)
47-
48-
assert result is None
49-
50-
initial_packages = ["xgboost"]
51-
result = _utils.get_updated_package_requirements(
52-
initial_packages, capture_references=False
53-
)
54-
55-
assert result == initial_packages
56-
57-
5844
@patch("bigframes.functions._utils.numpy.__version__", "1.24.4")
5945
@patch("bigframes.functions._utils.pyarrow.__version__", "14.0.1")
6046
@patch("bigframes.functions._utils.pandas.__version__", "2.0.3")
@@ -100,7 +86,7 @@ def test_get_updated_package_requirements_capture_references_false():
10086
# Case 1: Only capture_references=False.
10187
result_1 = _utils.get_updated_package_requirements(capture_references=False)
10288

103-
assert result_1 is None
89+
assert len(result_1) == 0
10490

10591
# Case 2: capture_references=False but is_row_processor=True.
10692
expected_2 = ["numpy==1.24.4", "pandas==2.0.3", "pyarrow==14.0.1"]

0 commit comments

Comments
 (0)