This repository was archived by the owner on Apr 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 68
Expand file tree
/
Copy pathcompute_options.py
More file actions
215 lines (160 loc) · 7.42 KB
/
compute_options.py
File metadata and controls
215 lines (160 loc) · 7.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Options for displaying objects."""
import dataclasses
from typing import Any, Dict, Optional
@dataclasses.dataclass
class ComputeOptions:
    """
    Encapsulates the configuration for compute options.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")

    >>> bpd.options.compute.maximum_bytes_billed = 500 # doctest: +SKIP
    >>> df.to_pandas() # this should fail # doctest: +SKIP
    google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required.

    >>> bpd.options.compute.maximum_bytes_billed = None # reset option # doctest: +SKIP

    To add multiple extra labels to a query configuration, use the `assign_extra_query_labels`
    method with keyword arguments:

    >>> bpd.options.compute.assign_extra_query_labels(test1=1, test2="abc") # doctest: +SKIP
    >>> bpd.options.compute.extra_query_labels # doctest: +SKIP
    {'test1': 1, 'test2': 'abc'}

    Alternatively, you can add labels individually by directly accessing the `extra_query_labels`
    dictionary:

    >>> bpd.options.compute.extra_query_labels["test3"] = False # doctest: +SKIP
    >>> bpd.options.compute.extra_query_labels # doctest: +SKIP
    {'test1': 1, 'test2': 'abc', 'test3': False}

    To remove a label from the configuration, use the `del` keyword on the desired label key:

    >>> del bpd.options.compute.extra_query_labels["test1"] # doctest: +SKIP
    >>> bpd.options.compute.extra_query_labels # doctest: +SKIP
    {'test2': 'abc', 'test3': False}
    """

    ai_ops_confirmation_threshold: Optional[int] = 0
    """
    Guards against unexpected processing of large amount of rows by semantic operators.

    If the number of rows exceeds the threshold, the user will be asked to confirm
    their operations to resume. The default value is 0. Set the value to None
    to turn off the guard.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> bpd.options.compute.ai_ops_confirmation_threshold = 100 # doctest: +SKIP

    Returns:
        Optional[int]: Number of rows.
    """

    ai_ops_threshold_autofail: bool = False
    """
    Guards against unexpected processing of large amount of rows by semantic operators.

    When set to True, the operation automatically fails without asking for user inputs.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> bpd.options.compute.ai_ops_threshold_autofail = True # doctest: +SKIP

    Returns:
        bool: True if the guard is enabled.
    """

    allow_large_results: Optional[bool] = None
    """
    Specifies whether query results can exceed 10 GB.

    Setting this to False restricts results to 10 GB for potentially faster
    execution; BigQuery will raise an error if this limit is exceeded.
    Setting to True removes this result size limit.

    NOTE(review): the field default is ``None`` (the previous docstring said
    "Defaults to False"); presumably ``None`` defers to a session- or
    system-level default — confirm against the consumers of this option.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> bpd.options.compute.allow_large_results = True # doctest: +SKIP

    Returns:
        bool | None: True if results > 10 GB are enabled.
    """

    enable_multi_query_execution: bool = False
    """
    If enabled, large queries may be factored into multiple smaller queries.

    This is in order to avoid generating queries that are too complex for the
    query engine to handle. However this comes at the cost of increase cost and
    latency.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> bpd.options.compute.enable_multi_query_execution = True # doctest: +SKIP

    Returns:
        bool | None: True if enabled.
    """

    extra_query_labels: Dict[str, Any] = dataclasses.field(
        default_factory=dict, init=False
    )
    """
    Stores additional custom labels for query configuration.

    Returns:
        Dict[str, Any] | None: Additional labels.
    """

    maximum_bytes_billed: Optional[int] = None
    """
    Limits the bytes billed for query jobs.

    Queries that will have bytes billed beyond this limit will fail (without
    incurring a charge). If unspecified, this will be set to your project
    default. See `maximum_bytes_billed`:
    https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> bpd.options.compute.maximum_bytes_billed = 1000 # doctest: +SKIP

    Returns:
        int | None: Number of bytes, if set.
    """

    maximum_result_rows: Optional[int] = None
    """
    Limits the number of rows in an execution result.

    When converting a BigQuery DataFrames object to a pandas DataFrame or Series
    (e.g., using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct
    iteration), the data is downloaded from BigQuery to the client machine. This
    option restricts the number of rows that can be downloaded. If the number
    of rows to be downloaded exceeds this limit, a
    ``bigframes.exceptions.MaximumResultRowsExceeded`` exception is raised.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> bpd.options.compute.maximum_result_rows = 1000 # doctest: +SKIP

    Returns:
        int | None: Number of rows, if set.
    """

    semantic_ops_confirmation_threshold: Optional[int] = 0
    """
    Deprecated.

    .. deprecated:: 1.42.0
        Semantic operators are deprecated. Please use the functions in
        :mod:`bigframes.bigquery.ai` instead.
    """

    # Fix: the original line lacked a type annotation, so dataclasses treated
    # this as a shared class attribute rather than a per-instance field —
    # inconsistent with ``ai_ops_threshold_autofail`` above. Annotating it
    # makes it a proper dataclass field; the default is unchanged, so existing
    # callers are unaffected.
    semantic_ops_threshold_autofail: bool = False
    """
    Deprecated.

    .. deprecated:: 1.42.0
        Semantic operators are deprecated. Please use the functions in
        :mod:`bigframes.bigquery.ai` instead.
    """

    def assign_extra_query_labels(self, **kwargs: Any) -> None:
        """
        Assigns additional custom labels for query configuration. The method updates the
        `extra_query_labels` dictionary with new labels provided through keyword arguments.

        Args:
            kwargs (Any):
                Custom labels provided as keyword arguments. Each key-value pair
                in `kwargs` represents a label name and its value.

        Raises:
            ValueError: If a key matches one of the reserved attribute names,
                specifically 'maximum_bytes_billed' or 'enable_multi_query_execution',
                to prevent conflicts with built-in settings.
        """
        # Reject reserved keys up front; no labels are applied unless every
        # key is valid (the update below runs only after full validation).
        reserved_keys = frozenset(
            {"maximum_bytes_billed", "enable_multi_query_execution"}
        )
        for key in kwargs:
            if key in reserved_keys:
                raise ValueError(
                    f"'{key}' is a reserved attribute name. Please use "
                    "a different key for your custom labels to avoid "
                    "conflicts with built-in settings."
                )
        self.extra_query_labels.update(kwargs)