This repository was archived by the owner on Apr 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 67
Expand file tree
/
Copy path__init__.py
More file actions
206 lines (193 loc) · 5.94 KB
/
__init__.py
File metadata and controls
206 lines (193 loc) · 5.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Access BigQuery-specific operations and namespaces within BigQuery DataFrames.
This module provides specialized functions and sub-modules that expose BigQuery's
advanced analytics capabilities directly to DataFrames and Series. Designed for data scientists,
data engineers, and data analysts, it acts as a bridge between the intuitive
pandas-compatible API and the massive scale and power of BigQuery SQL.
Key sub-modules include:
* :mod:`bigframes.bigquery.ai`: Generative and predictive AI functions (Gemini, LLMs, BQML) for AI developers and data scientists.
* :mod:`bigframes.bigquery.ml`: Direct access to BigQuery ML model operations for building scalable ML pipelines.
* :mod:`bigframes.bigquery.obj`: Support for BigQuery object tables, essential for handling unstructured data like images and PDFs.
This module also provides direct access to optimized BigQuery functions tailored for data engineering and advanced analytics workflows:
* **JSON Processing:** High-performance functions like ``json_extract``, ``json_value``,
and ``parse_json`` for transforming semi-structured log data.
* **Geospatial Analysis:** Comprehensive geographic functions such as ``st_area``,
``st_distance``, and ``st_centroid`` (``ST_`` prefixed functions) to unlock location-based insights.
* **Array Operations:** Tools for working with BigQuery arrays, including ``array_agg``
and ``array_length``, handling nested repeated fields efficiently.
* **Vector Search:** Integration with BigQuery's vector search and indexing
capabilities for high-dimensional data, semantic search, and RAG architectures.
* **Custom SQL:** The ``sql_scalar`` function allows embedding raw SQL snippets, giving data engineers an escape hatch for complex, custom BigQuery operations.
By using these functions, data professionals can leverage BigQuery's distributed compute engine for
domain-specific tasks at petabyte scale, while maintaining a productive Python-centric development experience.
For the full list of BigQuery standard SQL functions, see:
https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference
"""
import sys
from bigframes.bigquery import ai, ml, obj
from bigframes.bigquery._operations.approx_agg import approx_top_count
from bigframes.bigquery._operations.array import (
array_agg,
array_length,
array_to_string,
)
from bigframes.bigquery._operations.datetime import (
unix_micros,
unix_millis,
unix_seconds,
)
from bigframes.bigquery._operations.geo import (
st_area,
st_buffer,
st_centroid,
st_convexhull,
st_difference,
st_distance,
st_intersection,
st_isclosed,
st_length,
st_regionstats,
st_simplify,
)
from bigframes.bigquery._operations.io import load_data
from bigframes.bigquery._operations.json import (
json_extract,
json_extract_array,
json_extract_string_array,
json_keys,
json_query,
json_query_array,
json_set,
json_value,
json_value_array,
parse_json,
to_json,
to_json_string,
)
from bigframes.bigquery._operations.mathematical import rand
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
from bigframes.bigquery._operations.table import create_external_table
from bigframes.core.logging import log_adapter
# Public functions of this module that are wrapped with
# ``log_adapter.method_logger`` below. Every function imported above and
# exported through ``__all__`` should appear here exactly once; a function left
# out is still importable but is never passed through the logger.
_functions = [
    # approximate aggregate ops
    approx_top_count,
    # array ops
    array_agg,
    array_length,
    array_to_string,
    # datetime ops
    unix_micros,
    unix_millis,
    unix_seconds,
    # geo ops
    st_area,
    st_buffer,
    st_centroid,
    st_convexhull,
    st_difference,
    st_distance,
    st_intersection,
    st_isclosed,
    st_length,
    st_regionstats,
    st_simplify,
    # json ops
    json_extract,
    json_extract_array,
    json_extract_string_array,
    # json_keys was imported and exported but previously not wrapped.
    json_keys,
    json_query,
    json_query_array,
    json_set,
    json_value,
    json_value_array,
    parse_json,
    to_json,
    to_json_string,
    # mathematical ops
    rand,
    # search ops
    create_vector_index,
    vector_search,
    # sql ops
    sql_scalar,
    # struct ops
    struct,
    # table ops
    create_external_table,
    # io ops
    load_data,
]

# Rebind each public function on this module to the object returned by
# ``log_adapter.method_logger`` so that ``bigframes.bigquery.<name>`` resolves
# to the wrapped version rather than the raw import.
# NOTE(review): presumably the wrapper records API usage under the "bigquery"
# base name -- confirm against ``bigframes.core.logging.log_adapter``.
_module = sys.modules[__name__]
for f in _functions:
    _decorated_object = log_adapter.method_logger(f, custom_base_name="bigquery")
    setattr(_module, f.__name__, _decorated_object)
    # Drop the loop variable so it does not linger as a module attribute ``f``.
    # Deleting inside the loop is safe (the ``for`` statement rebinds it on the
    # next iteration) and cannot raise NameError if the list were ever empty.
    del f
# Names exported by ``from bigframes.bigquery import *``. Kept as a plain list
# literal of strings, grouped in the same categories as the imports at the top
# of this module. The final group lists the ``ai``, ``ml`` and ``obj``
# sub-modules rather than functions.
__all__ = [
# approximate aggregate ops
"approx_top_count",
# array ops
"array_agg",
"array_length",
"array_to_string",
# datetime ops
"unix_micros",
"unix_millis",
"unix_seconds",
# geo ops
"st_area",
"st_buffer",
"st_centroid",
"st_convexhull",
"st_difference",
"st_distance",
"st_intersection",
"st_isclosed",
"st_length",
"st_regionstats",
"st_simplify",
# json ops
"json_extract",
"json_extract_array",
"json_extract_string_array",
"json_keys",
"json_query",
"json_query_array",
"json_set",
"json_value",
"json_value_array",
"parse_json",
"to_json",
"to_json_string",
# mathematical ops
"rand",
# search ops
"create_vector_index",
"vector_search",
# sql ops
"sql_scalar",
# struct ops
"struct",
# table ops
"create_external_table",
# io ops
"load_data",
# Modules / SQL namespaces
"ai",
"ml",
"obj",
]