Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit ae68f9b

Browse files
committed
tests: add golden sql tests on tpch queries
1 parent 077cb2e commit ae68f9b

File tree

25 files changed

+3384
-2
lines changed

25 files changed

+3384
-2
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import unittest.mock as mock
17+
18+
from google.cloud import bigquery
19+
import pytest
20+
21+
import bigframes
22+
import bigframes.testing.mocks as mocks
23+
24+
# (column name, BigQuery field type) pairs for each TPC-H table, keyed by the
# upper-case table id. Kept as plain tuples so the layout is easy to scan.
_TPCH_COLUMNS = {
    "LINEITEM": (
        ("L_ORDERKEY", "INTEGER"),
        ("L_PARTKEY", "INTEGER"),
        ("L_SUPPKEY", "INTEGER"),
        ("L_LINENUMBER", "INTEGER"),
        ("L_QUANTITY", "FLOAT"),
        ("L_EXTENDEDPRICE", "FLOAT"),
        ("L_DISCOUNT", "FLOAT"),
        ("L_TAX", "FLOAT"),
        ("L_RETURNFLAG", "STRING"),
        ("L_LINESTATUS", "STRING"),
        ("L_SHIPDATE", "DATE"),
        ("L_COMMITDATE", "DATE"),
        ("L_RECEIPTDATE", "DATE"),
        ("L_SHIPINSTRUCT", "STRING"),
        ("L_SHIPMODE", "STRING"),
        ("L_COMMENT", "STRING"),
    ),
    "ORDERS": (
        ("O_ORDERKEY", "INTEGER"),
        ("O_CUSTKEY", "INTEGER"),
        ("O_ORDERSTATUS", "STRING"),
        ("O_TOTALPRICE", "FLOAT"),
        ("O_ORDERDATE", "DATE"),
        ("O_ORDERPRIORITY", "STRING"),
        ("O_CLERK", "STRING"),
        ("O_SHIPPRIORITY", "INTEGER"),
        ("O_COMMENT", "STRING"),
    ),
    "PART": (
        ("P_PARTKEY", "INTEGER"),
        ("P_NAME", "STRING"),
        ("P_MFGR", "STRING"),
        ("P_BRAND", "STRING"),
        ("P_TYPE", "STRING"),
        ("P_SIZE", "INTEGER"),
        ("P_CONTAINER", "STRING"),
        ("P_RETAILPRICE", "FLOAT"),
        ("P_COMMENT", "STRING"),
    ),
    "SUPPLIER": (
        ("S_SUPPKEY", "INTEGER"),
        ("S_NAME", "STRING"),
        ("S_ADDRESS", "STRING"),
        ("S_NATIONKEY", "INTEGER"),
        ("S_PHONE", "STRING"),
        ("S_ACCTBAL", "FLOAT"),
        ("S_COMMENT", "STRING"),
    ),
    "PARTSUPP": (
        ("PS_PARTKEY", "INTEGER"),
        ("PS_SUPPKEY", "INTEGER"),
        ("PS_AVAILQTY", "INTEGER"),
        ("PS_SUPPLYCOST", "FLOAT"),
        ("PS_COMMENT", "STRING"),
    ),
    "CUSTOMER": (
        ("C_CUSTKEY", "INTEGER"),
        ("C_NAME", "STRING"),
        ("C_ADDRESS", "STRING"),
        ("C_NATIONKEY", "INTEGER"),
        ("C_PHONE", "STRING"),
        ("C_ACCTBAL", "FLOAT"),
        ("C_MKTSEGMENT", "STRING"),
        ("C_COMMENT", "STRING"),
    ),
    "NATION": (
        ("N_NATIONKEY", "INTEGER"),
        ("N_NAME", "STRING"),
        ("N_REGIONKEY", "INTEGER"),
        ("N_COMMENT", "STRING"),
    ),
    "REGION": (
        ("R_REGIONKEY", "INTEGER"),
        ("R_NAME", "STRING"),
        ("R_COMMENT", "STRING"),
    ),
}

# Table id -> list of bigquery.SchemaField, in the column order given above.
TPCH_SCHEMAS = {
    table_id: [
        bigquery.SchemaField(column_name, field_type)
        for column_name, field_type in columns
    ]
    for table_id, columns in _TPCH_COLUMNS.items()
}
103+
104+
105+
import freezegun
106+
import pytest
107+
108+
@pytest.fixture(autouse=True, scope="session")
def freeze_time():
    """Hold the clock at 2026-03-10 18:00:00 for the whole test session.

    Autouse, so every test in scope runs under the frozen clock without
    requesting the fixture explicitly.
    """
    frozen_clock = freezegun.freeze_time("2026-03-10 18:00:00")
    with frozen_clock:
        yield
112+
113+
@pytest.fixture(scope="session")
def tpch_session():
    """Build a mocked BigQuery session whose tables expose the TPC-H schemas.

    ``bqclient.get_table`` is stubbed to return an autospec'd
    ``bigquery.Table`` carrying the matching ``TPCH_SCHEMAS`` entry (an empty
    schema for unknown table ids), and the session's executor is replaced
    with ``compiler_session.SQLCompilerExecutor``.
    """
    from bigframes.testing import compiler_session

    location = "us-central1"
    session = mocks.create_bigquery_session(
        anonymous_dataset=bigquery.DatasetReference.from_string(
            "bigframes-dev.tpch"
        ),
        location=location,
    )

    def get_table_mock(table_ref):
        """Return a fake ``bigquery.Table`` for a reference or its string form."""
        ref = (
            bigquery.TableReference.from_string(table_ref)
            if isinstance(table_ref, str)
            else table_ref
        )

        fake_table = mock.create_autospec(bigquery.Table, instance=True)
        fake_table._properties = {}

        # mocks.create_bigquery_session's CURRENT_TIMESTAMP() returns an
        # offset-naive datetime.now(), so the timestamps here are kept
        # offset-naive as well to avoid comparison errors.
        timestamp = datetime.datetime.now()

        # Attributes are installed on the mock's own class so that the
        # PropertyMock objects act as properties.
        fake_cls = type(fake_table)
        fake_cls.project = ref.project
        fake_cls.dataset_id = ref.dataset_id
        fake_cls.table_id = ref.table_id

        property_values = {
            "schema": TPCH_SCHEMAS.get(ref.table_id, []),
            "num_rows": 1000000,
            "num_bytes": 1000000,
            "location": location,
            "table_type": "TABLE",
            "created": timestamp,
            "modified": timestamp,
            "range_partitioning": None,
            "time_partitioning": None,
            "clustering_fields": None,
        }
        for attr_name, attr_value in property_values.items():
            setattr(fake_cls, attr_name, mock.PropertyMock(return_value=attr_value))

        return fake_table

    session.bqclient.get_table.side_effect = get_table_mock
    session._executor = compiler_session.SQLCompilerExecutor()
    return session
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`L_QUANTITY`,
4+
`L_EXTENDEDPRICE`,
5+
`L_DISCOUNT`,
6+
`L_TAX`,
7+
`L_RETURNFLAG`,
8+
`L_LINESTATUS`,
9+
`L_SHIPDATE`,
10+
`L_QUANTITY` AS `bfcol_7`,
11+
`L_EXTENDEDPRICE` AS `bfcol_8`,
12+
`L_DISCOUNT` AS `bfcol_9`,
13+
`L_TAX` AS `bfcol_10`,
14+
`L_RETURNFLAG` AS `bfcol_11`,
15+
`L_LINESTATUS` AS `bfcol_12`,
16+
`L_SHIPDATE` <= CAST('1998-09-02' AS DATE) AS `bfcol_13`,
17+
`L_QUANTITY` AS `bfcol_27`,
18+
`L_EXTENDEDPRICE` AS `bfcol_28`,
19+
`L_DISCOUNT` AS `bfcol_29`,
20+
`L_TAX` AS `bfcol_30`,
21+
`L_RETURNFLAG` AS `bfcol_31`,
22+
`L_LINESTATUS` AS `bfcol_32`,
23+
`L_EXTENDEDPRICE` * (
24+
1.0 - `L_DISCOUNT`
25+
) AS `bfcol_33`,
26+
`L_QUANTITY` AS `bfcol_41`,
27+
`L_EXTENDEDPRICE` AS `bfcol_42`,
28+
`L_DISCOUNT` AS `bfcol_43`,
29+
`L_RETURNFLAG` AS `bfcol_44`,
30+
`L_LINESTATUS` AS `bfcol_45`,
31+
`L_EXTENDEDPRICE` * (
32+
1.0 - `L_DISCOUNT`
33+
) AS `bfcol_46`,
34+
(
35+
`L_EXTENDEDPRICE` * (
36+
1.0 - `L_DISCOUNT`
37+
)
38+
) * (
39+
1.0 + `L_TAX`
40+
) AS `bfcol_47`
41+
FROM `bigframes-dev`.`tpch`.`LINEITEM` AS `bft_0` FOR SYSTEM_TIME AS OF '2026-03-10T18:00:00'
42+
WHERE
43+
`L_SHIPDATE` <= CAST('1998-09-02' AS DATE)
44+
), `bfcte_1` AS (
45+
SELECT
46+
`bfcol_44`,
47+
`bfcol_45`,
48+
COALESCE(SUM(`bfcol_41`), 0) AS `bfcol_55`,
49+
COALESCE(SUM(`bfcol_42`), 0) AS `bfcol_56`,
50+
COALESCE(SUM(`bfcol_46`), 0) AS `bfcol_57`,
51+
COALESCE(SUM(`bfcol_47`), 0) AS `bfcol_58`,
52+
AVG(`bfcol_41`) AS `bfcol_59`,
53+
AVG(`bfcol_42`) AS `bfcol_60`,
54+
AVG(`bfcol_43`) AS `bfcol_61`,
55+
COUNT(`bfcol_41`) AS `bfcol_62`
56+
FROM `bfcte_0`
57+
WHERE
58+
NOT `bfcol_44` IS NULL AND NOT `bfcol_45` IS NULL
59+
GROUP BY
60+
`bfcol_44`,
61+
`bfcol_45`
62+
)
63+
SELECT
64+
`bfcol_44` AS `L_RETURNFLAG`,
65+
`bfcol_45` AS `L_LINESTATUS`,
66+
`bfcol_55` AS `SUM_QTY`,
67+
`bfcol_56` AS `SUM_BASE_PRICE`,
68+
`bfcol_57` AS `SUM_DISC_PRICE`,
69+
`bfcol_58` AS `SUM_CHARGE`,
70+
`bfcol_59` AS `AVG_QTY`,
71+
`bfcol_60` AS `AVG_PRICE`,
72+
`bfcol_61` AS `AVG_DISC`,
73+
`bfcol_62` AS `COUNT_ORDER`
74+
FROM `bfcte_1`
75+
ORDER BY
76+
`bfcol_44` ASC NULLS LAST,
77+
`bfcol_45` ASC NULLS LAST

0 commit comments

Comments
 (0)