Skip to content

Commit 1df343f

Browse files
committed
build: remove ZetaSQL custom validation and modernize dependencies for Python 3.12
This commit updates the TFDV build environment for Python 3.12 compatibility by removing ZetaSQL-dependent code and updating Python package constraints. Specific changes:
* Strip ZetaSQL: Removed `zetasql` and `six` from `WORKSPACE`. Removed the `custom_validation` cc_library, its pybind11 hook (`CustomValidateStatistics`), and related test targets, because ZetaSQL fails to compile on modern toolchains.
* Modernize PyArrow & TF: Updated `setup.py` to allow `pyarrow>=14,<22` for Python >= 3.11 to avoid building legacy Arrow 10 source code. Relaxed the `tensorflow` constraint to `>=2.16,<2.18` and lowered the minimum `tfx-bsl` / `tensorflow-metadata` versions from 1.17.1 to 1.17.0.
* Fix Test Dependencies: Added `scikit-learn==1.5.1` and `scipy==1.17.0` to `install_requires` so that the mutual information generators and their tests can run.
* Cleanup Build Macros: Removed the legacy Python 2 pybind11 initialization symbols (`init%s` and `init_%s`) from `build_macros.bzl`.
1 parent 33db92f commit 1df343f

7 files changed

Lines changed: 17 additions & 109 deletions

File tree

WORKSPACE

Lines changed: 10 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,6 @@ http_archive(
7171
],
7272
)
7373

74-
# Needed by abseil-py by zetasql.
75-
http_archive(
76-
name = "six_archive",
77-
build_file = "//third_party:six.BUILD",
78-
sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
79-
strip_prefix = "six-1.10.0",
80-
urls = [
81-
"http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
82-
"https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
83-
],
84-
)
85-
8674
load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
8775

8876
protobuf_deps()
@@ -112,6 +100,16 @@ http_archive(
112100
url = "https://github.com/abseil/abseil-cpp/archive/%s.tar.gz" % COM_GOOGLE_ABSL_COMMIT,
113101
)
114102

103+
104+
# re2 required for google tests
105+
http_archive(
106+
name = "com_googlesource_code_re2",
107+
# build_file = "//third_party:re2.BUILD",
108+
sha256 = "b90430b2a9240df4459108b3e291be80ae92c68a47bc06ef2dc419c5724de061",
109+
strip_prefix = "re2-a276a8c738735a0fe45a6ee590fe2df69bcf4502",
110+
urls = ["https://github.com/google/re2/archive/a276a8c738735a0fe45a6ee590fe2df69bcf4502.tar.gz"],
111+
)
112+
115113
# Will be loaded by workspace.bzl from head
116114
# TFMD_COMMIT = "404805761e614561cceedc429e67c357c62be26d" # 1.17.1
117115

@@ -218,46 +216,6 @@ load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") #, "go_repository")
218216

219217
gazelle_dependencies()
220218

221-
################################################################################
222-
# ZetaSQL #
223-
################################################################################
224-
225-
ZETASQL_COMMIT = "a516c6b26d183efc4f56293256bba92e243b7a61" # 11/01/2024
226-
227-
http_archive(
228-
name = "com_google_zetasql",
229-
patch_args = ["-p1"],
230-
patches = ["//third_party:zetasql.patch"],
231-
sha256 = "1afc2210d4aad371eff0a6bfdd8417ba99e02183a35dff167af2fa6097643f26",
232-
strip_prefix = "zetasql-%s" % ZETASQL_COMMIT,
233-
urls = ["https://github.com/google/zetasql/archive/%s.tar.gz" % ZETASQL_COMMIT],
234-
)
235-
236-
load("@com_google_zetasql//bazel:zetasql_deps_step_1.bzl", "zetasql_deps_step_1")
237-
238-
zetasql_deps_step_1()
239-
240-
load("@com_google_zetasql//bazel:zetasql_deps_step_2.bzl", "zetasql_deps_step_2")
241-
242-
zetasql_deps_step_2(
243-
analyzer_deps = True,
244-
evaluator_deps = True,
245-
java_deps = False,
246-
testing_deps = False,
247-
tools_deps = False,
248-
)
249-
250-
# No need to run zetasql_deps_step_3 and zetasql_deps_step_4 since all necessary dependencies are
251-
# already installed.
252-
253-
# load("@com_google_zetasql//bazel:zetasql_deps_step_3.bzl", "zetasql_deps_step_3")
254-
255-
# zetasql_deps_step_3()
256-
257-
# load("@com_google_zetasql//bazel:zetasql_deps_step_4.bzl", "zetasql_deps_step_4")
258-
259-
# zetasql_deps_step_4()
260-
261219
_PLATFORMS_VERSION = "0.0.6"
262220

263221
http_archive(

setup.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ def select_constraint(default, nightly=None, git_master=None):
174174
# Make sure to sync the versions of common dependencies (absl-py, numpy,
175175
# six, and protobuf) with TF.
176176
install_requires=[
177+
'scikit-learn==1.5.1',
178+
'scipy==1.17.0',
177179
'absl-py>=0.9,<2.0.0',
178180
'apache-beam[gcp]>=2.53,<3;python_version>="3.11"',
179181
'apache-beam[gcp]>=2.50,<2.51;python_version<"3.11"',
@@ -184,19 +186,20 @@ def select_constraint(default, nightly=None, git_master=None):
184186
'pandas>=1.0,<2',
185187
'protobuf>=4.25.2,<6.0.0;python_version>="3.11"',
186188
'protobuf>=4.21.6,<6.0.0;python_version<"3.11"',
187-
'pyarrow>=10,<11',
189+
'pyarrow>=10,<11;python_version<"3.11"',
190+
'pyarrow>=14,<22;python_version>="3.11"',
188191
'pyfarmhash>=0.2.2,<0.4',
189192
'six>=1.12,<2',
190-
'tensorflow>=2.17,<2.18',
193+
'tensorflow>=2.16,<2.18',
191194
'tensorflow-metadata'
192195
+ select_constraint(
193-
default='>=1.17.1,<1.18',
196+
default='>=1.17.0,<1.18',
194197
nightly='>=1.18.0.dev',
195198
git_master='@git+https://github.com/tensorflow/metadata@master',
196199
),
197200
'tfx-bsl'
198201
+ select_constraint(
199-
default='>=1.17.1,<1.18',
202+
default='>=1.17.0,<1.18',
200203
nightly='>=1.18.0.dev',
201204
git_master='@git+https://github.com/tensorflow/tfx-bsl@master',
202205
),

tensorflow_data_validation/anomalies/BUILD

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -425,38 +425,6 @@ cc_test(
425425
],
426426
)
427427

428-
cc_library(
429-
name = "custom_validation",
430-
srcs = ["custom_validation.cc"],
431-
hdrs = ["custom_validation.h"],
432-
deps = [
433-
":path",
434-
":schema",
435-
":status_util",
436-
"//tensorflow_data_validation/anomalies/proto:custom_validation_config_proto_cc_pb2",
437-
"@com_github_tensorflow_metadata//tensorflow_metadata/proto/v0:metadata_v0_proto_cc_pb2",
438-
"@com_github_tfx_bsl//tfx_bsl/cc/statistics:sql_util",
439-
"@com_google_absl//absl/base:log_severity",
440-
"@com_google_absl//absl/container:flat_hash_map",
441-
"@com_google_absl//absl/log",
442-
"@com_google_absl//absl/status",
443-
"@com_google_absl//absl/strings",
444-
"@com_google_absl//absl/types:optional",
445-
],
446-
)
447-
448-
cc_test(
449-
name = "custom_validation_test",
450-
srcs = ["custom_validation_test.cc"],
451-
deps = [
452-
":custom_validation",
453-
":test_util",
454-
"@com_github_tensorflow_metadata//tensorflow_metadata/proto/v0:metadata_v0_proto_cc_pb2",
455-
"@com_google_absl//absl/types:optional",
456-
"@com_google_googletest//:gtest_main",
457-
],
458-
)
459-
460428
cc_library(
461429
name = "telemetry",
462430
srcs = ["telemetry.cc"],

tensorflow_data_validation/anomalies/custom_validation.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ limitations under the License.
2222
#include "tensorflow_data_validation/anomalies/path.h"
2323
#include "tensorflow_data_validation/anomalies/schema_util.h"
2424
#include "tensorflow_data_validation/anomalies/status_util.h"
25-
#include "tfx_bsl/cc/statistics/sql_util.h"
2625
#include "tensorflow_metadata/proto/v0/anomalies.pb.h"
2726
#include "tensorflow_metadata/proto/v0/path.pb.h"
2827
#include "tensorflow_metadata/proto/v0/statistics.pb.h"

tensorflow_data_validation/build_macros.bzl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ def tfdv_pybind_extension(
2525
so_file = "%s%s.so" % (prefix, sname)
2626
pyd_file = "%s%s.pyd" % (prefix, sname)
2727
exported_symbols = [
28-
"init%s" % sname,
29-
"init_%s" % sname,
3028
"PyInit_%s" % sname,
3129
]
3230

tensorflow_data_validation/pywrap/BUILD

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ cc_library(
4141
],
4242
features = ["-use_header_modules"],
4343
deps = [
44-
"//tensorflow_data_validation/anomalies:custom_validation",
4544
"//tensorflow_data_validation/anomalies:feature_statistics_validator",
4645
"@pybind11",
4746
],

tensorflow_data_validation/pywrap/validation_submodule.cc

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// limitations under the License.
1414
#include "tensorflow_data_validation/pywrap/validation_submodule.h"
1515

16-
#include "tensorflow_data_validation/anomalies/custom_validation.h"
1716
#include "tensorflow_data_validation/anomalies/feature_statistics_validator.h"
1817
#include "include/pybind11/pybind11.h"
1918

@@ -77,22 +76,6 @@ void DefineValidationSubmodule(py::module main_module) {
7776
}
7877
return py::bytes(anomalies_proto_string);
7978
});
80-
m.def("CustomValidateStatistics",
81-
[](const std::string& test_statistics_string,
82-
const std::string& base_statistics_string,
83-
const std::string& validations_string,
84-
const std::string& environment_string) -> py::object {
85-
std::string anomalies_proto_string;
86-
const absl::Status status =
87-
CustomValidateStatisticsWithSerializedInputs(
88-
test_statistics_string, base_statistics_string,
89-
validations_string, environment_string,
90-
&anomalies_proto_string);
91-
if (!status.ok()) {
92-
throw std::runtime_error(status.ToString());
93-
}
94-
return py::bytes(anomalies_proto_string);
95-
});
9679
}
9780

9881
} // namespace data_validation

0 commit comments

Comments
 (0)