Skip to content

Commit ee1f6d2

Browse files
authored
refactor!: Remove ability to compare parquet files (#20)
1 parent 4a5db74 commit ee1f6d2

2 files changed

Lines changed: 1 addition & 108 deletions

File tree

sqlcompyre/cli/__main__.py

Lines changed: 1 addition & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
# Copyright (c) QuantCo 2024-2024
1+
# Copyright (c) QuantCo 2024-2025
22
# SPDX-License-Identifier: BSD-3-Clause
33

44
import functools
55
import logging
66
import sys
7-
import tempfile
8-
import warnings
97
from dataclasses import dataclass
108
from pathlib import Path
119
from typing import Literal
@@ -105,84 +103,6 @@ def main(ctx: click.Context, format: str | None, writer: str):
105103
ctx.obj = CliConfig(writer_cls)
106104

107105

108-
@main.command()
109-
@click.argument(
110-
"left_parquet",
111-
type=click.Path(exists=True, dir_okay=True, path_type=Path),
112-
)
113-
@click.argument(
114-
"right_parquet",
115-
type=click.Path(exists=True, dir_okay=True, path_type=Path),
116-
)
117-
@table_comparison_options
118-
@click.pass_obj
119-
def parquet(
120-
obj: CliConfig,
121-
left_parquet: Path,
122-
right_parquet: Path,
123-
join_columns: str | None,
124-
hide_matching_columns: bool,
125-
float_precision: float,
126-
collation: str | None,
127-
ignore_casing: bool,
128-
infer_primary_keys: bool,
129-
):
130-
"""Compare two (sets of) parquet files using DuckDB."""
131-
# Disable some warnings emitted by duckdb-engine
132-
warnings.filterwarnings(
133-
"ignore", message="duckdb-engine doesn't yet support reflection on indices"
134-
)
135-
warnings.filterwarnings("ignore", message="Did not recognize type 'list' of column")
136-
137-
# Create temporary database for subsequent operations
138-
with tempfile.TemporaryDirectory() as tmpdir:
139-
db_path = Path(tmpdir) / "duck.db"
140-
connection_string = f"duckdb:///{db_path}"
141-
engine = sa.create_engine(connection_string)
142-
143-
# Create referenceable views on top of the parquet files
144-
left_source_path = (
145-
str(left_parquet) if left_parquet.is_file() else f"{left_parquet}/*.parquet"
146-
)
147-
left_query = (
148-
"CREATE VIEW left_parquet AS "
149-
f"(SELECT * FROM read_parquet('{left_source_path}'))"
150-
)
151-
152-
right_source_path = (
153-
str(right_parquet)
154-
if right_parquet.is_file()
155-
else f"{right_parquet}/*.parquet"
156-
)
157-
right_query = (
158-
"CREATE VIEW right_parquet AS "
159-
f"(SELECT * FROM read_parquet('{right_source_path}'))"
160-
)
161-
162-
with engine.begin() as tx:
163-
tx.execute(sa.text(left_query))
164-
tx.execute(sa.text(right_query))
165-
166-
# Once these views are created, we can actually run the comparison
167-
# Run the comparison and get the report
168-
comparison = sc.compare_tables(
169-
engine,
170-
"left_parquet",
171-
"right_parquet",
172-
join_columns=join_columns.split(",") if join_columns is not None else None,
173-
float_precision=float_precision,
174-
collation=collation,
175-
ignore_casing=ignore_casing,
176-
infer_primary_keys=infer_primary_keys,
177-
)
178-
report = comparison.summary_report()
179-
180-
# Write the report
181-
obj.writer.write(
182-
{"comparison": report}, hide_matching_columns=hide_matching_columns
183-
)
184-
185-
186106
@main.command()
187107
@click.argument("left_table")
188108
@click.argument("right_table")

tests/cli/test_main_parquet.py

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)