|
1 | 1 | """Schema profiling and mapping utilities for tablespec. |
2 | 2 |
|
3 | | -This module provides tools for mapping profiling data and Spark schemas to UMF format. |
| 3 | +This module provides tools for profiling DataFrames and mapping results to UMF format. |
4 | 4 |
|
5 | | -Spark-dependent components (SparkToUmfMapper) require installing tablespec[spark]: |
| 5 | +Components: |
| 6 | + - ``NativeSparkProfiler``: Profiles DataFrames using native SQL (serverless-compatible). |
| 7 | + - ``ProfileToGxMapper``: Generates GX expectations directly from profiling results. |
| 8 | + - ``SparkToUmfMapper``: Maps Spark DataFrame schema to UMF (requires pyspark). |
| 9 | +
|
| 10 | +Type mappings: |
| 11 | + - ``SPARK_TO_UMF_TYPE``: Spark DataType class name → UMF data_type string. |
| 12 | + - ``SQL_TO_UMF_TYPE``: SQL/warehouse type name → UMF data_type string (for dbt, etc.). |
| 13 | +
|
| 14 | +Spark-dependent components require installing tablespec[spark]: |
6 | 15 | pip install tablespec[spark] |
7 | 16 | """ |
8 | 17 |
|
9 | | -from tablespec.profiling.deequ_mapper import DeequToUmfMapper |
| 18 | +from tablespec.profiling.gx_expectation_builder import ProfileToGxMapper |
10 | 19 | from tablespec.profiling.types import ColumnProfile, DataFrameProfile |
11 | 20 |
|
12 | 21 | __all__ = [ |
13 | 22 | "ColumnProfile", |
14 | 23 | "DataFrameProfile", |
15 | | - "DeequToUmfMapper", |
| 24 | + "ProfileToGxMapper", |
16 | 25 | ] |
17 | 26 |
|
18 | | -# SparkToUmfMapper is available only if pyspark is installed |
| 27 | +# SparkToUmfMapper and type mapping dicts are available only if pyspark is installed |
19 | 28 | try: |
20 | | - from tablespec.profiling.spark_mapper import SparkToUmfMapper # noqa: F401 |
| 29 | + from tablespec.profiling.spark_mapper import ( # noqa: F401 |
| 30 | + SPARK_TO_UMF_TYPE, |
| 31 | + SQL_TO_UMF_TYPE, |
| 32 | + SparkToUmfMapper, |
| 33 | + ) |
21 | 34 |
|
22 | | - __all__.append("SparkToUmfMapper") |
| 35 | + __all__.extend(["SparkToUmfMapper", "SPARK_TO_UMF_TYPE", "SQL_TO_UMF_TYPE"]) |
23 | 36 | except ImportError: |
24 | 37 | # pyspark not available - SparkToUmfMapper and the type mapping dicts won't be exported |
25 | 38 | pass |
| 39 | + |
| 40 | +# NativeSparkProfiler requires only pyspark (works on Connect/serverless) |
| 41 | +try: |
| 42 | + from tablespec.profiling.native_profiler import NativeSparkProfiler # noqa: F401 |
| 43 | + |
| 44 | + __all__.append("NativeSparkProfiler") |
| 45 | +except ImportError: |
| 46 | + # pyspark not available - NativeSparkProfiler won't be exported |
| 47 | + pass |
0 commit comments