Fix connect() for Spark Connect and simplify CI workflow #199
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow for the PyDeequ V2 test suite.
#
# Flow: install Python/Java/Spark and the Deequ JAR, run the V2 unit tests,
# start a local Spark Connect server with the Deequ relation plugin loaded,
# run the remaining V2 tests against it, and always stop the server afterwards.
name: PyDeequ V2 Tests

on:
  push:
    branches:
      - "**"
  pull_request:
    branches:
      - "master"

jobs:
  # V2 tests with Spark Connect (Python 3.12)
  v2-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install Python 3.12
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is never parsed as a float.
          python-version: "3.12"

      - name: Setup Java 17
        uses: actions/setup-java@v4
        with:
          distribution: "corretto"
          java-version: "17"

      - name: Download Spark 3.5
        run: |
          # -f: fail on HTTP errors instead of saving an error page as the archive.
          curl -fSL --retry 3 -o spark-3.5.0-bin-hadoop3.tgz \
            https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
          tar -xzf spark-3.5.0-bin-hadoop3.tgz
          # Export SPARK_HOME to all subsequent steps of this job.
          echo "SPARK_HOME=$PWD/spark-3.5.0-bin-hadoop3" >> "$GITHUB_ENV"

      - name: Download Deequ JAR
        run: |
          # -f: fail on HTTP errors instead of saving an error page as the JAR.
          curl -fSL --retry 3 -o deequ_2.12-2.1.0b-spark-3.5.jar \
            https://github.com/awslabs/python-deequ/releases/download/v2.0.0b1/deequ_2.12-2.1.0b-spark-3.5.jar

      - name: Install Python dependencies
        run: |
          uv pip install -e ".[dev]" --system
          uv pip install "pyspark[connect]==3.5.0" --system

      # Unit tests run before the server is started.
      - name: Run V2 unit tests
        run: |
          pytest tests/v2/test_unit.py -v

      - name: Start Spark Connect Server
        run: |
          "$SPARK_HOME/sbin/start-connect-server.sh" \
            --packages org.apache.spark:spark-connect_2.12:3.5.0 \
            --jars "${{ github.workspace }}/deequ_2.12-2.1.0b-spark-3.5.jar" \
            --conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin

          # Startup time is not fixed, so poll the port used by SPARK_REMOTE
          # (15002) for up to ~120s instead of sleeping a constant interval.
          ready=false
          for _ in $(seq 1 60); do
            if (exec 3<>/dev/tcp/localhost/15002) 2>/dev/null; then
              ready=true
              break
            fi
            sleep 2
          done

          if [ "$ready" != "true" ]; then
            echo "Spark Connect server did not open port 15002 in time" >&2
            # Best-effort diagnostics; assumes the daemon logs under
            # $SPARK_HOME/logs - TODO confirm for this Spark distribution.
            cat "$SPARK_HOME"/logs/* 2>/dev/null || true
            exit 1
          fi

          # Fails the step if no SparkConnectServer process is running.
          ps aux | grep SparkConnectServer | grep -v grep

      - name: Run V2 integration tests
        env:
          SPARK_REMOTE: "sc://localhost:15002"
        run: |
          # Everything under tests/v2/ except the unit tests already run above.
          pytest tests/v2/ -v --ignore=tests/v2/test_unit.py

      - name: Stop Spark Connect Server
        # Run even when earlier steps failed so the server is always shut down.
        if: always()
        run: |
          # "|| true": a failed stop must not fail the job.
          "$SPARK_HOME/sbin/stop-connect-server.sh" || true