Skip to content

feat: add array_normalize scalar function #454

feat: add array_normalize scalar function

feat: add array_normalize scalar function #454

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Detect semver-incompatible (breaking) API changes in crates modified by a PR.
#
# Only public workspace crates that have file changes are checked.
# Internal crates (benchmarks, test-utils, sqllogictest, doc) are excluded.
#
# This workflow only runs cargo-semver-checks and uploads the result as an
# artifact. The actual PR comment is posted by a companion workflow
# (`breaking_changes_detector_comment.yml`) that picks up the artifact via
# `workflow_run`.
#
# Why split it?
# "The GITHUB_TOKEN has read-only permissions in pull requests from forked
# repositories."
# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
# A read-only token cannot post comments, so on fork PRs the previous
# single-workflow design failed with HTTP 403. We can't simply broaden the
# trigger here either: cargo-semver-checks compiles PR code (build.rs, proc
# macros), so granting this job a write token would expose it to any code
# in the PR. And ASF infra policy independently forbids `pull_request_target`
# for any workflow that exposes GITHUB_TOKEN
# (https://infra.apache.org/github-actions-policy.html). The companion
# `workflow_run` workflow runs in the base-repo context with write access
# and never executes PR code.
name: "Detect breaking changes"

on:
  pull_request:
    branches:
      - main

# Read-only token: this job builds code supplied by the PR, so it must not
# be able to write to the repository.
permissions:
  contents: read

jobs:
  check-semver:
    name: Check semver
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # Full history (no shallow clone); later steps hand a base-branch
          # ref to ci/scripts/changed_crates.sh.
          fetch-depth: 0
          # Later steps build and run PR-supplied code (build scripts, proc
          # macros). Do not leave the job token configured for git where that
          # code could read it. The only network git operation below is an
          # explicit HTTPS fetch that passes no credentials of its own.
          persist-credentials: false

      # `origin` may point at a fork (when a contributor runs this locally) or
      # at a stale ref. Fetch the base branch from the PR's upstream repo into
      # a dedicated `apache/<base>` ref so the baseline is unambiguous and the
      # same ref name works locally (`git remote add apache ...`) and in CI.
      #
      # Context values are passed through `env` rather than interpolated into
      # the script text, so they reach the shell as data.
      - name: Fetch base branch
        env:
          BASE_REF: ${{ github.base_ref }}
          REPO: ${{ github.repository }}
        run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}"

      # Publishes the script's stdout as the `packages` step output. Every
      # tool-install and check step below is skipped when it is empty.
      - name: Determine changed crates
        id: changed_crates
        env:
          BASE_REF: ${{ github.base_ref }}
        run: |
          PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}")
          echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT"
          echo "Changed crates: $PACKAGES"

      # `datafusion-substrait` (and crates that depend on it via sqllogictest)
      # have a build script that calls protoc, which is not preinstalled on
      # ubuntu-latest runners.
      - name: Install Protobuf Compiler
        if: steps.changed_crates.outputs.packages != ''
        run: |
          sudo apt-get update
          sudo apt-get install -y protobuf-compiler

      - name: Install cargo-semver-checks
        if: steps.changed_crates.outputs.packages != ''
        uses: taiki-e/install-action@94cb46f8d6e437890146ffbd78a778b78e623fb2 # v2.74.0
        with:
          tool: cargo-semver-checks

      # Runs the check and records its outcome as the `result` step output
      # ("success" or "failure"). The step itself does not fail the job.
      - name: Run cargo-semver-checks
        id: check_semver
        if: steps.changed_crates.outputs.packages != ''
        env:
          BASE_REF: ${{ github.base_ref }}
          PACKAGES: ${{ steps.changed_crates.outputs.packages }}
        run: |
          # Keep going after a non-zero exit so the status can be captured.
          set +e
          # `tee` lets cargo's output stream live into the Actions log
          # while we also keep a copy for the PR comment.
          # Using `apache` remote here to point to the repository the pull request is against
          # $PACKAGES is deliberately unquoted: the shell splits it so each
          # word is passed as a separate argument.
          ci/scripts/changed_crates.sh semver-check "apache/${BASE_REF}" $PACKAGES \
            2>&1 | tee /tmp/semver-output.txt
          # PIPESTATUS[0] is the script's exit status, not `tee`'s.
          EXIT_CODE=${PIPESTATUS[0]}
          # Pass the result through an output instead of failing the job:
          # a detected breaking change should surface as a PR comment, not a
          # red check, so PR authors aren't confused by an intentional break.
          if [ "$EXIT_CODE" -eq 0 ]; then
            echo "result=success" >> "$GITHUB_OUTPUT"
          else
            echo "result=failure" >> "$GITHUB_OUTPUT"
          fi

      # Stage the data the companion comment workflow needs into a single
      # directory. We default the result to "success" so the comment
      # workflow clears any stale comment when the check step is skipped
      # (e.g. no published crates changed).
      #
      # `always()` makes this run even when an earlier step failed.
      # NOTE(review): the "success" fallback therefore also applies when the
      # check step was skipped because an earlier step failed - confirm the
      # comment workflow is fine with that.
      - name: Stage artifact for comment workflow
        if: always()
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }}
        run: |
          mkdir -p semver-artifact
          echo "$PR_NUMBER" > semver-artifact/pr_number
          echo "$CHECK_RESULT" > semver-artifact/result
          if [ -f /tmp/semver-output.txt ]; then
            # Strip ANSI colour escape sequences from the captured log.
            sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs
          else
            # No log was captured: write an empty file so `logs` always exists.
            : > semver-artifact/logs
          fi

      # Upload the staged directory under a fixed artifact name.
      - name: Upload artifact
        if: always()
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: semver-check-result
          path: semver-artifact/
          retention-days: 1