diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml
new file mode 100644
index 0000000000000..03a32be519a08
--- /dev/null
+++ b/.github/workflows/breaking_changes_detector.yml
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Detect semver-incompatible (breaking) API changes in crates modified by a PR.
+#
+# Only public workspace crates that have file changes are checked.
+# Internal crates (benchmarks, test-utils, sqllogictest, doc) are excluded.
+#
+# If breaking changes are found, a sticky comment is posted on the PR.
+# The comment is removed automatically once the issues are resolved.
+
+name: "Detect breaking changes"
+
+on:
+  pull_request:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  check-semver:
+    name: Check semver
+    runs-on: ubuntu-latest
+    outputs:
+      logs: ${{ steps.check_semver.outputs.logs }}
+      # Default to "success" so the comment job clears any stale comment
+      # when the check step is skipped (e.g. no published crates changed).
+      result: ${{ steps.check_semver.outputs.result || 'success' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+
+      # For fork PRs, `origin` points to the fork, not the upstream repo.
+      # Explicitly fetch the base branch from the upstream repo so we have
+      # a valid baseline ref for both diff and semver-checks.
+      - name: Fetch base branch
+        env:
+          BASE_REF: ${{ github.base_ref }}
+          REPO: ${{ github.repository }}
+        run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/origin/${BASE_REF}"
+
+      - name: Determine changed crates
+        id: changed_crates
+        env:
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "origin/${BASE_REF}")
+          echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT"
+          echo "Changed crates: $PACKAGES"
+
+      - name: Install cargo-semver-checks
+        if: steps.changed_crates.outputs.packages != ''
+        uses: taiki-e/install-action@94cb46f8d6e437890146ffbd78a778b78e623fb2 # v2.74.0
+        with:
+          tool: cargo-semver-checks
+
+      - name: Run cargo-semver-checks
+        id: check_semver
+        if: steps.changed_crates.outputs.packages != ''
+        env:
+          BASE_REF: ${{ github.base_ref }}
+          PACKAGES: ${{ steps.changed_crates.outputs.packages }}
+        run: |
+          set +e
+          # `tee` lets cargo's output stream live into the Actions log
+          # while we also keep a copy for the PR comment.
+          ci/scripts/changed_crates.sh semver-check "origin/${BASE_REF}" $PACKAGES \
+            2>&1 | tee /tmp/semver-output.txt
+          EXIT_CODE=${PIPESTATUS[0]}
+          # Multiline outputs need a heredoc-style delimiter. Use a random one
+          # so a log line that happens to read "EOF" cannot end the value early.
+          DELIMITER="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
+          {
+            echo "logs<<${DELIMITER}"
+            cat /tmp/semver-output.txt
+            echo "${DELIMITER}"
+          } >> "$GITHUB_OUTPUT"
+          # Pass the result through an output instead of failing the job:
+          # a detected breaking change should surface as a PR comment, not a
+          # red check, so PR authors aren't confused by an intentional break.
+          if [ "$EXIT_CODE" -eq 0 ]; then
+            echo "result=success" >> "$GITHUB_OUTPUT"
+          else
+            echo "result=failure" >> "$GITHUB_OUTPUT"
+          fi
+
+  # Post or remove a sticky comment on the PR based on the semver check result.
+  # NOTE(review): for PRs opened from forks the `pull_request` token is read-only,
+  # so `pull-requests: write` may not take effect there -- confirm.
+  comment-on-pr:
+    name: Comment on pull request
+    runs-on: ubuntu-latest
+    needs: check-semver
+    if: always()
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          sparse-checkout: ci/scripts
+
+      - name: Update PR comment
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          CHECK_RESULT: ${{ needs.check-semver.outputs.result }}
+          SEMVER_LOGS: ${{ needs.check-semver.outputs.logs }}
+        run: |
+          ci/scripts/changed_crates.sh comment \
+            "$REPO" "$PR_NUMBER" "$CHECK_RESULT" "$SEMVER_LOGS"
diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh
new file mode 100755
index 0000000000000..2ee76ad010e97
--- /dev/null
+++ b/ci/scripts/changed_crates.sh
@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Helper script for the breaking-changes-detector workflow.
+#
+# Subcommands:
+#   changed-crates <base_ref>
+#       Print space-separated list of crate names whose files changed vs base_ref.
+#       Only published workspace members (no `publish = false`) are considered.
+#
+#   semver-check <base_ref> <packages...>
+#       Run cargo-semver-checks for the given packages against base_ref.
+#       Output and exit code are passed through unchanged; the caller is
+#       responsible for capturing/formatting them.
+#
+#   comment <repo> <pr_number> <check_result> [logs]
+#       Upsert or delete a sticky PR comment based on check_result.
+#       check_result: "success" deletes any existing comment,
+#       anything else upserts the comment with the provided logs.
+#       Requires GH_TOKEN to be set.
+
+set -euo pipefail
+
+# Hidden HTML comment identifying our sticky comment. Must be non-empty.
+MARKER="<!-- breaking-changes-detector -->"
+
+# ── changed-crates ──────────────────────────────────────────────────
+cmd_changed_crates() {
+  local base_ref="${1:?Usage: changed_crates.sh changed-crates <base_ref>}"
+
+  # 1. Files changed between the PR and the base branch.
+  local changed_files
+  changed_files=$(git diff --name-only "${base_ref}...HEAD")
+
+  # 2. Every publishable workspace member, one per line as
+  #    "<name> <dir>". `publish = false` in Cargo.toml shows
+  #    up as `"publish": []` in cargo metadata, so filtering on that
+  #    excludes internal crates without a manual exclusion list.
+  local crates
+  crates=$(cargo metadata --no-deps --format-version 1 | jq -r '
+    (.workspace_root + "/") as $root
+    | .packages[]
+    | select(.publish != [])
+    | "\(.name) \(.manifest_path | ltrimstr($root) | rtrimstr("/Cargo.toml"))"
+  ')
+
+  # 3. Keep crates whose directory (matched literally) holds a changed file.
+  while read -r name dir; do
+    if [[ $'\n'"$changed_files" == *$'\n'"${dir}/"* ]]; then
+      echo "$name"
+    fi
+  done <<<"$crates" | xargs
+}
+
+# ── semver-check ────────────────────────────────────────────────────
+cmd_semver_check() {
+  local base_ref="${1:?Usage: changed_crates.sh semver-check <base_ref> <packages...>}"
+  shift
+
+  local args=()
+  for pkg in "$@"; do
+    args+=(--package "$pkg")
+  done
+
+  cargo semver-checks --baseline-rev "$base_ref" "${args[@]}"
+}
+
+# ── comment ─────────────────────────────────────────────────────────
+cmd_comment() {
+  local repo="${1:?Usage: changed_crates.sh comment <repo> <pr_number> <check_result> [logs]}"
+  local pr_number="${2:?}"
+  local check_result="${3:?}"
+  local logs="${4:-}"
+
+  # Find our sticky comment; --paginate looks beyond the first page of results.
+  local comment_id
+  comment_id=$(gh api --paginate "repos/${repo}/issues/${pr_number}/comments" \
+    --jq ".[] | select(.body | contains(\"${MARKER}\")) | .id" | head -1)
+
+  echo "existing breaking change comment id $comment_id"
+
+  if [ "$check_result" = "success" ]; then
+    # Delete the comment if one exists
+    if [ -n "$comment_id" ]; then
+      echo "result is success, so deleting breaking change comment"
+      gh api "repos/${repo}/issues/comments/${comment_id}" --method DELETE
+    else
+      echo "result is success and no previous comment to delete"
+    fi
+  else
+    local body="${MARKER}
+Thank you for opening this pull request!
+
+Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch).
+
+<details>
+<summary>Details</summary>
+
+\`\`\`
+${logs}
+\`\`\`
+
+</details>"
+
+    if [ -n "$comment_id" ]; then
+      echo "comment already exists, updating content"
+      gh api "repos/${repo}/issues/comments/${comment_id}" \
+        --method PATCH --raw-field body="$body"
+    else
+      echo "no comment with breaking changes, creating a new one"
+      gh api "repos/${repo}/issues/${pr_number}/comments" \
+        --method POST --raw-field body="$body"
+    fi
+  fi
+}
+
+# ── main ────────────────────────────────────────────────────────────
+cmd="${1:?Usage: changed_crates.sh <command> [args...]}"
+shift
+
+case "$cmd" in
+  changed-crates) cmd_changed_crates "$@" ;;
+  semver-check)   cmd_semver_check "$@" ;;
+  comment)        cmd_comment "$@" ;;
+  *) echo "Unknown command: $cmd" >&2; exit 1 ;;
+esac