Skip to content

Update DataFusion to 43 (#1125) #2784

Update DataFusion to 43 (#1125)

Update DataFusion to 43 (#1125) #2784

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: Rust
on:
# always trigger
push:
pull_request:
jobs:
# build the library, a compilation step used by multiple steps below
linux-build-lib:
name: Build Libraries on AMD64 Rust ${{ matrix.rust }}
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [stable]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v4
- name: Cache Cargo
uses: actions/cache@v4
with:
# these represent dependencies downloaded by cargo
# and thus do not depend on the OS, arch nor rust version.
path: /github/home/.cargo
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
# these represent compiled steps of both dependencies and arrow
# and thus are specific for a particular OS, arch and rust version.
path: /github/home/target
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Build workspace in release mode
run: |
cargo build --release
ls -l /github/home/target/release
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
- name: Save artifacts
uses: actions/upload-artifact@v4
with:
name: rust-artifacts
path: |
/github/home/target/release/ballista-scheduler
/github/home/target/release/ballista-executor
# test the crate
linux-test:
name: Test Workspace on AMD64 Rust ${{ matrix.rust }}
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [stable]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
apt-get -qq update && apt-get -y -qq install protobuf-compiler
protoc --version
- name: Cache Cargo
uses: actions/cache@v4
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Run tests
run: |
export PATH=$PATH:$HOME/d/protoc/bin
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cargo test --features=testcontainers
cd python
cargo test
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# run ballista tests
ballista-test:
name: Test Ballista on AMD64 Rust ${{ matrix.rust }}
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [stable]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
- name: Cache Cargo
uses: actions/cache@v4
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
# Ballista is currently not part of the main workspace so requires a separate test step
- name: Run Ballista tests
run: |
export PATH=$PATH:$HOME/d/protoc/bin
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cd ballista
# Ensure also compiles in standalone mode
cargo test --no-default-features --features standalone
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
windows-and-macos:
name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, macos-latest]
rust: [stable]
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install protobuf macos compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-osx-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
echo "$HOME/d/protoc/bin" >> $GITHUB_PATH
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
if: ${{matrix.os == 'macos-latest'}}
- name: Install protobuf windows compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-win64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc.exe --version
if: ${{matrix.os == 'windows-latest'}}
# TODO: this won't cache anything, which is expensive. Setup this action
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
rustup default ${{ matrix.rust }}
rustup component add rustfmt
- name: Run tests
shell: bash
run: |
export PATH=$PATH:$HOME/d/protoc/bin
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cargo test
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
# verify that the benchmark queries return the correct results
verify-benchmark-results:
name: verify benchmark results (amd64)
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [stable]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
apt-get -qq update && apt-get -y -qq install protobuf-compiler
protoc --version
- name: Cache Cargo
uses: actions/cache@v4
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Verify that benchmark queries return expected results
run: |
cargo test --package ballista-benchmarks --profile release-nonlto --features=ci -- --test-threads=1
lint:
name: Lint
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
- name: Setup toolchain
run: |
rustup toolchain install stable
rustup default stable
rustup component add rustfmt
- name: Run
run: ci/scripts/rust_fmt.sh
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
docker:
name: Docker
needs: [linux-build-lib]
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Restore rust artifacts
uses: actions/download-artifact@v4
with:
name: rust-artifacts
path: target/release
- name: Build and push Docker image
run: |
echo "github user is $DOCKER_USER"
docker build -t arrow-ballista-standalone:latest -f dev/docker/ballista-standalone.Dockerfile .
export DOCKER_TAG="$(git describe --exact-match --tags $(git log -n1 --pretty='%h') || echo '')"
if [[ $DOCKER_TAG =~ ^[0-9\.]+-rc[0-9]+$ ]]
then
echo "publishing docker tag $DOCKER_TAG"
docker tag arrow-ballista-standalone:latest ghcr.io/apache/arrow-ballista-standalone:$DOCKER_TAG
docker login ghcr.io -u $DOCKER_USER -p "$DOCKER_PASS"
docker push ghcr.io/apache/arrow-ballista-standalone:$DOCKER_TAG
fi
env:
DOCKER_USER: ${{ github.actor }}
DOCKER_PASS: ${{ secrets.GITHUB_TOKEN }}
clippy:
name: Clippy
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [stable]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v4
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Install Clippy
run: |
rustup component add clippy
- name: Run clippy
run: ci/scripts/rust_clippy.sh
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
cargo-toml-formatting-checks:
name: Check Cargo.toml formatting
needs: [linux-build-lib]
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [stable]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v4
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v4
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ matrix.rust }}
- name: Install cargo-tomlfmt
run: |
which cargo-tomlfmt || cargo install cargo-tomlfmt
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
- name: Check Cargo.toml formatting
run: |
# if you encounter error, try rerun the command below, finally run 'git diff' to
# check which Cargo.toml introduces formatting violation
#
# ignore ./Cargo.toml because putting workspaces in multi-line lists make it easy to read
ci/scripts/rust_toml_fmt.sh
if test -f "./Cargo.toml.bak"; then
echo "cargo tomlfmt found format violations"
exit 1
fi
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# Coverage job was failing. https://github.com/apache/arrow-datafusion/issues/590 tracks re-instating it
# coverage:
# name: Coverage
# runs-on: ubuntu-latest
# strategy:
# matrix:
# arch: [amd64]
# rust: [stable]
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: true
# - name: Cache Cargo
# uses: actions/cache@v4
# with:
# path: /home/runner/.cargo
# # this key is not equal because the user is different than on a container (runner vs github)
# key: cargo-coverage-cache-
# - name: Cache Rust dependencies
# uses: actions/cache@v4
# with:
# path: /home/runner/target
# # this key is not equal because coverage uses different compilation flags.
# key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}-
# - name: Run coverage
# run: |
# export ARROW_TEST_DATA=$(pwd)/testing/data
# export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
# # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
# # see https://github.com/xd009642/tarpaulin/issues/618
# cargo install --version 0.16.0 cargo-tarpaulin
# cargo tarpaulin --out Xml
# env:
# CARGO_HOME: "/home/runner/.cargo"
# CARGO_TARGET_DIR: "/home/runner/target"
# - name: Report coverage
# continue-on-error: true
# run: bash <(curl -s https://codecov.io/bash)