tpcds

tpcds #164

Summary
Jobs
- tpcds
Run details
- Usage
- Workflow file

Workflow file for this run

	name: tpcds

	on:
	push:
	branches:
	- 'main'
	paths:
	- '**/tpcds.yml'
	pull_request:
	branches:
	- 'main'
	paths:
	- '**/tpcds.yml'
	schedule:
	- cron: '30 20 * * *'
	workflow_dispatch:
	inputs:
	debug:
	type: boolean
	description: "Run the build with tmate debugging enabled"
	required: false
	default: false

	jobs:
	tpcds:
	runs-on: ubuntu-20.04

	services:
	redis:
	image: redis
	options: >-
	--health-cmd "redis-cli ping"
	--health-interval 10s
	--health-timeout 5s
	--health-retries 5
	ports:
	- 6379:6379

	steps:
	- run: sudo swapoff -a

	- name: Set up Java
	uses: actions/setup-java@v3
	with:
	distribution: 'temurin'
	java-version: '8'

	- name: Checkout
	uses: actions/checkout@v3
	with:
	fetch-depth: 1

	- name: Build
	uses: ./.github/actions/build

	- name: Cache local Maven repository
	uses: actions/cache@v3
	with:
	path: ~/.m2/repository
	key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
	restore-keys: \|
	${{ runner.os }}-maven-

	- name: Build Hadoop SDK
	run: make -C sdk/java

	- name: Juicefs Format
	run: \|
	sudo ./juicefs format redis://127.0.0.1:6379/1 dev --trash-days 0 --bucket /tmp/jfs
	sudo chmod -R 777 /tmp/jfs/dev/

	- name: Set up Spark
	working-directory: /tmp
	run: \|
	spark_version=3.3.3
	wget -q https://dlcdn.apache.org/spark/spark-$spark_version/spark-$spark_version-bin-hadoop3.tgz
	tar -zxf spark-$spark_version-bin-hadoop3.tgz
	ln -s spark-$spark_version-bin-hadoop3 spark
	cp ~/work/juicefs/juicefs/sdk/java/target/juicefs-hadoop*jar /tmp/spark/jars
	cp ~/work/juicefs/juicefs/.github/workflows/resources/core-site.xml /tmp/spark/conf
	export PATH=$PATH:/tmp/spark/bin:/tmp/spark/sbin
	echo /tmp/spark/bin >> $GITHUB_PATH
	echo /tmp/spark/sbin >> $GITHUB_PATH
	start-master.sh -i localhost
	start-slave.sh spark://localhost:7077

	- name: Set up tpcds-kit
	working-directory: /tmp
	run: \|
	echo workspace is ${{ github.workspace }}
	sudo ${{ github.workspace }}/.github/scripts/apt_install.sh gcc make flex bison byacc git
	git clone --depth 1 https://github.com/databricks/tpcds-kit.git
	cd tpcds-kit/tools
	make OS=LINUX

	- name: Set up spark-sql-perf
	working-directory: /tmp
	run: \|
	sudo ${{ github.workspace }}/.github/scripts/apt_install.sh apt-transport-https curl gnupg sbt
	echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" \| sudo tee /etc/apt/sources.list.d/sbt.list
	echo "deb https://repo.scala-sbt.org/scalasbt/debian /" \| sudo tee /etc/apt/sources.list.d/sbt_old.list
	curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" \| sudo -H gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import
	sudo chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg
	mkdir ~/.sbt
	cat > ~/.sbt/repositories <<EOF
	[repositories]
	local
	maven-central: https://repo1.maven.org/maven2/
	typesafe-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
	EOF
	git clone --depth 1 https://github.com/databricks/spark-sql-perf.git
	cd spark-sql-perf
	sbt package

	- name: Gen data
	timeout-minutes: 35
	working-directory: /tmp
	run: \|
	spark-shell \
	--jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
	--master spark://localhost:7077 \
	--deploy-mode client \
	--executor-memory 2G \
	--driver-memory 2G \
	--executor-cores 1 \
	--conf spark.sql.shuffle.partitions=10 \
	-i ~/work/juicefs/juicefs/.github/workflows/resources/tpcds_datagen.scala \
	\|\| spark-shell \
	--jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
	--master spark://localhost:7077 \
	--deploy-mode client \
	--executor-memory 2G \
	--driver-memory 2G \
	--executor-cores 1 \
	--conf spark.sql.shuffle.partitions=10 \
	-i ~/work/juicefs/juicefs/.github/workflows/resources/tpcds_datagen.scala

	- name: Run tpcds
	timeout-minutes: 30
	working-directory: /tmp
	run: \|
	spark-shell \
	--jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
	--master spark://localhost:7077 \
	--deploy-mode client \
	--executor-memory 2G \
	--driver-memory 2G \
	--executor-cores 1 \
	--conf spark.sql.shuffle.partitions=10 \
	-i ~/work/juicefs/juicefs/.github/workflows/resources/tpcds_run.scala \
	\|\| spark-shell \
	--jars /tmp/spark-sql-perf/target/scala-2.12/spark-sql-perf*.jar \
	--master spark://localhost:7077 \
	--deploy-mode client \
	--executor-memory 2G \
	--driver-memory 2G \
	--executor-cores 1 \
	--conf spark.sql.shuffle.partitions=10 \
	-i ~/work/juicefs/juicefs/.github/workflows/resources/tpcds_run.scala

	- name: Log
	if: ${{ always() }}
	run: \|
	if [ -f /var/log/juicefs.log ]; then
	echo "juicefs log"
	sudo tail -n 1000 /var/log/juicefs.log
	grep "<FATAL>:" /var/log/juicefs.log && exit 1 \|\| true
	fi

	- name: Send Slack Notification
	if: ${{ failure() }}
	uses: juicedata/slack-notify-action@main
	with:
	channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
	slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

	- name: Setup upterm session
	if: ${{ failure() && github.event_name == 'workflow_dispatch' && github.event.inputs.debug }}
	timeout-minutes: 60
	uses: lhotari/action-upterm@v1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

tpcds #164

Workflow file

tpcds #164

Jobs

Run details

Workflow file for this run