Skip to content

perf: upgrade mixtral to use expert parallelism (#783) #1036

perf: upgrade mixtral to use expert parallelism (#783)

perf: upgrade mixtral to use expert parallelism (#783) #1036

Workflow file for this run

name: Build and push OpenLLM base container
on:
workflow_dispatch:
push:
branches:
- 'main'
tags:
- '*'
paths:
- 'openllm-python/src/openllm/**'
- 'openllm-python/src/openllm_cli/**'
- 'openllm-core/src/openllm_core/**'
- 'openllm-client/src/openllm_client/**'
pull_request:
branches:
- 'main'
paths:
- 'openllm-python/src/openllm/**'
- 'openllm-python/src/openllm_cli/**'
- 'openllm-core/src/openllm_core/**'
- 'openllm-client/src/openllm_client/**'
types: [labeled, opened, synchronize, reopened]
workflow_call:
inputs:
tags:
required: true
type: string
env:
LINES: 120
COLUMNS: 120
OPENLLM_DO_NOT_TRACK: True
PYTHONUNBUFFERED: '1'
AWS_REGION: us-west-2
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
jobs:
get_commit_message:
name: Get commit message
runs-on: ubuntu-latest
if: "github.repository == 'bentoml/OpenLLM'" # Don't run on fork repository
outputs:
message: ${{ steps.commit_message.outputs.message }}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/[email protected]
# Gets the correct commit message for pull request
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Get commit message
id: commit_message
run: |
set -xe
COMMIT_MSG=$(git log --no-merges -1 --oneline)
echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT
echo github.ref ${{ github.ref }}
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
needs: get_commit_message
if: >-
contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))
env:
EC2_INSTANCE_TYPE: t3.2xlarge
EC2_AMI_ID: ami-089dafe9af191a0fd
EC2_SUBNET_ID: subnet-0ca63188fe98788c1,subnet-05997205433b249d0,subnet-07ef5d3e974275fed,subnet-0161ef0151089bb0b
EC2_SECURITY_GROUP: sg-051366641bf2b8049
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # ratchet:aws-actions/[email protected]
with:
aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Start EC2 Runner
id: start-ec2-runner
uses: aarnphm/ec2-github-runner@main # ratchet:exclude
with:
mode: start
github-token: ${{ secrets.OPENLLM_PAT }}
ec2-region: ${{ env.AWS_REGION }}
ec2-image-id: ${{ env.EC2_AMI_ID }}
ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
subnet-id: ${{ env.EC2_SUBNET_ID }}
security-group-id: ${{ env.EC2_SECURITY_GROUP }}
build-and-push-image:
name: Build and push OpenLLM base image
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs: start-runner
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
permissions:
contents: write
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
security-events: write
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/[email protected]
with:
fetch-depth: 0
ref: '${{ inputs.tags }}'
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@102b1a064a9b145e56556e22b18b19c624538d94 # ratchet:rlespinasse/[email protected]
- name: Set up QEMU
uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3 # ratchet:docker/[email protected]
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # ratchet:docker/[email protected]
with:
install: true
driver-opts: |
image=moby/buildkit:master
network=host
- name: Install cosign
if: github.event_name != 'pull_request'
uses: sigstore/cosign-installer@1fc5bd396d372bee37d608f955b336615edf79c8 # ratchet:sigstore/[email protected]
with:
cosign-release: 'v2.1.1'
- name: Login to GitHub Container Registry
uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # ratchet:docker/[email protected]
if: github.event_name != 'pull_request'
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata tags and labels for main, release or tag
if: github.event_name != 'pull_request'
id: meta
uses: docker/metadata-action@31cebacef4805868f9ce9a0cb03ee36c32df2ac4 # ratchet:docker/[email protected]
with:
flavor: |
latest=auto
images: |
ghcr.io/bentoml/openllm
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
labels: |
maintainer=aarnphm
org.opencontainers.image.source="https://github.com/bentoml/OpenLLM"
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # ratchet:docker/[email protected]
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
with:
context: .
file: Dockerfile
push: true
platforms: 'linux/amd64'
build-args: |
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
# TODO: Once https://github.com/aws/containers-roadmap/issues/876 is supported with OCI 1.1
# then move back to saving cache within the public repo. For now we will save the cache manifest within our internal S3 buckets.
# NOTE: the region of the S3 on prod is us-east-1, where the EC2 machine is at us-west-2
cache-from: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6
# @aarnphm: max is fine here, since we didn't do any custom code yet, so it is ok to cache every layer for optimal build time
# We also ignore-error for now, just upload anything to the blob storage
cache-to: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6,mode=max,compression=zstd,ignore-error=true
- name: Sign the released image
if: ${{ github.event_name != 'pull_request' }}
env:
COSIGN_EXPERIMENTAL: 'true'
run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }}
- name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph
uses: aquasecurity/trivy-action@91713af97dc80187565512baba96e4364e983601 # ratchet:aquasecurity/trivy-action@master
if: ${{ github.event_name != 'pull_request' }}
with:
image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}'
format: 'github'
output: 'dependency-results.sbom.json'
github-pat: ${{ secrets.GITHUB_TOKEN }}
scanners: 'vuln'
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@91713af97dc80187565512baba96e4364e983601 # ratchet:aquasecurity/trivy-action@master
if: ${{ github.event_name != 'pull_request' }}
with:
image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}'
format: 'sarif'
output: 'trivy-results.sarif'
severity: 'CRITICAL'
scanners: 'vuln'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@c0d1daa7f7e14667747d73a7dbbe8c074bc8bfe2 # ratchet:github/codeql-action/[email protected]
if: ${{ github.event_name != 'pull_request' }}
with:
sarif_file: 'trivy-results.sarif'
# TODO: Add snapshot tests here.
stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner
- build-and-push-image
- get_commit_message
runs-on: ubuntu-latest
if: >-
(contains(needs.get_commit_message.outputs.message, '[ec2 build]') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) || (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))) && always()
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # ratchet:aws-actions/[email protected]
with:
aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Stop EC2 runner
uses: aarnphm/ec2-github-runner@af796d217e24ecbbc5a2c49e780cd90616e2b962 # ratchet:aarnphm/ec2-github-runner@main
with:
mode: stop
github-token: ${{ secrets.OPENLLM_PAT }}
ec2-region: ${{ env.AWS_REGION }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}