Skip to content

perf: upgrade mixtral to use expert parallelism #2447

perf: upgrade mixtral to use expert parallelism

perf: upgrade mixtral to use expert parallelism #2447

Workflow file for this run

name: Continuous Integration
on:
workflow_call:
push:
branches: [main]
paths-ignore:
- 'docs/**'
- 'bazel/**'
- 'typings/**'
- '*.md'
- 'changelog.d/**'
- 'assets/**'
pull_request:
branches: [main]
paths-ignore:
- 'docs/**'
- 'bazel/**'
- 'typings/**'
- '*.md'
- 'changelog.d/**'
- 'assets/**'
env:
LINES: 120
COLUMNS: 120
OPENLLM_DO_NOT_TRACK: True
PYTHONUNBUFFERED: '1'
HATCH_VERBOSE: 2
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun
defaults:
run:
shell: bash --noprofile --norc -exo pipefail {0}
jobs:
tests:
runs-on: ${{ matrix.os }}
if: ${{ github.event_name == 'pull_request' || github.event_name == 'push'|| github.event_name == 'workflow_call' }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.8', '3.11']
exclude:
- os: 'windows-latest'
name: tests (${{ matrix.python-version }}.${{ matrix.os }})
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/[email protected]
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
with:
bentoml-version: 'main'
python-version: ${{ matrix.python-version }}
- name: Run tests
run: hatch run tests:python
- name: Disambiguate coverage filename
run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}"
- name: Upload coverage data
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
with:
name: coverage-data
path: .coverage.*
coverage:
name: report-coverage
runs-on: ubuntu-latest
if: false
needs: tests
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/[email protected]
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
with:
bentoml-version: 'main'
python-version-file: .python-version-default
- name: Download coverage data
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
with:
name: coverage-data
- name: Combine coverage data
run: hatch run coverage:combine
- name: Export coverage reports
run: |
hatch run coverage:report-xml openllm-python
hatch run coverage:report-uncovered-html openllm-python
- name: Upload uncovered HTML report
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
with:
name: uncovered-html-report
path: htmlcov
- name: Generate coverage summary
run: hatch run coverage:generate-summary
- name: Write coverage summary report
if: github.event_name == 'pull_request'
run: hatch run coverage:write-summary-report
- name: Update coverage pull request comment
if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork
uses: marocchino/sticky-pull-request-comment@efaaab3fd41a9c3de579aba759d2552635e590fd # ratchet:marocchino/sticky-pull-request-comment@v2
with:
path: coverage-report.md
cli-benchmark:
name: Check for CLI responsiveness
runs-on: ubuntu-latest
env:
HYPERFINE_VERSION: '1.12.0'
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/[email protected]
with:
fetch-depth: 0
- name: Install hyperfine
run: |
wget https://github.com/sharkdp/hyperfine/releases/download/v${HYPERFINE_VERSION}/hyperfine_${HYPERFINE_VERSION}_amd64.deb
sudo dpkg -i hyperfine_${HYPERFINE_VERSION}_amd64.deb
- uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
with:
bentoml-version: 'main'
python-version-file: .python-version-default
- name: Install self
run: bash local.sh
- name: Speed
run: hyperfine -m 100 --warmup 10 openllm
brew-dry-run:
name: Running dry-run tests for brew
runs-on: macos-latest
steps:
- name: Install tap and dry-run
run: |
brew tap bentoml/openllm https://github.com/bentoml/openllm
brew install openllm
openllm --help
openllm models --show-available
evergreen: # https://github.com/marketplace/actions/alls-green#why
if: always()
needs:
- tests
- cli-benchmark
- brew-dry-run
runs-on: ubuntu-latest
steps:
- name: Decide whether the needed jobs succeeded or failed
uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # ratchet:re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true