diff --git a/.github/workflows/amd_perf_kernel_benchmark.yml b/.github/workflows/amd_perf_kernel_benchmark.yml
new file mode 100644
index 000000000000..43be8e421a10
--- /dev/null
+++ b/.github/workflows/amd_perf_kernel_benchmark.yml
@@ -0,0 +1,160 @@
+name: AMD Perf Kernel Benchmark
+
+on:
+  workflow_dispatch:  # allow manual runs
+  push:
+    branches: [main_perf]
+  pull_request:
+    branches: [main_perf]
+  merge_group:  # an event of its own; it is not a valid key under pull_request
+    branches: [main_perf]
+    types: [checks_requested]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # include the workflow name so other workflows on the same ref do not share this group
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main_perf' }}  # runs on main_perf itself are never cancelled
+
+permissions: read-all  # token is granted read access only
+
+env:  # exported to every step of every job in this workflow
+  TRITON_BUILD_WITH_CLANG_LLD: 'TRUE'
+  TRITON_USE_ASSERT_ENABLED_LLVM: 'TRUE'
+  TRITON_DISABLE_LINE_INFO: '1'
+
+jobs:
+  Check-File-Changes:
+    if: github.event_name == 'pull_request'  # the script below needs github.base_ref
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history so the base branch can be diffed against
+      - name: Check file changes
+        run: |
+          git fetch origin ${{ github.base_ref }}
+          changed_files=$(git diff --name-only origin/${{ github.base_ref }} ${{ github.sha }})
+          printf 'Changed files:\n%s\n' "$changed_files"
+          # grep -v prints (and succeeds on) every path outside the allow-list; '^$' keeps
+          # the single blank line produced by an empty diff from being treated as such a path.
+          if echo "$changed_files" | grep -vE '^$|^python/perf-kernels/|^\.github/workflows/amd_'; then
+            echo "Changes detected outside of the python/perf-kernels directory or .github/workflows/amd_ files. Failing the workflow."
+            exit 1
+          fi
+
+  Runner-Preparation-AMD:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    outputs:
+      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}  # JSON list of runner-label lists, consumed via fromJson
+    steps:
+      - name: Prepare runner matrix
+        id: set-matrix
+        run: |
+          if [ x"${{ github.repository }}" == x"ROCm/triton" ]; then  # self-hosted GPU runner only in ROCm/triton
+            echo 'matrix-HIP=[["self-hosted", "rocm.gfx90a"]]' >> "$GITHUB_OUTPUT"  # replaces deprecated ::set-output
+          else
+            echo 'matrix-HIP=[["ubuntu-latest"]]' >> "$GITHUB_OUTPUT"
+          fi
+
+  # Installs Triton from this checkout in a ROCm container, runs the perf-kernel scripts, uploads CSVs from python/perf-kernels.
+  Benchmark-AMD:
+    needs: Runner-Preparation-AMD
+    if: needs.Runner-Preparation-AMD.outputs.matrix-HIP != ''
+    strategy:
+      matrix:
+        runner: ${{ fromJson(needs.Runner-Preparation-AMD.outputs.matrix-HIP) }}
+    runs-on: ${{ matrix.runner }}
+    timeout-minutes: 90
+    container:
+      image: 'rocm/pytorch:rocm6.1_ubuntu22.04_py3.10_pytorch_2.4'
+      options: '--device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root'
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Clear cache
+        run: |
+          rm -rf ~/.triton
+          mkdir -p ~/.triton
+          ls -alh ~/.triton
+      - name: Update PATH
+        run: echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
+      - name: Install pip dependencies
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install lit matplotlib pandas
+      - name: Install Triton
+        run: |
+          echo "PATH is '$PATH'"
+          pip uninstall -y triton
+          cd python
+          pip install -v -e .
+      - name: Run Perf Kernels Benchmark
+        run: |
+          python ./python/perf-kernels/flash-attention.py
+          python ./python/perf-kernels/softmax.py
+          python ./python/perf-kernels/rmsnorm.py
+          python ./python/perf-kernels/layernorm.py
+        # python ./python/perf-kernels/multreduce_matmul_kernel.py bench
+      - name: Upload benchmark results
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-csv-results
+          path: python/perf-kernels/*.csv
+          if-no-files-found: error  # fail the job when no CSV matched the glob
+
+  Analyze-Results:
+    needs: Benchmark-AMD  # consumes the benchmark-csv-results artifact uploaded there
+    timeout-minutes: 10
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout reporting system
+        uses: actions/checkout@v4
+        with:
+          repository: 'ROCm/triton-reporting-system'
+      - name: Download CSV results
+        uses: actions/download-artifact@v4
+        with:
+          name: benchmark-csv-results
+          path: csvs  # workspace-relative, matches --root-dir csvs; $GITHUB_WORKSPACE is not expanded inside `with:` inputs
+      - name: Find previous successful run
+        id: find_run
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const runs = await github.rest.actions.listWorkflowRuns({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              workflow_id: 'amd_perf_kernel_benchmark.yml',  // must be the workflow file name or numeric ID; context.workflow is the display name
+              branch: 'main_perf',
+              status: 'success'
+            });
+            // NOTE(review): index 1 skips the newest successful run in the list - confirm that this is intended.
+            if (runs.data.workflow_runs.length > 1) {
+              core.setOutput('run_id', runs.data.workflow_runs[1].id);
+            } else {
+              console.log("No previous successful run found");
+            }
+      - name: Download previous data
+        uses: actions/download-artifact@v4
+        if: steps.find_run.outputs.run_id != null
+        continue-on-error: true  # the baseline is optional; the compare step copes with a missing data.json
+        with:
+          name: data.json  # NOTE(review): no step in this workflow uploads an artifact with this name - confirm where it comes from
+          github-token: ${{ github.token }}
+          run-id: ${{ steps.find_run.outputs.run_id }}
+      - name: Compare results
+        run: |
+          master_data_args=""  # default: no baseline history to compare against
+          if [[ -e data.json ]]; then
+            master_data_args="--master-history-file data.json"
+          fi
+          echo "[]" > current_data.json
+          python3 report_builder.py --report-config TRITON_KERNELS \
+            --output-dir ./output --history-file current_data.json \
+            --root-dir csvs ${master_data_args} --skip-html-report
+      - name: Upload results
+        uses: actions/upload-artifact@v4
+        with:
+          path: output/results.txt  # NOTE(review): presumably produced by report_builder.py under --output-dir ./output - confirm
+          name: comparison-results.txt
+