# build_and_test.yaml
name: Canary image testing

on:
  # Run tests on each push to release branches only.
  push:
    branches:
      - 'main'
      - 'stable/*'
  # Run tests on all PRs.
  pull_request:
  # Allows you to run this workflow manually for any internal branch from the Actions tab.
  workflow_dispatch:

defaults:
  run:
    # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell
    shell: bash --noprofile --norc -eo pipefail -x {0}
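    # -e aborts on the first failing command, -o pipefail makes a pipeline fail when
    # any stage fails (not just the last), and -x traces each command into the log;
    # --noprofile/--norc keep the runner's shell startup files from leaking state in.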

jobs:
  build-rock:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Setup LXD
        uses: canonical/[email protected]
        with:
          channel: 5.21/stable
      - name: Prepare Rock
        uses: canonical/craft-actions/rockcraft-pack@main
        id: rockcraft
      - uses: actions/upload-artifact@v3
        with:
          name: rock
          path: ${{ steps.rockcraft.outputs.rock }}
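      # rockcraft-pack produces an OCI archive (*.rock); publishing it as a build
      # artifact lets the test jobs below download the exact image under test.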

  flake8-lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
      - name: Check out source repository
        uses: actions/checkout@v3
      - name: Set up Python environment
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: flake8 Lint
        uses: py-actions/flake8@v2

  CephadmTest:
    runs-on: ubuntu-latest
    needs: [build-rock, flake8-lint]
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3
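      # Hosted runners ship large toolchains (dotnet, ghc, boost) that this job
      # does not need; removing them frees disk space for the image and cluster.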
      - name: clean unneeded files
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - name: Download artifact
        uses: actions/download-artifact@v3
        with:
          name: rock
      - name: set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: install dependencies
        run: ./scripts/deploy-helper.sh install_custom_runner_dependencies
      - name: Load image to registry
        run: |
          ls
          rock_file=$(ls *.rock | head -1)
          docker run -d -p 5000:5000 --restart=always --name registry registry:2
          sleep 10
          skopeo --insecure-policy copy oci-archive:$rock_file docker-daemon:canonical/ceph:latest
          docker image ls -a
          docker image tag canonical/ceph:latest localhost:5000/canonical/ceph:latest
          sleep 10
          docker push localhost:5000/canonical/ceph
          echo $'[registries.insecure]\nregistries = ["localhost:5000"]' | sudo tee -a /etc/containers/registries.conf
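      # skopeo imports the OCI archive into the local Docker daemon, which then
      # pushes it to the throwaway registry:2 container on port 5000; the
      # registries.conf entry lets podman/skopeo-based tooling pull from that
      # registry over plain HTTP instead of requiring TLS.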
      - name: install and init lxd snap
        run: |
          sudo snap install lxd
          sudo lxd init --auto
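      # Docker (used for the registry above) sets the iptables FORWARD policy to
      # DROP, which is known to break networking for LXD containers; flush the
      # chains and reset the policies to ACCEPT before deploying into LXD.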
      - name: clean iptables legacy
        run: |
          for ipt in iptables iptables-legacy ip6tables ip6tables-legacy; do
            sudo $ipt --flush
            sudo $ipt --flush -t nat
            sudo $ipt --delete-chain
            sudo $ipt --delete-chain -t nat
            sudo $ipt -P FORWARD ACCEPT
            sudo $ipt -P INPUT ACCEPT
            sudo $ipt -P OUTPUT ACCEPT
          done
          sudo systemctl reload snap.lxd.daemon
          sleep 5
      - name: Deploy Cephadm over LXD Container
        run: |
          reg_addr=$(./test/scripts/cephadm_helper.sh get_ip)
          sudo python test/deploy.py --osd-num 3 --ceph-version reef image "$reg_addr:5000/canonical/ceph:latest"
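      # Note: get_ip presumably resolves a host address reachable from inside the
      # LXD container, since localhost:5000 would point at the container itself
      # rather than at the registry running on the runner.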

  RookTest:
    needs: build-rock
    runs-on: ubuntu-latest
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: setup cluster resources
        uses: ./.github/workflows/canary-test-config
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
      - name: validate-yaml
        run: |
          cd rook
          ./tests/scripts/github-action-helper.sh validate_yaml
      - name: use local disk and create partitions for osds
        run: |
          cd rook
          ./tests/scripts/github-action-helper.sh use_local_disk
          ./tests/scripts/github-action-helper.sh create_partitions_for_osds
      - name: Download artifact
        uses: actions/download-artifact@v3
        with:
          name: rock
      - name: Load image and push to registry
        run: |
          ls
          rock_file=$(ls *.rock | head -1)
          docker run -d -p 5000:5000 --restart=always --name registry registry:2
          skopeo --insecure-policy copy oci-archive:$rock_file docker-daemon:canonical/ceph:latest
          docker image ls -a
          docker image tag canonical/ceph:latest localhost:5000/canonical/ceph:latest
          sleep 10
          docker push localhost:5000/canonical/ceph
          echo $'[registries.insecure]\nregistries = ["localhost:5000"]' | sudo tee -a /etc/containers/registries.conf
      - name: deploy cluster
        run: ./scripts/deploy-helper.sh deploy_cluster
      - name: wait for prepare pod
        run: cd rook ; tests/scripts/github-action-helper.sh wait_for_prepare_pod ; sleep 100
      - name: wait for ceph to be ready
        run: cd rook ; tests/scripts/github-action-helper.sh wait_for_ceph_to_be_ready all 2
      - name: wait for ceph mgr to be ready
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr|grep -Eosq \"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\" ; do sleep 1 && echo 'waiting for the manager IP to be available'; done"
          mgr_raw=$(kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr)
          timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- curl --silent --show-error ${mgr_raw%%:*}:9283; do echo 'waiting for mgr prometheus exporter to be ready' && sleep 1; done"
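      # active_addr looks like "<ip>:<port>/<nonce>"; the grep above waits until a
      # dotted-quad IPv4 appears in it, and ${mgr_raw%%:*} strips everything after
      # the first ':' to leave just the IP. Port 9283 is the default port of the
      # ceph-mgr prometheus module.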
      - name: test external script create-external-cluster-resources.py
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- mkdir -p /etc/ceph/test-data
          kubectl -n rook-ceph cp tests/ceph-status-out $toolbox:/etc/ceph/test-data/
          kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph
          kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources-tests.py $toolbox:/etc/ceph
          timeout 10 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool; do echo 'waiting for script to succeed' && sleep 1; done"
          # print existing client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth ls
      - name: test that re-running the external script results in the same output
        run: |
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool | tee output1.txt
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool | tee output2.txt
          if cmp output1.txt output2.txt; then
            echo "files have the same output"
            rm output1.txt output2.txt
          else
            echo "re-running with the same flags changed the output; failing"
            rm output1.txt output2.txt
            exit 1
          fi
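      # cmp exits non-zero on the first differing byte, so any regenerated key or
      # reordered field between the two runs fails the job: the script is required
      # to be idempotent for an unchanged cluster.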
      - name: dry run external script create-external-cluster-resources.py
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool --dry-run
      - name: test external script create-external-cluster-resources.py if users already exist with different caps
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # update client.csi-rbd-provisioner csi user caps
          # print client.csi-rbd-provisioner user before update
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
          kubectl -n rook-ceph exec $toolbox -- ceph auth caps client.csi-rbd-provisioner mon 'profile rbd, allow command "osd ls"' osd 'profile rbd' mgr 'allow rw'
          # print client.csi-rbd-provisioner user after update
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool
          # print client.csi-rbd-provisioner user after running script
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
      - name: run external script create-external-cluster-resources.py unit tests
        run: |
          cd rook
          kubectl -n rook-ceph exec $(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[0].metadata.name}') -- python3 -m unittest /etc/ceph/create-external-cluster-resources-tests.py
      - name: wait for the subvolumegroup to be created
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph fs subvolumegroup ls myfs|jq .[0].name|grep -q \"group-a\"; do sleep 1 && echo 'waiting for the subvolumegroup to be created'; done"
      - name: test subvolumegroup validation
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # pass the correct subvolumegroup and cephfs_filesystem flag names
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --subvolume-group group-a --cephfs-filesystem-name myfs
          # pass a subvolumegroup name that doesn't exist
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --subvolume-group false-test-subvolume-group
      - name: dry run test skip monitoring endpoint
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool --dry-run --skip-monitoring-endpoint
      - name: test rados namespace
        run: |
          cd rook
          kubectl create -f deploy/examples/radosnamespace.yaml
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- rbd namespace ls replicapool --format=json|jq .[0].name|grep -q \"namespace-a\"; do sleep 1 && echo 'waiting for the rados namespace to be created'; done"
          kubectl delete -f deploy/examples/radosnamespace.yaml
      - name: test rados namespace validation
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # create the `radosNamespace1` rados namespace for the `replicapool` rbd data pool
          kubectl -n rook-ceph exec $toolbox -- rbd namespace create replicapool/radosNamespace1
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rados-namespace radosNamespace1
          # pass a rados namespace that does not exist in replicapool (negative test)
          if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rados-namespace false-test-namespace); then
            echo "unexpectedly succeeded after passing the wrong rados namespace: $output"
            exit 1
          else
            echo "script failed as expected because a wrong rados namespace was passed"
          fi
      - name: test external script with restricted_auth_permission flag and without the cephfs_filesystem flag
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --restricted-auth-permission true
      - name: test external script with restricted_auth_permission flag
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --cephfs-filesystem-name myfs --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --restricted-auth-permission true
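      # With --restricted-auth-permission the script scopes the generated CSI users
      # to the given cluster and pool, which is why the upgrade step below can look
      # up a user named client.csi-rbd-node-rookstorage-replicapool.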
      - name: test the upgrade flag
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # print existing client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth ls
          # update the existing non-restricted client auth with the new caps
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --upgrade
          # print upgraded client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth ls
      - name: test the upgrade flag for a restricted auth user
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # print existing client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-node-rookstorage-replicapool
          # a restricted auth user needs to provide the --rbd-data-pool-name,
          # --k8s-cluster-name and --run-as-user flags while upgrading
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool
          # print upgraded client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-node-rookstorage-replicapool
      - name: validate-rgw-endpoint
        run: |
          cd rook
          rgw_endpoint=$(kubectl get service -n rook-ceph | awk '/rgw/ {print $3":80"}')
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # pass a valid rgw-endpoint of the same ceph cluster
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          tests/scripts/github-action-helper.sh check_empty_file output.txt
          rm -f output.txt
          # pass an invalid rgw-endpoint belonging to a different ceph cluster
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint 10.108.96.128:80 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          if [ -s output.txt ]; then
            echo "script reported an error on stderr after passing the wrong rgw-endpoint: $(cat output.txt)"
            rm -f output.txt
          else
            echo "no stderr output even though a wrong endpoint was provided"
            rm -f output.txt
            exit 1
          fi
          # pass a valid rgw-endpoint of the same ceph cluster with --rgw-tls-cert-path
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint --rgw-tls-cert-path my-cert 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          tests/scripts/github-action-helper.sh check_empty_file output.txt
          rm -f output.txt
          # pass a valid rgw-endpoint of the same ceph cluster with --rgw-skip-tls
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint --rgw-skip-tls true 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          tests/scripts/github-action-helper.sh check_empty_file output.txt
          rm -f output.txt
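      # Endpoint validation problems are reported on stderr while the script can
      # still exit 0, so the checks above capture stderr into output.txt and assert
      # it is empty (check_empty_file) for valid endpoints and non-empty for bad ones.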
      - name: validate multisite
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # create realm
          kubectl -n rook-ceph exec $toolbox -- radosgw-admin realm create --rgw-realm=realm1
          # pass the correct realm
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-realm-name realm1
          # pass a wrong realm (negative test)
          if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-realm-name realm3); then
            echo "unexpectedly succeeded after passing the wrong realm: $output"
            exit 1
          else
            echo "script failed as expected because a wrong realm was passed"
          fi
      - name: test enable v2 mon port
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --v2-port-enable
      - name: check-ownerreferences
        run: cd rook; tests/scripts/github-action-helper.sh check_ownerreferences
      - name: test osd removal jobs
        run: ./scripts/test-osd-removal.sh
      - name: collect common logs
        if: always()
        uses: ./.github/workflows/collect-logs
        with:
          name: canary
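      # On failure, open an SSH-reachable upterm session on the runner so the
      # cluster state can be inspected interactively before the job is torn down.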
      - name: consider debugging
        uses: lhotari/action-upterm@v1
        if: failure()