Skip to content

Fixed Skopeo/Docker issues for Reef releases #582

Fixed Skopeo/Docker issues for Reef releases

Fixed Skopeo/Docker issues for Reef releases #582

# Workflow identity, triggers and the default shell shared by every `run` step.
name: Canary image testing

on:
  # Pushes start the tests only when they land on a release branch.
  push:
    branches:
      - 'main'
      - 'stable/*'
  # Every pull request is tested.
  pull_request:
  # Manual runs for any internal branch, started from the Actions tab.
  workflow_dispatch:

defaults:
  run:
    # Shell flags: exit on the first failing command (-e), fail a pipeline if
    # any stage fails (pipefail) and trace each command (-x).
    # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell
    shell: bash --noprofile --norc -eo pipefail -x {0}
jobs:
# Packs the rock with the rockcraft-pack action and uploads the resulting file
# as the `rock` artifact, which the test jobs download by that name.
build-rock:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Setup LXD
      # NOTE(review): the scraped source showed "canonical/[email protected]"
      # here, which is an e-mail-obfuscation artifact and not a valid action
      # reference. The action name and tag below are reconstructed; confirm
      # the tag against the repository history before merging.
      uses: canonical/setup-lxd@v0.1.1
      with:
        channel: 5.21/stable
    - name: Prepare Rock
      id: rockcraft
      uses: canonical/craft-actions/rockcraft-pack@main
    - name: Upload rock
      # NOTE(review): the v3 artifact actions are deprecated by GitHub. When
      # upgrading, bump this step and every download-artifact step in the same
      # change, since v3 and v4 artifacts are not interoperable.
      uses: actions/upload-artifact@v3
      with:
        name: rock
        # Path of the packed rock, as reported by the `rockcraft` step.
        path: ${{ steps.rockcraft.outputs.rock }}
# Lint job: checks out the sources, sets up Python 3.10 and runs the flake8
# action.
flake8-lint:
  name: Lint
  runs-on: ubuntu-latest
  steps:
    - name: Check out source repository
      uses: actions/checkout@v3
    - name: Set up Python environment
      uses: actions/setup-python@v4
      with:
        # Quoted so the version is not parsed as the float 3.1.
        python-version: "3.10"
    - name: flake8 Lint
      uses: py-actions/flake8@v2
# Cephadm test: pushes the freshly built rock to a local registry and runs
# test/deploy.py against that image. Starts only after the rock is built and
# the lint job has passed.
CephadmTest:
  needs:
    - build-rock
    - flake8-lint
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version: ["3.10"]
  steps:
    # Check out the repository under $GITHUB_WORKSPACE so the job can use it.
    - uses: actions/checkout@v3
    - name: clean unrequired files.
      run: |
        sudo rm -rf /usr/share/dotnet
        sudo rm -rf /opt/ghc
        sudo rm -rf "/usr/local/share/boost"
        sudo rm -rf "$AGENT_TOOLSDIRECTORY"
    - name: Download artifact
      uses: actions/download-artifact@v3
      with:
        name: rock
    - name: set up python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: install dependencies
      run: ./scripts/deploy-helper.sh install_custom_runner_dependencies
    - name: Load image to registry
      run: |
        ls
        # Use the first *.rock file found in the working directory.
        rock_file=$(ls *.rock | head -1)
        # Start a local registry on port 5000, then pause before using it.
        docker run -d -p 5000:5000 --restart=always --name registry registry:2
        sleep 10
        # Copy the rock (an OCI archive) into the Docker daemon.
        skopeo --insecure-policy copy oci-archive:$rock_file docker-daemon:canonical/ceph:latest
        docker image ls -a
        # Retag for the local registry and push.
        docker image tag canonical/ceph:latest localhost:5000/canonical/ceph:latest
        sleep 10
        docker push localhost:5000/canonical/ceph
        # Register localhost:5000 as an insecure registry in registries.conf.
        echo $'[registries.insecure]\nregistries = ["localhost:5000"]' | sudo tee -a /etc/containers/registries.conf
    - name: install and init lxd snap
      run: |
        sudo snap install lxd
        sudo lxd init --auto
    - name: clean iptables legacy
      run: |
        # For each iptables variant: flush and delete the chains of the default
        # and nat tables, then set all default policies to ACCEPT.
        for ipt in iptables iptables-legacy ip6tables ip6tables-legacy; do
          sudo $ipt --flush
          sudo $ipt --flush -t nat
          sudo $ipt --delete-chain
          sudo $ipt --delete-chain -t nat
          sudo $ipt -P FORWARD ACCEPT
          sudo $ipt -P INPUT ACCEPT
          sudo $ipt -P OUTPUT ACCEPT
        done
        sudo systemctl reload snap.lxd.daemon
        sleep 5
    - name: Deploy Cephadm over LXD Container
      run: |
        # The helper's output is used as the registry host in the image
        # reference passed to the deployment script.
        reg_addr=$(./test/scripts/cephadm_helper.sh get_ip)
        sudo python test/deploy.py --osd-num 3 --ceph-version reef image "$reg_addr:5000/canonical/ceph:latest"
# Rook test: job header, cluster preparation and loading of the rock image
# into a local registry. Starts once the rock artifact has been built.
RookTest:
  runs-on: ubuntu-latest
  needs: build-rock
  steps:
    # Check out the repository under $GITHUB_WORKSPACE so the job can use it.
    - uses: actions/checkout@v3
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
    - name: setup cluster resources
      # Local composite action kept next to this workflow.
      uses: ./.github/workflows/canary-test-config
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
    - name: validate-yaml
      run: |
        cd rook
        ./tests/scripts/github-action-helper.sh validate_yaml
    - name: use local disk and create partitions for osds
      run: |
        cd rook
        ./tests/scripts/github-action-helper.sh use_local_disk
        ./tests/scripts/github-action-helper.sh create_partitions_for_osds
    - name: Download artifact
      uses: actions/download-artifact@v3
      with:
        name: rock
    - name: Load image and load to registry
      run: |
        ls
        # Use the first *.rock file found in the working directory.
        rock_file=$(ls *.rock | head -1)
        # Start a local registry on port 5000.
        docker run -d -p 5000:5000 --restart=always --name registry registry:2
        # Copy the rock (an OCI archive) into the Docker daemon.
        skopeo --insecure-policy copy oci-archive:$rock_file docker-daemon:canonical/ceph:latest
        docker image ls -a
        # Retag for the local registry and push.
        docker image tag canonical/ceph:latest localhost:5000/canonical/ceph:latest
        sleep 10
        docker push localhost:5000/canonical/ceph
        # Register localhost:5000 as an insecure registry in registries.conf.
        echo $'[registries.insecure]\nregistries = ["localhost:5000"]' | sudo tee -a /etc/containers/registries.conf
# Deploy the cluster, then block until the prepare pod, Ceph and the active
# manager report ready.
- name: deploy cluster
  run: ./scripts/deploy-helper.sh deploy_cluster
- name: wait for prepare pod
  run: |
    cd rook
    tests/scripts/github-action-helper.sh wait_for_prepare_pod
    sleep 100
- name: wait for ceph to be ready
  run: |
    cd rook
    tests/scripts/github-action-helper.sh wait_for_ceph_to_be_ready all 2
- name: wait for ceph mgr to be ready
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # Poll (up to 15s) until `ceph mgr dump` reports an active_addr that
    # contains a dotted-quad IPv4 address.
    timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr|grep -Eosq \"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\" ; do sleep 1 && echo 'waiting for the manager IP to be available'; done"
    mgr_raw=$(kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr)
    # ${mgr_raw%%:*} drops everything from the first ':' onwards, leaving the
    # host part; poll port 9283 on it (up to 60s) until curl succeeds.
    timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- curl --silent --show-error ${mgr_raw%%:*}:9283; do echo 'waiting for mgr prometheus exporter to be ready' && sleep 1; done"
# Exercise create-external-cluster-resources.py inside the toolbox pod:
# first run, repeat run, dry run, pre-existing users and its unit tests.
- name: test external script create-external-cluster-resources.py
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # Copy the test data, the script and its tests into the toolbox pod.
    kubectl -n rook-ceph exec $toolbox -- mkdir -p /etc/ceph/test-data
    kubectl -n rook-ceph cp tests/ceph-status-out $toolbox:/etc/ceph/test-data/
    kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph
    kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources-tests.py $toolbox:/etc/ceph
    # Retry for up to 10s until the script succeeds.
    timeout 10 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool; do echo 'waiting for script to succeed' && sleep 1; done"
    # print existing client auth
    kubectl -n rook-ceph exec $toolbox -- ceph auth ls
- name: test re-running of external script should result in same output
  run: |
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # Run the script twice with identical flags and capture each output.
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool | tee output1.txt
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool | tee output2.txt
    # The two outputs must be byte-identical; otherwise the step fails.
    if cmp output1.txt output2.txt; then
      echo "files have same output"
      rm output1.txt
      rm output2.txt
    else
      echo "re-run with same flags changed the output, result in failure"
      rm output1.txt
      rm output2.txt
      exit 1
    fi
- name: dry run external script create-external-cluster-resources.py
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool --dry-run
- name: test external script create-external-cluster-resources.py if users already exist with different caps
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # update client.csi-rbd-provisioner csi user caps
    # print client.csi-rbd-provisioner user before update
    kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
    kubectl -n rook-ceph exec $toolbox -- ceph auth caps client.csi-rbd-provisioner mon 'profile rbd, allow command "osd ls"' osd 'profile rbd' mgr 'allow rw'
    # print client.csi-rbd-provisioner user after update
    kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool
    # print client.csi-rbd-provisioner user after running script
    kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
- name: run external script create-external-cluster-resources.py unit tests
  run: |
    cd rook
    # The toolbox pod name is resolved inline (first matching pod).
    kubectl -n rook-ceph exec $(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[0].metadata.name}') -- python3 -m unittest /etc/ceph/create-external-cluster-resources-tests.py
# Subvolumegroup and rados-namespace checks for the external cluster script.
#
# The polling commands run inside `sh -c "..."`. The jq filter and the grep
# pattern are single-quoted inside that string: a bare inner double quote
# would end the outer string early, and an unquoted `.[0].name` would be open
# to glob expansion. `$toolbox` is still expanded by the outer shell.
- name: wait for the subvolumegroup to be created
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # Poll (up to 60s) until the first subvolumegroup of myfs matches group-a.
    timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph fs subvolumegroup ls myfs | jq '.[0].name' | grep -q 'group-a'; do sleep 1 && echo 'waiting for the subvolumegroup to be created'; done"
- name: test subvolumegroup validation
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # pass the correct subvolumegroup and cephfs_filesystem flag name
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --subvolume-group group-a --cephfs-filesystem-name myfs
    # pass the subvolumegroup name which doesn't exist
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --subvolume-group false-test-subvolume-group
- name: dry run test skip monitoring endpoint
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool --dry-run --skip-monitoring-endpoint
- name: test of rados namespace
  run: |
    cd rook
    kubectl create -f deploy/examples/radosnamespace.yaml
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # Poll (up to 60s) until the first rbd namespace of replicapool matches
    # namespace-a, then remove the resource again.
    timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- rbd namespace ls replicapool --format=json | jq '.[0].name' | grep -q 'namespace-a'; do sleep 1 && echo 'waiting for the rados namespace to be created'; done"
    kubectl delete -f deploy/examples/radosnamespace.yaml
- name: test rados namespace validation
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # create `radosNamespace1` rados-namespace for `replicapool` rbd data-pool
    kubectl -n rook-ceph exec $toolbox -- rbd namespace create replicapool/radosNamespace1
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rados-namespace radosNamespace1
    # negative test: pass a rados namespace that does not exist for replicapool;
    # the script is expected to fail, so success here fails the step
    if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rados-namespace false-test-namespace); then
      echo "unexpectedly succeeded after passing the wrong rados namespace: $output"
      exit 1
    else
      echo "script failed because wrong rados namespace was passed"
    fi
# Restricted-auth and --upgrade runs of the external cluster script.
- name: test external script with restricted_auth_permission flag and without having cephfs_filesystem flag
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --restricted-auth-permission true
- name: test external script with restricted_auth_permission flag
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # Same as the previous step, plus the cephfs filesystem name.
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --cephfs-filesystem-name myfs --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --restricted-auth-permission true
- name: test the upgrade flag
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # print existing client auth
    kubectl -n rook-ceph exec $toolbox -- ceph auth ls
    # update the existing non-restricted client auth with the new ones
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --upgrade
    # print upgraded client auth
    kubectl -n rook-ceph exec $toolbox -- ceph auth ls
- name: test the upgrade flag for restricted auth user
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # print existing client auth
    kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-node-rookstorage-replicapool
    # restricted auth user need to provide --rbd-data-pool-name,
    # --k8s-cluster-name and --run-as-user flag while upgrading
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool
    # print upgraded client auth
    kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-node-rookstorage-replicapool
# Validates --rgw-endpoint handling of the external cluster script.
# Each run redirects stderr to output.txt: valid endpoints must leave that
# file empty, while the invalid endpoint must leave something in it.
- name: validate-rgw-endpoint
  run: |
    cd rook
    # Build "<third column>:80" from the service line matching "rgw".
    rgw_endpoint=$(kubectl get service -n rook-ceph | awk '/rgw/ {print $3":80"}')
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # pass the valid rgw-endpoint of same ceph cluster
    timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
    tests/scripts/github-action-helper.sh check_empty_file output.txt
    rm -f output.txt
    # pass the invalid rgw-endpoint of different ceph cluster
    timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint 10.108.96.128:80 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
    if [ -s output.txt ]; then
      # Print the captured stderr itself; no `output` variable is set in this
      # step, so interpolating one would always print an empty value.
      echo "script run completed with stderr error after passing the wrong rgw-endpoint: $(cat output.txt)"
      rm -f output.txt
    else
      echo "no stderr error even when wrong endpoint was provided"
      rm -f output.txt
      exit 1
    fi
    # pass the valid rgw-endpoint of same ceph cluster with --rgw-tls-cert-path
    timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint --rgw-tls-cert-path my-cert 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
    tests/scripts/github-action-helper.sh check_empty_file output.txt
    rm -f output.txt
    # pass the valid rgw-endpoint of same ceph cluster with --rgw-skip-tls
    timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint --rgw-skip-tls true 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
    tests/scripts/github-action-helper.sh check_empty_file output.txt
    rm -f output.txt
# Realm validation and the v2 mon port flag of the external cluster script.
- name: validate multisite
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    # create realm
    kubectl -n rook-ceph exec $toolbox -- radosgw-admin realm create --rgw-realm=realm1
    # pass correct realm
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-realm-name realm1
    # pass wrong realm
    # NOTE(review): neither branch below fails the step, so this check can
    # never turn the job red - confirm that this is intentional.
    if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-realm-name realm3); then
      echo "script run completed with stderr error after passing the wrong realm: $output"
    else
      echo "script failed because wrong realm was passed"
    fi
- name: test enable v2 mon port
  run: |
    cd rook
    toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
    kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --v2-port-enable
# Final checks, then diagnostics: logs are collected on every run and the
# debugging step is enabled only when an earlier step failed.
- name: check-ownerreferences
  run: |
    cd rook
    tests/scripts/github-action-helper.sh check_ownerreferences
- name: test osd removal jobs
  run: ./scripts/test-osd-removal.sh
- name: collect common logs
  # always(): runs whether the preceding steps passed or failed.
  if: always()
  uses: ./.github/workflows/collect-logs
  with:
    name: canary
- name: consider debugging
  # failure(): runs only when a previous step of this job has failed.
  if: failure()
  uses: lhotari/action-upterm@v1