# build_and_test.yaml
# Canary workflow: triggers, shared shell defaults, and the job list.
name: Canary image testing

on:
  # Run test on each "PUSH" to release branches only.
  push:
    branches: ['main', 'stable/*']
  # Run test on all PRs.
  pull_request:
  # Allows you to run this workflow manually for any internal branch from the Actions tab.
  workflow_dispatch:

defaults:
  run:
    # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell
    # Every `run` step executes under bash with errexit (-e), pipefail and
    # command tracing (-x) switched on.
    shell: bash --noprofile --norc -eo pipefail -x {0}

jobs:
# Job build-rock: runs the build_rock helper and uploads ceph.rock as the
# "rock" artifact.
  build-rock:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # NOTE(review): the action reference arrived garbled as
      # "canonical/[email protected]" (e-mail obfuscation of "<action>@<tag>").
      # Restored as canonical/setup-lxd@v0.1.1 -- confirm the pinned tag.
      - name: Setup LXD
        uses: canonical/setup-lxd@v0.1.1
        with:
          channel: 5.21/edge

      - name: Prepare Rock
        run: ./scripts/test-helper.sh build_rock

      # Publish the built rock under the artifact name "rock".
      # NOTE(review): GitHub has deprecated the v3 artifact actions; if this is
      # bumped to v4, every download-artifact step that fetches "rock" must be
      # bumped in the same change -- confirm before upgrading.
      - uses: actions/upload-artifact@v3
        with:
          name: rock
          path: ceph.rock
# Job flake8-lint: checks out the sources, installs Python 3.10 and runs the
# flake8 action against them.
  flake8-lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Check out source repository
        uses: actions/checkout@v3

      - name: Set up Python environment
        uses: actions/setup-python@v4
        with:
          # Quoted so the version stays the string 3.10 rather than the float 3.1.
          python-version: '3.10'

      - name: flake8 Lint
        uses: py-actions/flake8@v2
# Job CephadmTest: launches an LXD VM named "cephadm" with three block
# volumes, pushes the helper scripts and the built rock into it, deploys via
# cephadm_helper.sh and polls for 1 mon, 1 mgr and 3 OSDs.
  CephadmTest:
    runs-on: ubuntu-latest
    needs: [build-rock, flake8-lint]
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3

      - name: clean unrequired files.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      # Fetches the "rock" artifact into the workspace.
      - name: Download artifact
        uses: actions/download-artifact@v3
        with:
          name: rock

      - name: set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: install dependencies
        run: ./scripts/deploy-helper.sh install_custom_runner_dependencies

      # NOTE(review): the action reference arrived garbled as
      # "canonical/[email protected]" (e-mail obfuscation of "<action>@<tag>").
      # Restored as canonical/setup-lxd@v0.1.1 -- confirm the pinned tag.
      - name: Setup LXD
        uses: canonical/setup-lxd@v0.1.1
        with:
          channel: 5.21/stable

      - name: clean iptables legacy
        run: |
          # Sleep for some time for LXD to settle.
          sleep 2m
          # Flush rules, delete chains and set ACCEPT policies for every
          # iptables flavour, then reload the LXD daemon.
          for ipt in iptables iptables-legacy ip6tables ip6tables-legacy; do
            sudo $ipt --flush
            sudo $ipt --flush -t nat
            sudo $ipt --delete-chain
            sudo $ipt --delete-chain -t nat
            sudo $ipt -P FORWARD ACCEPT
            sudo $ipt -P INPUT ACCEPT
            sudo $ipt -P OUTPUT ACCEPT
          done
          sudo systemctl reload snap.lxd.daemon
          sleep 5

      - name: Setup LXD vm as host
        run: |
          lxc launch --vm ubuntu:24.04 cephadm --config limits.cpu=2 --config limits.memory=8GiB
          lxc storage volume create default osd_one --type block
          lxc storage volume create default osd_two --type block
          lxc storage volume create default osd_three --type block
          # Sleep for LXD VM to come up before attaching the disks
          sleep 2m
          lxc storage volume attach default osd_one cephadm
          lxc storage volume attach default osd_two cephadm
          lxc storage volume attach default osd_three cephadm

      - name: Copy helper scripts to LXD vm
        run: |
          # Use the first *.rock file found in the workspace.
          rock_file=$(ls *.rock | head -1)
          lxc file push ./scripts/*.sh cephadm/root/ -r
          lxc file push "$rock_file" cephadm/root/

      - name: Prepare the docker registry
        run: |
          lxc exec cephadm -- sh -c "~/cephadm_helper.sh install_dependencies"
          # The closing quote was missing here, which left the script with an
          # unterminated string.
          lxc exec cephadm -- sh -c "~/cephadm_helper.sh prep_registry"

      - name: Install cephadm with local image
        run: |
          lxc exec cephadm -- sh -c "~/cephadm_helper.sh deploy_cephadm"
          sleep 3m

      - name: Check health
        run: |
          # Check mon/mgr count is 1 each
          lxc exec cephadm -- sh -c "~/cephadm_helper.sh poll_obj_count mon 1"
          lxc exec cephadm -- sh -c "~/cephadm_helper.sh poll_obj_count mgr 1"

      - name: deploy 3 OSDs
        run: |
          lxc exec cephadm -- sh -c "ceph orch apply osd --all-available-devices"
          sleep 2m
          # "--" must be separated from "sh"; it was previously fused as "--sh".
          lxc exec cephadm -- sh -c "ceph -s"

      - name: check health
        run: lxc exec cephadm -- sh -c "~/cephadm_helper.sh poll_obj_count osd 3"
# Job RookTest: pushes the built rock into a local registry on port 5000,
# deploys a cluster via deploy-helper.sh, then runs the
# create-external-cluster-resources.py scenarios through the rook-ceph-tools
# (toolbox) pod. Most steps run from the `rook` directory.
  RookTest:
    needs: build-rock
    runs-on: ubuntu-latest
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: setup cluster resources
        uses: ./.github/workflows/canary-test-config
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: validate-yaml
        run: |
          cd rook
          ./tests/scripts/github-action-helper.sh validate_yaml

      - name: use local disk and create partitions for osds
        run: |
          cd rook
          ./tests/scripts/github-action-helper.sh use_local_disk
          ./tests/scripts/github-action-helper.sh create_partitions_for_osds

      # Fetches the "rock" artifact into the workspace.
      - name: Download artifact
        uses: actions/download-artifact@v3
        with:
          name: rock

      # Starts a registry container on port 5000, converts the rock to a docker
      # image with skopeo, pushes it, and marks the registry as insecure.
      - name: Load image and load to registry
        run: |
          ls
          rock_file=$(ls *.rock | head -1)
          docker run -d -p 5000:5000 --restart=always --name registry registry:2
          skopeo --insecure-policy copy oci-archive:$rock_file docker-daemon:canonical/ceph:latest
          docker image ls -a
          docker image tag canonical/ceph:latest localhost:5000/canonical/ceph:latest
          sleep 10
          docker push localhost:5000/canonical/ceph
          echo $'[registries.insecure]\nregistries = ["localhost:5000"]' | sudo tee -a /etc/containers/registries.conf

      - name: deploy cluster
        run: ./scripts/deploy-helper.sh deploy_cluster

      - name: wait for prepare pod
        run: cd rook ; tests/scripts/github-action-helper.sh wait_for_prepare_pod ; sleep 100

      - name: wait for ceph to be ready
        run: cd rook ; tests/scripts/github-action-helper.sh wait_for_ceph_to_be_ready all 2

      # Polls until `ceph mgr dump` reports an IPv4 active_addr, then until
      # port 9283 on that address answers curl from inside the toolbox pod.
      - name: wait for ceph mgr to be ready
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr|grep -Eosq \"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\" ; do sleep 1 && echo 'waiting for the manager IP to be available'; done"
          mgr_raw=$(kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr)
          timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- curl --silent --show-error ${mgr_raw%%:*}:9283; do echo 'waiting for mgr prometheus exporter to be ready' && sleep 1; done"

      # Copies the script, its unit tests and the test data into the toolbox
      # pod under /etc/ceph, where the later steps run them.
      - name: test external script create-external-cluster-resources.py
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- mkdir -p /etc/ceph/test-data
          kubectl -n rook-ceph cp tests/ceph-status-out $toolbox:/etc/ceph/test-data/
          kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph
          kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources-tests.py $toolbox:/etc/ceph
          timeout 10 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool; do echo 'waiting for script to succeed' && sleep 1; done"
          # print existing client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth ls

      # Runs the script twice with identical flags and fails if the two
      # outputs differ.
      - name: test re-running of external script should result in same output
        run: |
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool | tee output1.txt
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool | tee output2.txt
          if cmp output1.txt output2.txt; then
            echo "files have same output"
            rm output1.txt
            rm output2.txt
          else
            echo "re-run with same flags changed the output, result in failure"
            rm output1.txt
            rm output2.txt
            exit 1
          fi

      - name: dry run external script create-external-cluster-resources.py
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool --dry-run

      - name: test external script create-external-cluster-resources.py if users already exist with different caps
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # update client.csi-rbd-provisioner csi user caps
          # print client.csi-rbd-provisioner user before update
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
          kubectl -n rook-ceph exec $toolbox -- ceph auth caps client.csi-rbd-provisioner mon 'profile rbd, allow command "osd ls"' osd 'profile rbd' mgr 'allow rw'
          # print client.csi-rbd-provisioner user after update
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool
          # print client.csi-rbd-provisioner user after running script
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-provisioner

      - name: run external script create-external-cluster-resources.py unit tests
        run: |
          cd rook
          kubectl -n rook-ceph exec $(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[0].metadata.name}') -- python3 -m unittest /etc/ceph/create-external-cluster-resources-tests.py

      - name: wait for the subvolumegroup to be created
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph fs subvolumegroup ls myfs|jq .[0].name|grep -q "group-a"; do sleep 1 && echo 'waiting for the subvolumegroup to be created'; done"

      - name: test subvolumegroup validation
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # pass the correct subvolumegroup and cephfs_filesystem flag name
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --subvolume-group group-a --cephfs-filesystem-name myfs
          # pass the subvolumegroup name which doesn't exist
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --subvolume-group false-test-subvolume-group

      - name: dry run test skip monitoring endpoint
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name=replicapool --dry-run --skip-monitoring-endpoint

      - name: test of rados namespace
        run: |
          cd rook
          kubectl create -f deploy/examples/radosnamespace.yaml
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- rbd namespace ls replicapool --format=json|jq .[0].name|grep -q "namespace-a"; do sleep 1 && echo 'waiting for the rados namespace to be created'; done"
          kubectl delete -f deploy/examples/radosnamespace.yaml

      # Negative check: the step fails if the script accepts a namespace that
      # was never created.
      - name: test rados namespace validation
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # create `radosNamespace1` rados-namespace for `replicapool` rbd data-pool
          kubectl -n rook-ceph exec $toolbox -- rbd namespace create replicapool/radosNamespace1
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rados-namespace radosNamespace1
          # test the rados namespace which does not exist for replicapool (false testing)
          if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rados-namespace false-test-namespace); then
            echo "unexpectedly succeeded after passing the wrong rados namespace: $output"
            exit 1
          else
            echo "script failed because wrong rados namespace was passed"
          fi

      - name: test external script with restricted_auth_permission flag and without having cephfs_filesystem flag
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --restricted-auth-permission true

      - name: test external script with restricted_auth_permission flag
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --cephfs-filesystem-name myfs --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --restricted-auth-permission true

      - name: test the upgrade flag
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # print existing client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth ls
          # update the existing non-restricted client auth with the new ones
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --upgrade
          # print upgraded client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth ls

      - name: test the upgrade flag for restricted auth user
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # print existing client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-node-rookstorage-replicapool
          # restricted auth user need to provide --rbd-data-pool-name,
          # --k8s-cluster-name and --run-as-user flag while upgrading
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --k8s-cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool
          # print upgraded client auth
          kubectl -n rook-ceph exec $toolbox -- ceph auth get client.csi-rbd-node-rookstorage-replicapool

      # Each scenario redirects the script's stderr to output.txt; valid
      # endpoints are passed to the check_empty_file helper, while the invalid
      # endpoint must leave output.txt non-empty.
      - name: validate-rgw-endpoint
        run: |
          cd rook
          rgw_endpoint=$(kubectl get service -n rook-ceph | awk '/rgw/ {print $3":80"}')
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # pass the valid rgw-endpoint of same ceph cluster
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          tests/scripts/github-action-helper.sh check_empty_file output.txt
          rm -f output.txt
          # pass the invalid rgw-endpoint of different ceph cluster
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint 10.108.96.128:80 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          if [ -s output.txt ]; then
            # Print the captured stderr; no `output` variable is set in this
            # step, so the message previously ended with an empty string.
            echo "script run completed with stderr error after passing the wrong rgw-endpoint: $(cat output.txt)"
            rm -f output.txt
          else
            echo "no stderr error even when wrong endpoint was provided"
            rm -f output.txt
            exit 1
          fi
          # pass the valid rgw-endpoint of same ceph cluster with --rgw-tls-cert-path
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint --rgw-tls-cert-path my-cert 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          tests/scripts/github-action-helper.sh check_empty_file output.txt
          rm -f output.txt
          # pass the valid rgw-endpoint of same ceph cluster with --rgw-skip-tls
          timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-endpoint $rgw_endpoint --rgw-skip-tls true 2> output.txt; do sleep 1 && echo 'waiting for the rgw endpoint to be validated'; done"
          tests/scripts/github-action-helper.sh check_empty_file output.txt
          rm -f output.txt

      # NOTE(review): the wrong-realm branch below only logs; unlike the rados
      # namespace validation step it never exits non-zero when the script
      # succeeds -- confirm whether an `exit 1` is intended there.
      - name: validate multisite
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          # create realm
          kubectl -n rook-ceph exec $toolbox -- radosgw-admin realm create --rgw-realm=realm1
          # pass correct realm
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-realm-name realm1
          # pass wrong realm
          if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --rgw-realm-name realm3); then
            echo "script run completed with stderr error after passing the wrong realm: $output"
          else
            echo "script failed because wrong realm was passed"
          fi

      - name: test enable v2 mon port
        run: |
          cd rook
          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --v2-port-enable

      - name: check-ownerreferences
        run: cd rook; tests/scripts/github-action-helper.sh check_ownerreferences

      - name: test osd removal jobs
        run: ./scripts/test-osd-removal.sh

      # Runs whether or not earlier steps succeeded.
      - name: collect common logs
        if: always()
        uses: ./.github/workflows/collect-logs
        with:
          name: canary

      # Runs only when an earlier step failed.
      - name: consider debugging
        uses: lhotari/action-upterm@v1
        if: failure()