From 07b4c951836368d0c78d8345b8b275d09976153c Mon Sep 17 00:00:00 2001 From: gene-redpanda <123959009+gene-redpanda@users.noreply.github.com> Date: Wed, 20 Sep 2023 18:48:05 -0400 Subject: [PATCH 1/6] initial pass at gcp ci Includes the GCP module and the first pass at testing around it -- still need to actually complete the buildkite bit and make sure that the GCP_CREDS passthru is legit in buildkite. Currently the standard tiered storage testing works with ubuntu, haven't gotten a clean run with the private proxy in this format -- got an error about focal that is probably related to the ancient image we're using by default in the GCP module since I just copied that bit over from the OG stuff. --- .buildkite/pipeline.yml | 16 ++ .buildkite/scripts/standup-gcp-cluster.sh | 63 +++++++ .buildkite/scripts/test-proxy-cluster.sh | 7 + .../scripts/test-tiered-storage-cluster.sh | 10 +- Taskfile.yaml | 8 +- gcp/{readme.md => README.md} | 0 gcp/cluster.tf | 157 ------------------ gcp/main.tf | 157 ++++++++++++++++++ gcp/outputs.tf | 15 -- gcp/private-test/main.tf | 156 +++++++++++++++++ gcp/provider.tf | 4 - gcp/vars.tf | 86 ---------- 12 files changed, 412 insertions(+), 267 deletions(-) create mode 100755 .buildkite/scripts/standup-gcp-cluster.sh rename gcp/{readme.md => README.md} (100%) delete mode 100644 gcp/cluster.tf create mode 100644 gcp/main.tf delete mode 100644 gcp/outputs.tf create mode 100644 gcp/private-test/main.tf delete mode 100644 gcp/provider.tf delete mode 100644 gcp/vars.tf diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index c810c10c..650d77d7 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -92,3 +92,19 @@ steps: - DA_AWS_ACCESS_KEY_ID - DA_AWS_SECRET_ACCESS_KEY - AWS_DEFAULT_REGION + - label: gcp ubuntu + key: tiered-up-gcp-ubuntu + command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "ubuntu-os-cloud/ubuntu-2204-lts" --prefix "ci-ub-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$GCP_CREDS" + plugins: + - docker#v5.4.0: + image: glrp/atgt:latest + environment: + - GCP_CREDS + - label: gcp fedora + key: tiered-up-gcp-fedora + command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "fedora-cloud/fedora-cloud-37" --prefix "ci-fd-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$GCP_CREDS" + plugins: + - docker#v5.4.0: + image: glrp/atgt:latest + environment: + - GCP_CREDS diff --git a/.buildkite/scripts/standup-gcp-cluster.sh b/.buildkite/scripts/standup-gcp-cluster.sh new file mode 100755 index 00000000..6564a6fd --- /dev/null +++ b/.buildkite/scripts/standup-gcp-cluster.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +export GCP_CREDS=$GCP_CREDS + +# Parse command line arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --tf_dir) TF_DIR="$2"; shift ;; + --prefix) PREFIX="$2"; shift ;; + --gcp_creds) GCP_CREDS="$2"; shift ;; + --cluster_type) TASK_NAME="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +# Check if TF_DIR and PREFIX are set +if [ -z "$TF_DIR" ] || [ -z "$PREFIX" ] || [ -z "$GCP_CREDS" ] || [ -z "$TASK_NAME" ]; then + echo "TF_DIR, PREFIX, CLUSTER_TYPE and GCP_CREDS must be set. Exiting." + exit 1 +fi + +cd "$TF_DIR" || exit 1 +export HOSTS_FILE_DIR="$(pwd)/../../artifacts/hosts_gcp_${PREFIX}.ini" +export KEY_FILE="$(pwd)/../../artifacts/testkey" +if [ "$TF_DIR" == "gcp" ]; then + export HOSTS_FILE_DIR="$(pwd)/../artifacts/hosts_gcp_${PREFIX}.ini" + export KEY_FILE="$(pwd)/../artifacts/testkey" +fi + +ssh-keygen -t rsa -b 4096 -C "test@redpanda.com" -N "" -f "$KEY_FILE" <<< y && chmod 0700 "$KEY_FILE" + +# Trap to handle terraform destroy on exit +trap cleanup EXIT INT TERM +cleanup() { + error_code=$? + terraform destroy --auto-approve --var="gcp_creds=$GCP_CREDS" --var="deployment_prefix=$PREFIX" --var="public_key_path=$KEY_FILE" --var="project_name=hallowed-ray-376320" --var="hosts_file=$HOSTS_FILE_DIR" + rm -f "$KEY_FILE" + rm -f "${KEY_FILE}.pub" + exit $error_code +} + +terraform init +terraform apply --auto-approve --var="deployment_prefix=$PREFIX" --var="gcp_creds=$GCP_CREDS" --var="public_key_path=$KEY_FILE.pub" --var="project_name=hallowed-ray-376320" --var="hosts_file=$HOSTS_FILE_DIR" + +echo "building cluster" +DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO IS_USING_UNSTABLE=$UNSTABLE CLOUD_PROVIDER="gcp" task "create-$TASK_NAME" +error_code=$? +if [ $error_code -ne 0 ]; then + echo "error in ansible standup" + exit 1 +fi + +echo "testing cluster" +DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO CLOUD_PROVIDER="gcp" task "test-$TASK_NAME" +error_code=$? +if [ $error_code -ne 0 ]; then + echo "error in test-tls-cluster" + exit 1 +fi + +# Trap will handle destroy so just exit +exit $? diff --git a/.buildkite/scripts/test-proxy-cluster.sh b/.buildkite/scripts/test-proxy-cluster.sh index 4ba653d2..fdabf803 100755 --- a/.buildkite/scripts/test-proxy-cluster.sh +++ b/.buildkite/scripts/test-proxy-cluster.sh @@ -15,6 +15,9 @@ while [ $# -gt 0 ]; do --sshkey=*) SSHKEY="${1#*=}" ;; + --cloud=*) + CLOUD_PROVIDER="${1#*=}" + ;; *) echo "Invalid argument: $1" exit 1 @@ -68,6 +71,10 @@ echo "consuming from topic" testoutput=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i $SSHKEY $CLIENT_SSH_USER@$CLIENT_PUBLIC_IP 'rpk topic consume testtopic --brokers '"$REDPANDA_BROKERS"' --tls-truststore '"$PATH_TO_CA_CRT"' -v -o :end') echo $testoutput | grep squirrels || exit 1 +if [ "$CLOUD_PROVIDER" == "gcp" ]; then + echo "success" + exit 0 +fi echo "checking that bucket is not empty" # Check if the bucket is empty object_count=$(aws s3api list-objects --bucket "${BUCKET_NAME}" --region us-west-2 --output json | jq '.Contents | length') diff --git a/.buildkite/scripts/test-tiered-storage-cluster.sh b/.buildkite/scripts/test-tiered-storage-cluster.sh index 0e13f53f..f26298ae 100755 --- a/.buildkite/scripts/test-tiered-storage-cluster.sh +++ b/.buildkite/scripts/test-tiered-storage-cluster.sh @@ -15,6 +15,9 @@ while [ $# -gt 0 ]; do --bucket=*) BUCKET_NAME="${1#*=}" ;; + --cloud=*) + CLOUD_PROVIDER="${1#*=}" + ;; *) echo "Invalid argument: $1" exit 1 @@ -71,7 +74,12 @@ testoutput=$("${PATH_TO_RPK_FILE}" topic consume testtopic --brokers "$REDPANDA_ echo $testoutput | grep squirrels || exit 1 echo "testing schema registry" -for ip_port in $(echo $REDPANDA_REGISTRY | tr ',' ' '); do curl $ip_port/subjects -k --cacert "$PATH_TO_CA_CRT" ; done +for ip_port in $(echo $REDPANDA_REGISTRY | tr ',' ' '); do curl $ip_port/subjects -k --cacert "$PATH_TO_CA_CRT" ; done + +if [ "$CLOUD_PROVIDER" == "gcp" ]; then + echo "success" + exit 0 +fi echo "checking that bucket is not empty" # Check if the bucket is empty diff --git a/Taskfile.yaml b/Taskfile.yaml index bb47ddd3..f6090548 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -187,7 +187,7 @@ tasks: vars: SEGMENT_UPLOAD_INTERVAL: '{{ default "1" .SEGMENT_UPLOAD_INTERVAL }}' IS_USING_UNSTABLE: '{{ default false .IS_USING_UNSTABLE }}' - ANSIBLE_PLAYBOOK: '{{ default "provision-private-proxied-cluster.yml" .ANSIBLE_PLAYBOOK }}' + ANSIBLE_PLAYBOOK: '{{ default "proxy/provision-private-proxied-cluster.yml" .ANSIBLE_PLAYBOOK }}' SKIP_TAGS: '{{ default "" .SKIP_TAGS }}' cmds: - task: basic @@ -227,7 +227,7 @@ tasks: RPK_LOC: '{{.ARTIFACT_DIR}}/bin/rpk' # this needs to go somewhere else badly CA_CRT: '{{ default "ansible/tls/ca/ca.crt" .CA_CRT }}' - ANSIBLE_INVENTORY: '{{.ARTIFACT_DIR}}/hosts_{{.CLOUD_PROVIDER}}_{{.DEPLOYMENT_ID}}.ini' + ANSIBLE_INVENTORY: '{{.ANSIBLE_INVENTORY}}' cmds: - '{{.TASKFILE_DIR}}/.buildkite/scripts/test-tls-cluster.sh --hosts={{.ANSIBLE_INVENTORY}} --cert={{.CA_CRT}} --rpk={{.RPK_LOC}}' @@ -239,7 +239,7 @@ tasks: RPK_LOC: '{{.ARTIFACT_DIR}}/bin/rpk' # this needs to go somewhere else badly CA_CRT: '{{ default "ansible/tls/ca/ca.crt" .CA_CRT }}' - ANSIBLE_INVENTORY: '{{.ARTIFACT_DIR}}/hosts_{{.CLOUD_PROVIDER}}_{{.DEPLOYMENT_ID}}.ini' + ANSIBLE_INVENTORY: '{{.ANSIBLE_INVENTORY}}' cmds: - '{{.TASKFILE_DIR}}/.buildkite/scripts/test-tiered-storage-cluster.sh --hosts={{.ANSIBLE_INVENTORY}} --cert={{.CA_CRT}} --rpk={{.RPK_LOC}} --bucket={{.BUCKET_NAME}}' @@ -247,6 +247,6 @@ tasks: desc: tests that a proxied cluster is WAD vars: CA_CRT: '{{ default "/opt/rpk/certs/ca.crt" .CA_CRT }}' - ANSIBLE_INVENTORY: '{{.ARTIFACT_DIR}}/hosts_{{.CLOUD_PROVIDER}}_{{.DEPLOYMENT_ID}}.ini' + ANSIBLE_INVENTORY: '{{.ANSIBLE_INVENTORY}}' cmds: - '{{.TASKFILE_DIR}}/.buildkite/scripts/test-proxy-cluster.sh --hosts={{.ANSIBLE_INVENTORY}} --cert={{.CA_CRT}} --bucket={{.BUCKET_NAME}} --sshkey=artifacts/testkey' diff --git a/gcp/readme.md b/gcp/README.md similarity index 100% rename from gcp/readme.md rename to gcp/README.md diff --git a/gcp/cluster.tf b/gcp/cluster.tf deleted file mode 100644 index bcc8c6d5..00000000 --- a/gcp/cluster.tf +++ /dev/null @@ -1,157 +0,0 @@ -resource "random_uuid" "cluster" {} - -locals { - uuid = random_uuid.cluster.result - deployment_id = random_uuid.cluster.result -} - -resource "google_compute_resource_policy" "redpanda-rp" { - name = "redpanda-rp" - region = var.region - group_placement_policy { - availability_domain_count = var.ha ? max(3, var.nodes) : 1 - } - count = var.ha ? 1 : 0 -} - -resource "google_compute_instance" "redpanda" { - count = var.nodes - name = "rp-node-${count.index}-${local.deployment_id}" - tags = ["rp-cluster", "tf-deployment-${local.deployment_id}"] - zone = "${var.region}-${var.availability_zone[count.index % length(var.availability_zone)]}" - machine_type = var.machine_type - // GCP does not give you visibility nor control over which failure domain a resource has been placed into - // (https://issuetracker.google.com/issues/256993209?pli=1). So the only way that we can guarantee that - // specific nodes are in separate racks is to put them into entirely separate failure domains - basically one - // broker per failure domain, and we are limited by the number of failure domains (at the moment 8). - resource_policies = (var.ha && var.nodes <= 8) ? [ - google_compute_resource_policy.redpanda-rp[0].id - ] : null - - metadata = { - ssh-keys = < 1 - availability_zone = google_compute_instance.redpanda[*].zone - cloud_storage_region = var.region - tiered_storage_enabled = false - tiered_storage_bucket_name = "" - } - ) - filename = "${path.module}/../hosts.ini" -} diff --git a/gcp/main.tf b/gcp/main.tf new file mode 100644 index 00000000..743d56fd --- /dev/null +++ b/gcp/main.tf @@ -0,0 +1,157 @@ +variable "deployment_prefix" { + type = string + default = "rp-test" +} + +resource "google_compute_network" "test-net" { + name = "${var.deployment_prefix}-test-net" + auto_create_subnetworks = "false" +} + +resource "google_compute_subnetwork" "test-subnet" { + name = "${var.deployment_prefix}-test-sub" + ip_cidr_range = "10.0.0.0/16" + region = var.region + network = google_compute_network.test-net.self_link +} + +resource "google_compute_firewall" "test-fire" { + name = "${var.deployment_prefix}-test-fire" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["22", "3000", "8888", "8889", "9090", "9092", "9100", "9644", "33145"] + } + + source_ranges = ["0.0.0.0/0"] +} + +module "redpanda-cluster" { + source = "redpanda-data/redpanda-cluster/gcp" + version = ">= 0.6.3" + region = var.region + + ssh_user = var.ssh_user + subnet = coalesce(var.subnet, google_compute_subnetwork.test-subnet.id) + image = var.image + availability_zone = var.availability_zone + broker_count = var.nodes + client_count = var.client_nodes + disks = var.disks + ha = var.ha + broker_machine_type = var.machine_type + client_machine_type = var.client_machine_type + monitor_machine_type = var.monitor_machine_type + public_key_path = var.public_key_path + enable_monitoring = var.enable_monitoring + labels = var.labels + deployment_prefix = var.deployment_prefix + hosts_file = var.hosts_file +} + +provider "google" { + region = var.region + project = var.project_name + credentials = base64decode(var.gcp_creds) +} + +variable "gcp_creds" { + default = "" + type = string + description = "base64 encoded contents of the key for a service account with all necessary permissions" +} + +variable "region" { + default = "us-west2" +} + +variable "availability_zone" { + description = "The zone where the cluster will be deployed [a,b,...]" + default = ["a"] + type = list(string) +} + +variable "subnet" { + description = "The name of the existing subnet where the machines will be deployed" + default = "" +} + +variable "project_name" { + default = "hallowed-ray-376320" + type = string + description = "The project name on GCP." +} + +variable "nodes" { + description = "The number of nodes to deploy." + type = number + default = "3" +} + +variable "ha" { + description = "Whether to use placement groups to create an HA topology" + type = bool + default = false +} + +variable "client_nodes" { + description = "The number of clients to deploy." + type = number + default = "1" +} + +variable "disks" { + description = "The number of local disks on each machine." + type = number + default = 1 +} + +variable "image" { + # See https://cloud.google.com/compute/docs/images#os-compute-support + # for an updated list. + default = "ubuntu-os-cloud/ubuntu-2004-lts" +} + +variable machine_type { + # List of available machines per region/ zone: + # https://cloud.google.com/compute/docs/regions-zones#available + default = "n2-standard-2" +} + +variable monitor_machine_type { + default = "n2-standard-2" +} + +variable client_machine_type { + default = "n2-standard-2" +} + +variable "public_key_path" { + description = "The ssh key." + default = "~/.ssh/id_rsa.pub" +} + +variable "ssh_user" { + description = "The ssh user. Must match the one in the public ssh key's comments." + default = "ubuntu" + type = string +} + +variable "enable_monitoring" { + default = true +} + +variable "labels" { + description = "passthrough of GCP labels" + default = { + "purpose" = "redpanda-cluster" + "created-with" = "terraform" + } +} + +variable "hosts_file" { + type = string + description = "location of ansible hosts file" + default = "../hosts.ini" +} diff --git a/gcp/outputs.tf b/gcp/outputs.tf deleted file mode 100644 index 50290b45..00000000 --- a/gcp/outputs.tf +++ /dev/null @@ -1,15 +0,0 @@ -output "ip" { - value = google_compute_instance.redpanda[*].network_interface.0.access_config.0.nat_ip -} - -output "private_ips" { - value = google_compute_instance.redpanda[*].network_interface.0.network_ip -} - -output "ssh_user" { - value = var.ssh_user -} - -output "public_key_path" { - value = var.public_key_path -} diff --git a/gcp/private-test/main.tf b/gcp/private-test/main.tf new file mode 100644 index 00000000..19b52af4 --- /dev/null +++ b/gcp/private-test/main.tf @@ -0,0 +1,156 @@ +variable "gcp_creds" { + default = "" + description = "base64 encoded json GCP key file for a service account" +} + +provider "google" { + project = var.project_name + region = var.region + credentials = base64decode(var.gcp_creds) +} + +variable "region" { + type = string + default = "us-central1" +} +variable "public_key_path" { + default = "" + type = string +} +module "redpanda-cluster" { + source = "redpanda-data/redpanda-cluster/gcp" + version = ">= 0.6.3" + ssh_user = "ubuntu" + subnet = google_compute_subnetwork.test-subnet.id + region = var.region + enable_tiered_storage = true + allow_force_destroy = true + allocate_brokers_public_ip = false + public_key_path = var.public_key_path + deployment_prefix = var.deployment_prefix + hosts_file = var.hosts_file +} + +resource "google_compute_network" "test-net" { + name = "${var.deployment_prefix}-proxy-net" + auto_create_subnetworks = "false" +} + +resource "google_compute_subnetwork" "test-subnet" { + name = "${var.deployment_prefix}-test-sub" + ip_cidr_range = "10.0.0.0/16" + region = var.region + network = google_compute_network.test-net.self_link +} + +resource "google_compute_firewall" "broker-broker" { + name = "${var.deployment_prefix}-allow-broker-to-broker" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + + source_tags = ["broker"] + target_tags = ["broker"] +} + +resource "google_compute_firewall" "broker-to-client" { + name = "${var.deployment_prefix}-broker-to-client" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["3128"] + } + + source_tags = ["broker"] + target_tags = ["client"] +} + +resource "google_compute_firewall" "client-to-broker" { + name = "${var.deployment_prefix}-client-to-broker" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + + target_tags = ["broker"] + source_tags = ["client"] +} + + +resource "google_compute_firewall" "client-to-internet" { + name = "${var.deployment_prefix}-client-to-internet" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + source_tags = ["client"] + destination_ranges = ["0.0.0.0/0"] +} + +resource "google_compute_firewall" "internet-to-client" { + name = "${var.deployment_prefix}-internet-to-client" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["22"] + } + target_tags = ["client"] + source_ranges = ["0.0.0.0/0"] +} + +resource "google_compute_firewall" "internet-to-monitor" { + name = "${var.deployment_prefix}-internet-to-monitor" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + target_tags = ["monitor"] + source_ranges = ["0.0.0.0/0"] +} +resource "google_compute_firewall" "monitor-to-internet" { + name = "${var.deployment_prefix}-monitor-to-internet" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + source_tags = ["monitor"] + destination_ranges = ["0.0.0.0/0"] +} + +resource "google_compute_firewall" "monitor-to-broker" { + name = "${var.deployment_prefix}-monitor-to-broker" + network = google_compute_network.test-net.name + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + + target_tags = ["broker"] + source_tags = ["monitor"] +} + +variable "deployment_prefix" { + type = string +} + +variable "project_name" { + default = "" +} + +variable "hosts_file" { + type = string +} diff --git a/gcp/provider.tf b/gcp/provider.tf deleted file mode 100644 index 6dcf8c04..00000000 --- a/gcp/provider.tf +++ /dev/null @@ -1,4 +0,0 @@ -provider "google" { - project = var.project_name - region = var.region -} diff --git a/gcp/vars.tf b/gcp/vars.tf deleted file mode 100644 index 3415c2db..00000000 --- a/gcp/vars.tf +++ /dev/null @@ -1,86 +0,0 @@ -variable "region" { - default = "us-west2" -} - -variable "availability_zone" { - description = "The zone where the cluster will be deployed [a,b,...]" - default = ["a"] - type = list(string) -} - -variable "instance_group_name" { - description = "The name of the GCP instance group" - default = "redpanda-group" -} - -variable "subnet" { - description = "The name of the existing subnet where the machines will be deployed" -} - -variable "project_name" { - description = "The project name on GCP." -} - -variable "nodes" { - description = "The number of nodes to deploy." - type = number - default = "3" -} - -variable "ha" { - description = "Whether to use placement groups to create an HA topology" - type = bool - default = false -} - -variable "client_nodes" { - description = "The number of clients to deploy." - type = number - default = "1" -} - -variable "disks" { - description = "The number of local disks on each machine." - type = number - default = 1 -} - -variable "image" { - # See https://cloud.google.com/compute/docs/images#os-compute-support - # for an updated list. - default = "ubuntu-os-cloud/ubuntu-2004-lts" -} - -variable machine_type { - # List of available machines per region/ zone: - # https://cloud.google.com/compute/docs/regions-zones#available - default = "n2-standard-2" -} - -variable monitor_machine_type { - default = "n2-standard-2" -} - -variable client_machine_type { - default = "n2-standard-2" -} - -variable "public_key_path" { - description = "The ssh key." -} - -variable "ssh_user" { - description = "The ssh user. Must match the one in the public ssh key's comments." -} - -variable "enable_monitoring" { - default = "yes" -} - -variable "labels" { - description = "passthrough of GCP labels" - default = { - "purpose" = "redpanda-cluster" - "created-with" = "terraform" - } -} From 96bda37bae6b3ea03a91a632e8b817b1133f922a Mon Sep 17 00:00:00 2001 From: gene-redpanda <123959009+gene-redpanda@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:00:59 -0400 Subject: [PATCH 2/6] fixes and improvements CI now works when run locally, still need to validate that it runs on buildkite. Chief issue is the passing in of GCP creds -- not sure what I have will pass muster tbh. Most likely will need to rebuild client image to accept GCP_CREDS as an env var and then use that directly as an ENV var in the script. But want to validate this first :sweat_smile: Also did some code stuff * Loosened some firewall rules * bumped the basic ubuntu version to be less ancient * fixed the build script and passed it correctly in buildkite * allowed passing in the squid_acl so that the proxy correctly handles passthru. This need is created by the difference in subnetting techniques between GCP and AWS --- .buildkite/scripts/standup-gcp-cluster.sh | 5 +++-- Taskfile.yaml | 3 ++- gcp/main.tf | 2 +- gcp/private-test/main.tf | 8 +++++++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.buildkite/scripts/standup-gcp-cluster.sh b/.buildkite/scripts/standup-gcp-cluster.sh index 6564a6fd..557af4c8 100755 --- a/.buildkite/scripts/standup-gcp-cluster.sh +++ b/.buildkite/scripts/standup-gcp-cluster.sh @@ -9,6 +9,7 @@ while [[ "$#" -gt 0 ]]; do --prefix) PREFIX="$2"; shift ;; --gcp_creds) GCP_CREDS="$2"; shift ;; --cluster_type) TASK_NAME="$2"; shift ;; + --image) IMAGE="$2"; shift ;; *) echo "Unknown parameter passed: $1"; exit 1 ;; esac shift @@ -41,10 +42,10 @@ cleanup() { } terraform init -terraform apply --auto-approve --var="deployment_prefix=$PREFIX" --var="gcp_creds=$GCP_CREDS" --var="public_key_path=$KEY_FILE.pub" --var="project_name=hallowed-ray-376320" --var="hosts_file=$HOSTS_FILE_DIR" +terraform apply --auto-approve --var="image=$IMAGE" --var="deployment_prefix=$PREFIX" --var="gcp_creds=$GCP_CREDS" --var="public_key_path=$KEY_FILE.pub" --var="project_name=hallowed-ray-376320" --var="hosts_file=$HOSTS_FILE_DIR" echo "building cluster" -DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO IS_USING_UNSTABLE=$UNSTABLE CLOUD_PROVIDER="gcp" task "create-$TASK_NAME" +DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO IS_USING_UNSTABLE=$UNSTABLE SQUID_ACL_LOCALNET="10.0.0.0/24" CLOUD_PROVIDER="gcp" task "create-$TASK_NAME" error_code=$? if [ $error_code -ne 0 ]; then echo "error in ansible standup" diff --git a/Taskfile.yaml b/Taskfile.yaml index f6090548..d3f6ed19 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -189,11 +189,12 @@ tasks: IS_USING_UNSTABLE: '{{ default false .IS_USING_UNSTABLE }}' ANSIBLE_PLAYBOOK: '{{ default "proxy/provision-private-proxied-cluster.yml" .ANSIBLE_PLAYBOOK }}' SKIP_TAGS: '{{ default "" .SKIP_TAGS }}' + SQUID_ACL_LOCALNET: '{{ default "" .SQUID_ACL_LOCALNET }}' cmds: - task: basic vars: { ANSIBLE_PLAYBOOK: "{{ .ANSIBLE_PLAYBOOK }}", - CLI_ARGS: "--extra-vars redpanda='{\"cluster\":{\"cloud_storage_segment_max_upload_interval_sec\":\"{{.SEGMENT_UPLOAD_INTERVAL}}\"}}'", + CLI_ARGS: "--extra-vars '{\"squid_acl_localnet\": [\"{{.SQUID_ACL_LOCALNET}}\"]}' --extra-vars redpanda='{\"cluster\":{\"cloud_storage_segment_max_upload_interval_sec\":\"{{.SEGMENT_UPLOAD_INTERVAL}}\"}}'", IS_USING_UNSTABLE: "{{ .IS_USING_UNSTABLE }}", SKIP_TAGS: "{{ .SKIP_TAGS }}" } diff --git a/gcp/main.tf b/gcp/main.tf index 743d56fd..f2933945 100644 --- a/gcp/main.tf +++ b/gcp/main.tf @@ -110,7 +110,7 @@ variable "disks" { variable "image" { # See https://cloud.google.com/compute/docs/images#os-compute-support # for an updated list. - default = "ubuntu-os-cloud/ubuntu-2004-lts" + default = "ubuntu-os-cloud/ubuntu-2204-lts" } variable machine_type { diff --git a/gcp/private-test/main.tf b/gcp/private-test/main.tf index 19b52af4..f11a4c2e 100644 --- a/gcp/private-test/main.tf +++ b/gcp/private-test/main.tf @@ -29,6 +29,7 @@ module "redpanda-cluster" { public_key_path = var.public_key_path deployment_prefix = var.deployment_prefix hosts_file = var.hosts_file + image = var.image } resource "google_compute_network" "test-net" { @@ -101,7 +102,7 @@ resource "google_compute_firewall" "internet-to-client" { allow { protocol = "tcp" - ports = ["22"] + ports = ["22", "80", "443", "21", "20"] } target_tags = ["client"] source_ranges = ["0.0.0.0/0"] @@ -154,3 +155,8 @@ variable "project_name" { variable "hosts_file" { type = string } + +variable "image" { + default = "ubuntu-os-cloud/ubuntu-2204-lts" + type = string +} From afc6227eee72e7f371475b2ee5db4029dc0ad560 Mon Sep 17 00:00:00 2001 From: gene-redpanda <123959009+gene-redpanda@users.noreply.github.com> Date: Fri, 22 Sep 2023 17:30:40 -0400 Subject: [PATCH 3/6] buildkite related work --- .buildkite/pipeline.yml | 32 +++++++++++------------ .buildkite/scripts/standup-gcp-cluster.sh | 4 +-- Dockerfile_FEDORA | 1 + Dockerfile_UBUNTU | 10 ++----- gcp/main.tf | 2 +- 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 650d77d7..26d8fae8 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -2,6 +2,22 @@ agents: queue: "k8s-builders" steps: + - label: gcp ubuntu + key: tiered-up-gcp-ubuntu + command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "ubuntu-os-cloud/ubuntu-2204-lts" --prefix "ci-ub-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$DEVEX_GCP_CREDS_BASE64" + plugins: + - docker#v5.4.0: + image: glrp/atgt:latest + environment: + - DEVEX_GCP_CREDS_BASE64 + - label: gcp fedora + key: tiered-up-gcp-fedora + command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "fedora-cloud/fedora-cloud-37" --prefix "ci-fd-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$DEVEX_GCP_CREDS_BASE64" + plugins: + - docker#v5.4.0: + image: glrp/atgt:latest + environment: + - DEVEX_GCP_CREDS_BASE64 - label: test basic cluster standup ubuntu key: basic-up-ubuntu command: .buildkite/scripts/standup-cluster.sh --prefix=rp-basic-ub --distro=ubuntu-focal --tiered=false --unstable=false --taskname=basic-cluster @@ -92,19 +108,3 @@ steps: - DA_AWS_ACCESS_KEY_ID - DA_AWS_SECRET_ACCESS_KEY - AWS_DEFAULT_REGION - - label: gcp ubuntu - key: tiered-up-gcp-ubuntu - command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "ubuntu-os-cloud/ubuntu-2204-lts" --prefix "ci-ub-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$GCP_CREDS" - plugins: - - docker#v5.4.0: - image: glrp/atgt:latest - environment: - - GCP_CREDS - - label: gcp fedora - key: tiered-up-gcp-fedora - command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "fedora-cloud/fedora-cloud-37" --prefix "ci-fd-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$GCP_CREDS" - plugins: - - docker#v5.4.0: - image: glrp/atgt:latest - environment: - - GCP_CREDS diff --git a/.buildkite/scripts/standup-gcp-cluster.sh b/.buildkite/scripts/standup-gcp-cluster.sh index 557af4c8..ae7d1262 100755 --- a/.buildkite/scripts/standup-gcp-cluster.sh +++ b/.buildkite/scripts/standup-gcp-cluster.sh @@ -1,7 +1,5 @@ #!/bin/bash -export GCP_CREDS=$GCP_CREDS - # Parse command line arguments while [[ "$#" -gt 0 ]]; do case $1 in @@ -17,6 +15,8 @@ done # Check if TF_DIR and PREFIX are set if [ -z "$TF_DIR" ] || [ -z "$PREFIX" ] || [ -z "$GCP_CREDS" ] || [ -z "$TASK_NAME" ]; then + echo "TF_DIR : $TF_DIR" + echo "TASK_NAME : $TASK_NAME" echo "TF_DIR, PREFIX, CLUSTER_TYPE and GCP_CREDS must be set. Exiting." exit 1 fi diff --git a/Dockerfile_FEDORA b/Dockerfile_FEDORA index 61eea2e5..9e2879a6 100644 --- a/Dockerfile_FEDORA +++ b/Dockerfile_FEDORA @@ -3,6 +3,7 @@ FROM fedora:36 ENV DA_AWS_ACCESS_KEY_ID="default" ENV DA_AWS_SECRET_ACCESS_KEY="default" ENV AWS_DEFAULT_REGION="default" +ENV GCP_CREDS="default" # Install required packages RUN dnf -y update \ diff --git a/Dockerfile_UBUNTU b/Dockerfile_UBUNTU index dbc9c026..a390930b 100644 --- a/Dockerfile_UBUNTU +++ b/Dockerfile_UBUNTU @@ -4,19 +4,13 @@ ARG DEBIAN_FRONTEND=noninteractive ENV DA_AWS_ACCESS_KEY_ID="default" ENV DA_AWS_SECRET_ACCESS_KEY="default" ENV AWS_DEFAULT_REGION="default" +ENV GCP_CREDS="default" # Install required packages RUN apt-get update \ - && apt install unzip -y \ - && apt install wget -y \ - && apt install curl -y \ - && apt install vim -y \ - && apt-get install -y git \ - && apt-get install -y software-properties-common \ + && apt install -y unzip wget curl vim git software-properties-common awscli jq \ && apt-add-repository --yes --update ppa:ansible/ansible \ && apt-get install -y ansible \ - && apt-get install -y awscli \ - && apt-get install -y jq \ && rm -rf /var/lib/apt/lists/* # Install Terraform diff --git a/gcp/main.tf b/gcp/main.tf index f2933945..e22d3006 100644 --- a/gcp/main.tf +++ b/gcp/main.tf @@ -21,7 +21,7 @@ resource "google_compute_firewall" "test-fire" { allow { protocol = "tcp" - ports = ["22", "3000", "8888", "8889", "9090", "9092", "9100", "9644", "33145"] + ports = ["22", "3000", "8888", "8889", "9090", "9092", "9100", "9644", "33145", "8081"] } source_ranges = ["0.0.0.0/0"] From b4a52bd67c93f9cffee5ecbb11cab7a2523aeff7 Mon Sep 17 00:00:00 2001 From: gene-redpanda <123959009+gene-redpanda@users.noreply.github.com> Date: Wed, 27 Sep 2023 18:06:28 -0400 Subject: [PATCH 4/6] fix gcp ci for buckets --- .buildkite/scripts/standup-gcp-cluster.sh | 9 ++--- .../scripts/test-tiered-storage-cluster.sh | 21 ++++++++---- Dockerfile_FEDORA | 8 +++++ Dockerfile_UBUNTU | 7 +++- Taskfile.yaml | 4 ++- gcp/main.tf | 34 ++++++++++--------- 6 files changed, 54 insertions(+), 29 deletions(-) diff --git a/.buildkite/scripts/standup-gcp-cluster.sh b/.buildkite/scripts/standup-gcp-cluster.sh index ae7d1262..5b4ef97b 100755 --- a/.buildkite/scripts/standup-gcp-cluster.sh +++ b/.buildkite/scripts/standup-gcp-cluster.sh @@ -35,7 +35,8 @@ ssh-keygen -t rsa -b 4096 -C "test@redpanda.com" -N "" -f "$KEY_FILE" <<< y && c trap cleanup EXIT INT TERM cleanup() { error_code=$? - terraform destroy --auto-approve --var="gcp_creds=$GCP_CREDS" --var="deployment_prefix=$PREFIX" --var="public_key_path=$KEY_FILE" --var="project_name=hallowed-ray-376320" --var="hosts_file=$HOSTS_FILE_DIR" + terraform destroy --auto-approve --var="gcp_creds=$GCP_CREDS" --var="deployment_prefix=$PREFIX" --var="public_key_path=$KEY_FILE" --var="project_name=t" --var="hosts_file=$HOSTS_FILE_DIR" + rm -rf /app/ansible/tls rm -f "$KEY_FILE" rm -f "${KEY_FILE}.pub" exit $error_code @@ -45,10 +46,10 @@ terraform init terraform apply --auto-approve --var="image=$IMAGE" --var="deployment_prefix=$PREFIX" --var="gcp_creds=$GCP_CREDS" --var="public_key_path=$KEY_FILE.pub" --var="project_name=hallowed-ray-376320" --var="hosts_file=$HOSTS_FILE_DIR" echo "building cluster" -DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO IS_USING_UNSTABLE=$UNSTABLE SQUID_ACL_LOCALNET="10.0.0.0/24" CLOUD_PROVIDER="gcp" task "create-$TASK_NAME" +DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO IS_USING_UNSTABLE=$UNSTABLE CLOUD_STORAGE_CREDENTIALS_SOURCE="gcp_instance_metadata" SQUID_ACL_LOCALNET="10.0.0.0/24" CLOUD_PROVIDER="gcp" task "create-$TASK_NAME" error_code=$? if [ $error_code -ne 0 ]; then - echo "error in ansible standup" + echo "error in ansible standup $TASK_NAME" exit 1 fi @@ -56,7 +57,7 @@ echo "testing cluster" DEPLOYMENT_ID=$PREFIX DISTRO=$DISTRO CLOUD_PROVIDER="gcp" task "test-$TASK_NAME" error_code=$? if [ $error_code -ne 0 ]; then - echo "error in test-tls-cluster" + echo "error in test $TASK_NAME" exit 1 fi diff --git a/.buildkite/scripts/test-tiered-storage-cluster.sh b/.buildkite/scripts/test-tiered-storage-cluster.sh index f26298ae..37eac8f5 100755 --- a/.buildkite/scripts/test-tiered-storage-cluster.sh +++ b/.buildkite/scripts/test-tiered-storage-cluster.sh @@ -77,18 +77,25 @@ echo "testing schema registry" for ip_port in $(echo $REDPANDA_REGISTRY | tr ',' ' '); do curl $ip_port/subjects -k --cacert "$PATH_TO_CA_CRT" ; done if [ "$CLOUD_PROVIDER" == "gcp" ]; then + echo "checking that gcp bucket is not empty" + echo "$DEVEX_GCP_CREDS_BASE64" | base64 -d > /tmp/gcp_creds.json + export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp_creds.json" + export CLOUDSDK_CORE_PROJECT=hallowed-ray-376320 + gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS + echo $BUCKET_NAME + if [ $(gcloud storage ls $(gcloud storage ls | grep ${BUCKET_NAME%-bucket}) | wc -l) -gt 1 ]; then + echo "success" + exit 0 + fi +else + echo "checking that aws bucket is not empty" + # Check if the bucket is empty + object_count=$(aws s3api list-objects --bucket "${BUCKET_NAME}" --region us-west-2 --output json | jq '.Contents | length') echo "success" exit 0 fi -echo "checking that bucket is not empty" -# Check if the bucket is empty -object_count=$(aws s3api list-objects --bucket "${BUCKET_NAME}" --region us-west-2 --output json | jq '.Contents | length') -if [ "$object_count" -gt 0 ]; then - echo "success" - exit 0 -fi echo "fail" exit 1 diff --git a/Dockerfile_FEDORA b/Dockerfile_FEDORA index 9e2879a6..aca8f2b8 100644 --- a/Dockerfile_FEDORA +++ b/Dockerfile_FEDORA @@ -8,6 +8,7 @@ ENV GCP_CREDS="default" # Install required packages RUN dnf -y update \ && dnf install -y unzip wget glibc curl vim git ansible jq openssl \ + && dnf clean all \ && rm -rf /var/cache/dnf/* # Install Terraform @@ -20,6 +21,13 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv && unzip awscliv2.zip \ && ./aws/install +RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-448.0.0-linux-x86_64.tar.gz && \ + tar -zxvf google-cloud-cli-448.0.0-linux-x86_64.tar.gz && \ + ./google-cloud-sdk/install.sh --quiet && \ + rm google-cloud-cli-448.0.0-linux-x86_64.tar.gz + +ENV PATH="/google-cloud-sdk/bin:${PATH}" + # Install task RUN curl -sSLf "https://github.com/go-task/task/releases/download/v3.21.0/task_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin diff --git a/Dockerfile_UBUNTU b/Dockerfile_UBUNTU index a390930b..36dbb279 100644 --- a/Dockerfile_UBUNTU +++ b/Dockerfile_UBUNTU @@ -8,11 +8,16 @@ ENV GCP_CREDS="default" # Install required packages RUN apt-get update \ - && apt install -y unzip wget curl vim git software-properties-common awscli jq \ + && apt install -y unzip wget curl vim git software-properties-common awscli jq lsb-release gnupg \ && apt-add-repository --yes --update ppa:ansible/ansible \ && apt-get install -y ansible \ + && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \ + && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - \ + && apt-get update \ + && apt-get install -y google-cloud-sdk \ && rm -rf /var/lib/apt/lists/* + # Install Terraform RUN wget https://releases.hashicorp.com/terraform/1.4.5/terraform_1.4.5_linux_amd64.zip \ && unzip terraform_1.4.5_linux_amd64.zip -d /usr/local/bin \ diff --git a/Taskfile.yaml b/Taskfile.yaml index d3f6ed19..055fb4ba 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -118,6 +118,7 @@ tasks: SKIP_TAGS: '{{ default "" .SKIP_TAGS }}' cmds: - mkdir -p {{.ARTIFACT_DIR}}/logs + - echo ansible-playbook ansible/{{.ANSIBLE_PLAYBOOK}} --private-key {{.PRIVATE_KEY}} --extra-vars is_using_unstable={{.IS_USING_UNSTABLE}} {{ .SKIP_TAGS }} {{.CLI_ARGS}} - ansible-playbook ansible/{{.ANSIBLE_PLAYBOOK}} --private-key {{.PRIVATE_KEY}} --extra-vars is_using_unstable={{.IS_USING_UNSTABLE}} {{ .SKIP_TAGS }} {{.CLI_ARGS}} create-tls-cluster: @@ -162,11 +163,12 @@ tasks: SEGMENT_UPLOAD_INTERVAL: '{{ default "1" .SEGMENT_UPLOAD_INTERVAL }}' IS_USING_UNSTABLE: '{{ default false .IS_USING_UNSTABLE }}' ANSIBLE_PLAYBOOK: '{{ default "provision-tiered-storage-cluster.yml" .ANSIBLE_PLAYBOOK }}' + CLOUD_STORAGE_CREDENTIALS_SOURCE: '{{ default "aws_instance_metadata" .CLOUD_STORAGE_CREDENTIALS_SOURCE}}' cmds: - task: basic vars: { ANSIBLE_PLAYBOOK: "{{ .ANSIBLE_PLAYBOOK }}", - CLI_ARGS: "--extra-vars redpanda='{\"cluster\":{\"cloud_storage_segment_max_upload_interval_sec\":\"{{.SEGMENT_UPLOAD_INTERVAL}}\"}}'", + CLI_ARGS: "--extra-vars cloud_storage_credentials_source='{{ .CLOUD_STORAGE_CREDENTIALS_SOURCE }}' --extra-vars redpanda='{\"cluster\":{\"cloud_storage_segment_max_upload_interval_sec\":\"{{.SEGMENT_UPLOAD_INTERVAL}}\"}}'", IS_USING_UNSTABLE: "{{ .IS_USING_UNSTABLE }}" } diff --git a/gcp/main.tf b/gcp/main.tf index e22d3006..95167341 100644 --- a/gcp/main.tf +++ b/gcp/main.tf @@ -32,22 +32,24 @@ module "redpanda-cluster" { version = ">= 0.6.3" region = var.region - ssh_user = var.ssh_user - subnet = coalesce(var.subnet, google_compute_subnetwork.test-subnet.id) - image = var.image - availability_zone = var.availability_zone - broker_count = var.nodes - client_count = var.client_nodes - disks = var.disks - ha = var.ha - broker_machine_type = var.machine_type - client_machine_type = var.client_machine_type - monitor_machine_type = var.monitor_machine_type - public_key_path = var.public_key_path - enable_monitoring = var.enable_monitoring - labels = var.labels - deployment_prefix = var.deployment_prefix - hosts_file = var.hosts_file + ssh_user = var.ssh_user + subnet = coalesce(var.subnet, google_compute_subnetwork.test-subnet.id) + image = var.image + availability_zone = var.availability_zone + broker_count = var.nodes + client_count = var.client_nodes + disks = var.disks + ha = var.ha + broker_machine_type = var.machine_type + client_machine_type = var.client_machine_type + monitor_machine_type = var.monitor_machine_type + public_key_path = var.public_key_path + enable_monitoring = var.enable_monitoring + labels = var.labels + deployment_prefix = var.deployment_prefix + hosts_file = var.hosts_file + enable_tiered_storage = true + allow_force_destroy = true } provider "google" { From 0d8bc1960bf5beab6522410bff846ddec7585670 Mon Sep 17 00:00:00 2001 From: gene-redpanda <123959009+gene-redpanda@users.noreply.github.com> Date: Thu, 28 Sep 2023 17:19:34 -0400 Subject: [PATCH 5/6] bump buildkite plugin --- .buildkite/pipeline.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 26d8fae8..df68158b 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -6,7 +6,7 @@ steps: key: tiered-up-gcp-ubuntu command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "ubuntu-os-cloud/ubuntu-2204-lts" --prefix "ci-ub-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$DEVEX_GCP_CREDS_BASE64" plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DEVEX_GCP_CREDS_BASE64 @@ -14,7 +14,7 @@ steps: key: tiered-up-gcp-fedora command: ./.buildkite/scripts/standup-gcp-cluster.sh --tf_dir "gcp" --image "fedora-cloud/fedora-cloud-37" --prefix "ci-fd-ts" --cluster_type "tiered-storage-cluster" --gcp_creds "$DEVEX_GCP_CREDS_BASE64" plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DEVEX_GCP_CREDS_BASE64 @@ -22,7 +22,7 @@ steps: key: basic-up-ubuntu command: .buildkite/scripts/standup-cluster.sh --prefix=rp-basic-ub --distro=ubuntu-focal --tiered=false --unstable=false --taskname=basic-cluster plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -32,7 +32,7 @@ steps: key: tiered-up-ubuntu command: .buildkite/scripts/standup-cluster.sh --prefix=rp-tier-ub --distro=ubuntu-focal --tiered=true --unstable=false --taskname=tiered-storage-cluster plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -42,7 +42,7 @@ steps: key: tiered-up-unstable-ubuntu command: .buildkite/scripts/standup-cluster.sh --prefix=rp-un-tier-ub --distro=ubuntu-focal --tiered=true --unstable=true --taskname=tiered-storage-cluster plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -52,7 +52,7 @@ steps: key: tiered-up-fedora command: .buildkite/scripts/standup-cluster.sh --prefix=rp-tier-fd --distro=Fedora-Cloud-Base-36 --tiered=true --unstable=false --taskname=tiered-storage-cluster plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -62,7 +62,7 @@ steps: key: tiered-up-unstable-fedora command: .buildkite/scripts/standup-cluster.sh --prefix=rp-un-tier-fd --distro=Fedora-Cloud-Base-36 --tiered=true --unstable=true --taskname=tiered-storage-cluster plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -72,7 +72,7 @@ steps: key: tiered-up-ubuntu-is4 command: .buildkite/scripts/standup-cluster.sh --prefix=rp-tier-ub-is --distro=ubuntu-focal --tiered=true --unstable=false --taskname=tiered-storage-cluster --machinearch=arm64 --instancetype=is4gen.4xlarge plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -82,7 +82,7 @@ steps: key: tiered-up-unstable-ubuntu-is4 command: .buildkite/scripts/standup-cluster.sh --prefix=rp-un-tier-ub-is --distro=ubuntu-focal --tiered=true --unstable=true --taskname=tiered-storage-cluster --machinearch=arm64 --instancetype=is4gen.4xlarge plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -92,7 +92,7 @@ steps: key: tiered-up-fedora-is4 command: .buildkite/scripts/standup-cluster.sh --prefix=rp-tier-fd-is --distro=Fedora-Cloud-Base-36 --tiered=true --unstable=false --taskname=tiered-storage-cluster --machinearch=arm64 --instancetype=is4gen.4xlarge plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID @@ -102,7 +102,7 @@ steps: key: tiered-up-unstable-fedora-is4 command: .buildkite/scripts/standup-cluster.sh --prefix=rp-un-tier-fd-is --distro=Fedora-Cloud-Base-36 --tiered=true --unstable=true --taskname=tiered-storage-cluster --machinearch=arm64 --instancetype=is4gen.4xlarge plugins: - - docker#v5.4.0: + - docker#v5.8.0: image: glrp/atgt:latest environment: - DA_AWS_ACCESS_KEY_ID From 2a472b3638d805e2f7357fc54cca65b65681e769 Mon Sep 17 00:00:00 2001 From: gene-redpanda <123959009+gene-redpanda@users.noreply.github.com> Date: Fri, 29 Sep 2023 14:56:44 -0400 Subject: [PATCH 6/6] remove skip for proxy cluster --- .buildkite/scripts/test-proxy-cluster.sh | 22 ++++++++++++------- .../scripts/test-tiered-storage-cluster.sh | 2 -- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.buildkite/scripts/test-proxy-cluster.sh b/.buildkite/scripts/test-proxy-cluster.sh index fdabf803..f9e5aeda 100755 --- a/.buildkite/scripts/test-proxy-cluster.sh +++ b/.buildkite/scripts/test-proxy-cluster.sh @@ -72,16 +72,22 @@ testoutput=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o echo $testoutput | grep squirrels || exit 1 if [ "$CLOUD_PROVIDER" == "gcp" ]; then - echo "success" - exit 0 -fi -echo "checking that bucket is not empty" -# Check if the bucket is empty -object_count=$(aws s3api list-objects --bucket "${BUCKET_NAME}" --region us-west-2 --output json | jq '.Contents | length') - -if [ "$object_count" -gt 0 ]; then + echo "checking that gcp bucket is not empty" + echo "$DEVEX_GCP_CREDS_BASE64" | base64 -d > /tmp/gcp_creds.json + export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp_creds.json" + export CLOUDSDK_CORE_PROJECT=hallowed-ray-376320 + gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS + echo $BUCKET_NAME + if [ $(gcloud storage ls $(gcloud storage ls | grep ${BUCKET_NAME%-bucket}) | wc -l) -gt 1 ]; then echo "success" exit 0 + fi +else + echo "checking that aws bucket is not empty" + # Check if the bucket is empty + object_count=$(aws s3api list-objects --bucket "${BUCKET_NAME}" --region us-west-2 --output json | jq '.Contents | length') + echo "success" + exit 0 fi echo "fail" diff --git a/.buildkite/scripts/test-tiered-storage-cluster.sh b/.buildkite/scripts/test-tiered-storage-cluster.sh index 37eac8f5..1e1f3b88 100755 --- a/.buildkite/scripts/test-tiered-storage-cluster.sh +++ b/.buildkite/scripts/test-tiered-storage-cluster.sh @@ -95,7 +95,5 @@ else exit 0 fi - - echo "fail" exit 1