From db43afee60c42514c4c1e69f520b15a34c65165b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Mac=C3=ADk?= Date: Fri, 26 Apr 2024 13:45:32 +0200 Subject: [PATCH] feat(load-test): Use AWS s3 for tekton results in dev mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pavel Macík --- .../load-tests/ci-scripts/collect-results.sh | 29 +-- tests/load-tests/ci-scripts/load-test.sh | 6 +- .../max-concurrency/cluster_read_config.yaml | 22 ++ .../max-concurrency/collect-results.sh | 204 +++++++++++++++++- .../ci-scripts/restart-tekton-results.sh | 8 + tests/load-tests/ci-scripts/setup-cluster.sh | 18 +- .../ci-scripts/setup-tekton-results-s3.sh | 34 +++ .../ci-scripts/stage/collect-results.sh | 51 ++--- .../ci-scripts/utility_scripts/runs-to-csv.sh | 4 +- tests/load-tests/cluster_read_config.yaml | 10 + tests/load-tests/run-max-concurrency.sh | 63 +++--- tests/load-tests/run.sh | 53 +++-- 12 files changed, 406 insertions(+), 96 deletions(-) create mode 100755 tests/load-tests/ci-scripts/restart-tekton-results.sh create mode 100755 tests/load-tests/ci-scripts/setup-tekton-results-s3.sh diff --git a/tests/load-tests/ci-scripts/collect-results.sh b/tests/load-tests/ci-scripts/collect-results.sh index 004bea5e93..f5c59bb13e 100755 --- a/tests/load-tests/ci-scripts/collect-results.sh +++ b/tests/load-tests/ci-scripts/collect-results.sh @@ -6,8 +6,8 @@ set -o pipefail # shellcheck disable=SC1090 source "/usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${1:-concurrent}" -source "$( dirname $0 )/utils.sh" -source "$( dirname $0 )/user-prefix.sh" +source "$(dirname $0)/utils.sh" +source "$(dirname $0)/user-prefix.sh" echo "[$(date --utc -Ins)] Collecting load test results" @@ -24,14 +24,15 @@ find . -maxdepth 1 -type d -name 'collected-data' -exec cp -r {} "${ARTIFACT_DIR echo "[$(date --utc -Ins)] Setting up Python venv" { -python3 -m venv venv -set +u -source venv/bin/activate -set -u -python3 -m pip install -U pip -python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" -python3 -m pip install tabulate -python3 -m pip install matplotlib + python3 -m venv venv + set +u + source venv/bin/activate + set -u + python3 -m pip install -U pip + python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" + python3 -m pip install tabulate + python3 -m pip install matplotlib + python3 -m pip install 'tenacity<8.4.0' } &>"${ARTIFACT_DIR}/monitoring-setup.log" echo "[$(date --utc -Ins)] Create summary JSON with timings" @@ -48,12 +49,12 @@ echo "[$(date --utc -Ins)] Creating main status data file" STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" status_data.py \ --status-data-file "${STATUS_DATA_FILE}" \ - --set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \ + --set "name=Konflux loadtest" "started=$(cat started)" "ended=$(cat ended)" \ --set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Adding monitoring data" -mstarted="$( date -d "$( cat started )" --utc -Iseconds )" -mended="$( date -d "$( cat ended )" --utc -Iseconds )" +mstarted="$(date -d "$(cat started)" --utc -Iseconds)" +mended="$(date -d "$(cat ended)" --utc -Iseconds)" mhost="https://$(oc -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')" mrawdir="${ARTIFACT_DIR}/monitoring-raw-data-dir/" mkdir -p "$mrawdir" @@ -64,7 +65,7 @@ status_data.py \ --monitoring-end "$mended" \ --prometheus-host "$mhost" \ --prometheus-port 443 \ - --prometheus-token "$( oc whoami -t )" \ + --prometheus-token "$(oc whoami -t)" \ --monitoring-raw-data-dir "$mrawdir" \ &>"${ARTIFACT_DIR}/monitoring-collection.log" diff --git a/tests/load-tests/ci-scripts/load-test.sh b/tests/load-tests/ci-scripts/load-test.sh index 9ecb1d3946..d40566764f 100755 --- a/tests/load-tests/ci-scripts/load-test.sh +++ b/tests/load-tests/ci-scripts/load-test.sh @@ -10,7 +10,9 @@ source "$( dirname $0 )/user-prefix.sh" pushd "${2:-./tests/load-tests}" -export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL +source "./ci-scripts/user-prefix.sh" + +export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_USER GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL QUAY_E2E_ORGANIZATION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/quay-org) MY_GITHUB_ORG=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/github-org) @@ -25,6 +27,8 @@ for kv in "${kvs[@]}"; do done echo >>"$rate_limits_csv" +echo -e "[INFO] Start tests with user: ${GITHUB_USER}" + while true; do timestamp=$(printf "%s" "$(date -u +'%FT%T')") echo -n "$timestamp" >>"$rate_limits_csv" diff --git a/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml b/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml index 2afa8af153..cf9bfb411a 100644 --- a/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml +++ b/tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml @@ -45,4 +45,26 @@ {{ monitor_pod('tekton-results', 'tekton-results-watcher', 20, '-.*') }} {{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 20, '-.*') }} +{{ monitor_pod('tekton-results', 'tekton-results-api', 20, '-.*') }} +{{ monitor_pod_container('tekton-results', 'tekton-results-api', 'api', 20, '-.*') }} {{ pv_stats('tekton-results', 'data-postgres-postgresql-0', 20) }} + +- name: measurements.tekton-results-watcher.watcher_workqueue_depth + monitoring_query: sum(watcher_workqueue_depth{job="tekton-results-watcher"}) + monitoring_step: 20 + +- name: measurements.tekton-results-watcher.watcher_reconcile_latency_bucket + monitoring_query: histogram_quantile(0.99, sum(rate(watcher_reconcile_latency_bucket{job="tekton-results-watcher"}[30m])) by (le) ) / 1000 + monitoring_step: 20 + +- name: measurements.cluster_cpu_usage_seconds_total_rate + monitoring_query: sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=""}) + monitoring_step: 20 + +- name: measurements.cluster_memory_usage_rss_total + monitoring_query: sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", container!=""}) + monitoring_step: 20 + +- name: measurements.cluster_disk_throughput_total + monitoring_query: sum (rate(container_fs_reads_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]) + rate(container_fs_writes_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m])) + monitoring_step: 20 diff --git a/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh b/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh index 93763905a3..ae4cf36cda 100755 --- a/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh +++ b/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh @@ -15,6 +15,9 @@ csv_delim=";" csv_delim_quoted="\"$csv_delim\"" dt_format='"%Y-%m-%dT%H:%M:%SZ"' +artifact_logs="${ARTIFACT_DIR}/logs" +artifact_pprof="${ARTIFACT_DIR}/pprof" + collect_artifacts() { echo "[$(date --utc -Ins)] Collecting load test artifacts" pwd @@ -58,6 +61,19 @@ collect_monitoring_data() { --prometheus-token "$(oc whoami -t)" \ -d &>"$monitoring_collection_log" + mkdir -p "$artifact_logs/ggm" + for file in $(find "$monitoring_collection_dir/" -maxdepth 1 -name "*.csv"); do + echo "Converting $file" + out="$artifact_logs/ggm/$(basename "$file")" + rm -rf "$out" + while read line; do + timestamp=$(echo "$line" | cut -d "," -f1) + value=$(echo "$line" | cut -d "," -f2) + echo "$(date -d "@$timestamp" "+%Y-%m-%dT%H:%M:%S.%N" --utc);$value" >>"$out" + done <<<"$(tail -n +2 "$file")" & + done + wait + ## Monitoring data per iteration for iteration_dir in $(find "$ARTIFACT_DIR/iterations/" -type d -name 'iteration-*'); do echo "[$(date --utc -Ins)] Collecting monitoring data for $iteration_dir" @@ -77,6 +93,7 @@ collect_monitoring_data() { --additional ./tests/load-tests/cluster_read_config.yaml \ --monitoring-start "$mstart" \ --monitoring-end "$mend" \ + --monitoring-raw-data-dir "$monitoring_collection_dir" \ --prometheus-host "https://$mhost" \ --prometheus-port 443 \ --prometheus-token "$(oc whoami -t)" \ @@ -94,8 +111,8 @@ collect_tekton_profiling_data() { for pprof_profile in $(find "$output_dir" -name "*.pprof"); do if [ -s "$pprof_profile" ]; then file=$(basename "$pprof_profile") - go tool pprof -text "$pprof_profile" >"$ARTIFACT_DIR/pprof/$file.txt" || true - go tool pprof -svg -output="$ARTIFACT_DIR/pprof/$file.svg" "$pprof_profile" || true + go tool pprof -text "$pprof_profile" >"$artifact_pprof/$file.txt" || true + go tool pprof -svg -output="$artifact_pprof/$file.svg" "$pprof_profile" || true fi done fi @@ -119,6 +136,7 @@ collect_scalability_data() { ${csv_delim}Threads\ ${csv_delim}WorkloadKPI\ ${csv_delim}Errors\ +${csv_delim}Duration\ ${csv_delim}UserAvgTime\ ${csv_delim}UserMaxTime\ ${csv_delim}CreateApplicationAvgTime\ @@ -151,6 +169,15 @@ ${csv_delim}ClusterPVCInUseAvg\ ${csv_delim}TektonResultsWatcherMemoryMin\ ${csv_delim}TektonResultsWatcherMemoryMax\ ${csv_delim}TektonResultsWatcherMemoryRange\ +${csv_delim}TektonResultsWatcherCPUMin\ +${csv_delim}TektonResultsWatcherCPUMax\ +${csv_delim}TektonResultsWatcherCPURange\ +${csv_delim}TektonResultsWatcherWorkqueueDepthMin\ +${csv_delim}TektonResultsWatcherWorkqueueDepthMax\ +${csv_delim}TektonResultsWatcherWorkqueueDepthRange\ +${csv_delim}TektonResultsWatcherReconcileLatencyBucketMin\ +${csv_delim}TektonResultsWatcherReconcileLatencyBucketMax\ +${csv_delim}TektonResultsWatcherReconcileLatencyBucketRange\ ${tekton_results_watcher_pod_headers}\ ${csv_delim}SchedulerPendingPodsCountAvg\ ${csv_delim}TokenPoolRatePrimaryAvg\ @@ -217,6 +244,15 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.min | tostring) \ + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.max | tostring) \ + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.range | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.min | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.max | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.range | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.min | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.max | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.range | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.min | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.max | tostring) \ + + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.range | tostring) \ ${parked_go_routines_columns} \ + $csv_delim_quoted + (.measurements.scheduler_pending_pods_count.mean | tostring) \ + $csv_delim_quoted + (.measurements.token_pool_rate_primary.mean | tostring) \ @@ -272,7 +308,171 @@ collect_timestamp_csvs() { } echo "[$(date --utc -Ins)] Collecting max concurrency results" +jq_iso_8601_to_seconds="( \ + (if \$d | contains(\"m\") and (endswith(\"ms\") | not) then (\$d | capture(\"(?\\\\d+)m(?\\\\d+\\\\.?(\\\\d+)?)s\") | (.minutes | tonumber * 60) + (.seconds | tonumber)) else 0 end) + \ + (if \$d | (contains(\"m\") | not) and contains(\"s\") and (endswith(\"ms\") | not) and (endswith(\"µs\") | not) then (\$d | capture(\"(?\\\\d+\\\\.\\\\d+)s\") | (.seconds | tonumber)) else 0 end) + \ + (if \$d | endswith(\"ms\") then (\$d | split(\"ms\") | .[0] | tonumber / 1000) else 0 end) + \ + (if \$d | endswith(\"µs\") then (\$d | split(\"µs\") | .[0] | tonumber / 1000000) else 0 end) \ +) | tostring" + +convert_go_duration_to_seconds() { + local duration=$1 + local total_seconds=0 + + # Extract hours, minutes, seconds, milliseconds, and microseconds + if [[ $duration =~ ([0-9]*\.?[0-9]+)h ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} * 3600") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)m ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} * 60") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)s ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]}") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)ms ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} / 1000") + fi + if [[ $duration =~ ([0-9]*\.?[0-9]+)(µs|us) ]]; then + total_seconds=$(bc <<<"$total_seconds + ${BASH_REMATCH[1]} / 1000000") + fi + + echo $total_seconds +} + +collect_tekton_results_logs() { + echo "Collecting Tekton results logs..." + mkdir -p "$artifact_logs" + ggm=$artifact_logs/ggm + mkdir -p "$ggm" + oc logs -c api -n tekton-results -l "app.kubernetes.io/name=tekton-results-api" --prefix --tail=-1 >"$artifact_logs/tekton-results-api.log" + oc logs -c watcher -n tekton-results -l "app.kubernetes.io/name=tekton-results-watcher" --prefix --tail=-1 >"$artifact_logs/tekton-results-watcher.log" + oc logs -c minio -n tekton-results "pod/storage-pool-0-0" --prefix --tail=-1 >"$artifact_logs/tekton-result-storage.log" + ts_format='"%Y-%m-%dT%H:%M:%S"' + + jq_cmd="(.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?\\\\.\\\\d+)\") | .milliseconds) \ + + $csv_delim_quoted + ( \ + .msg | capture(\"(?GGM(\\\\d+)?) (?.+) kind (?\\\\S*) ns (?\\\\S*) name (?\\\\S*).* times? spent (?.*)\") \ + | .id \ + + $csv_delim_quoted + (.type) \ + + $csv_delim_quoted + (.kind) \ + + $csv_delim_quoted + (.ns) \ + + $csv_delim_quoted + (.name) \ + + $csv_delim_quoted + (.duration) \ + + $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \ + )" + component=tekton-results-api + metrics=("UpdateLog after handleReturn" "UpateLog after flush" "GRPC receive" "RBAC check" "get record" "create stream" "read stream") + for f in $(find $artifact_logs -type f -name "$component*.logs"); do + echo "Processing $f..." + grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json + jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + grep "$metric"';' "$f.csv" >"$f.$m.csv" + done & + done + wait + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv" + done + + component=tekton-results-watcher + metrics=("streamLogs" "dynamic Reconcile" "tkn read" "tkn write" "log copy and write" "flush" "close/rcv") + jq_cmd="if .ts | tostring | contains(\"-\") then .ts | capture(\"(?.*)Z\") | .t else (.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?\\\\.\\\\d+)\") | .milliseconds) end \ + + ( \ + .msg | capture(\"(?GGM(\\\\d+)?) (?.+)(?\\\\S*) obj ns (?\\\\S*) obj name (?\\\\S*) times? spent (?.*)\") \ + | $csv_delim_quoted + (.id) \ + + $csv_delim_quoted + (.type) \ + + $csv_delim_quoted + (.kind) \ + + $csv_delim_quoted + (.ns) \ + + $csv_delim_quoted + (.name) \ + + $csv_delim_quoted + (.duration) \ + + $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \ + )" + for f in $(find $artifact_logs -type f -name "$component*.logs"); do + echo "Processing $f..." + grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json + jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + grep "$metric"';' "$f.csv" >"$f.$m.csv" + done & + done + wait + for metric in "${metrics[@]}"; do + m="$(echo "$metric" | sed -e 's,[ /],_,g')" + find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv" + done + + log_file=$(find "$output_dir" -name 'tekton-results-api.*.logs' | tail -n1) + + i=16 + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Generating $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* time spent \([^ ]\+\) ts \([^ ]\+\) totalSize \(.*\),\2;\1;\3,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]});${tokens[2]}" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + + for i in 17 18; do + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Generating $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* count \([^ ]\+\) time \([^ ]\+\) ts \([^ ]\+\).*,\3;\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]});${tokens[2]}" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + done + + i=20 + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Generating $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* runStream \([^ ]\+\) ts \([^ ]\+\),\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + + for i in 24 25; do + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Generating $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* Write data \([^ ]\+\) ts \([^ ]\+\),\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + done + + i=31 + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Generating $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* WriteStatus \([^ ]\+\) ts \([^ ]\+\),\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" + + i=33 + output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv" + echo "Generating $output..." + rm -rvf "$output" + while read line; do + duration_ts=$(echo "$line" | sed -e "s,.* handleStream \([^ ]\+\) ts \([^ ]\+\),\2;\1,g") + IFS=";" read -ra tokens <<<"${duration_ts}" + echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output" + done <<<"$(grep "GGMGGM$i" $log_file)" +} + +echo "Collecting max concurrency results..." collect_artifacts || true +collect_tekton_results_logs || true collect_timestamp_csvs || true collect_monitoring_data || true collect_scalability_data || true diff --git a/tests/load-tests/ci-scripts/restart-tekton-results.sh b/tests/load-tests/ci-scripts/restart-tekton-results.sh new file mode 100755 index 0000000000..921c3743cf --- /dev/null +++ b/tests/load-tests/ci-scripts/restart-tekton-results.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "Restarting Tekton Results API" oc rollout restart deployment/tekton-results-api -n tekton-results +oc rollout restart deployment/tekton-results-api -n tekton-results +oc rollout status deployment/tekton-results-api -n tekton-results -w +echo "Restarting Tekton Results Watcher" +oc rollout restart deployment/tekton-results-watcher -n tekton-results +oc rollout status deployment/tekton-results-watcher -n tekton-results -w diff --git a/tests/load-tests/ci-scripts/setup-cluster.sh b/tests/load-tests/ci-scripts/setup-cluster.sh index 40e898a5d3..899a18cb14 100755 --- a/tests/load-tests/ci-scripts/setup-cluster.sh +++ b/tests/load-tests/ci-scripts/setup-cluster.sh @@ -13,7 +13,7 @@ pushd "${2:-.}" echo "Installing app-studio and tweaking cluster configuration" go mod tidy go mod vendor -export MY_GITHUB_ORG QUAY_E2E_ORGANIZATION INFRA_DEPLOYMENTS_ORG INFRA_DEPLOYMENTS_BRANCH TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD E2E_PAC_GITHUB_APP_ID E2E_PAC_GITHUB_APP_PRIVATE_KEY ENABLE_SCHEDULING_ON_MASTER_NODES +export MY_GITHUB_ORG GITHUB_USER QUAY_E2E_ORGANIZATION INFRA_DEPLOYMENTS_ORG INFRA_DEPLOYMENTS_BRANCH TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD E2E_PAC_GITHUB_APP_ID E2E_PAC_GITHUB_APP_PRIVATE_KEY ENABLE_SCHEDULING_ON_MASTER_NODES TEKTON_RESULTS_S3_BUCKET_NAME MY_GITHUB_ORG=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/github-org) QUAY_E2E_ORGANIZATION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/quay-org) INFRA_DEPLOYMENTS_ORG=${INFRA_DEPLOYMENTS_ORG:-redhat-appstudio} @@ -21,6 +21,7 @@ INFRA_DEPLOYMENTS_BRANCH=${INFRA_DEPLOYMENTS_BRANCH:-main} E2E_PAC_GITHUB_APP_ID="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/pac-github-app-id)" E2E_PAC_GITHUB_APP_PRIVATE_KEY="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/pac-github-app-private-key)" ENABLE_SCHEDULING_ON_MASTER_NODES=false +TEKTON_RESULTS_S3_BUCKET_NAME=${TEKTON_RESULTS_S3_BUCKET_NAME:-} ## Tweak infra-deployments if [ "${TWEAK_INFRA_DEPLOYMENTS:-false}" == "true" ]; then @@ -46,6 +47,7 @@ fi ## Install infra-deployments echo "Installing infra-deployments" +echo " GitHub user: ${GITHUB_USER}" echo " GitHub org: ${INFRA_DEPLOYMENTS_ORG}" echo " GitHub branch: ${INFRA_DEPLOYMENTS_BRANCH}" make local/cluster/prepare @@ -65,4 +67,18 @@ oc patch -n application-service secret has-github-token -p '{"data": {"token": n oc rollout restart deployment -n application-service oc rollout status deployment -n application-service -w +## Setup tekton-results S3 +if [ -n "$TEKTON_RESULTS_S3_BUCKET_NAME" ]; then + echo "Setting up Tekton Results to use S3" + ./tests/load-tests/ci-scripts/setup-tekton-results-s3.sh + echo "Restarting Tekton Results API" + oc rollout restart deployment/tekton-results-api -n tekton-results + oc rollout status deployment/tekton-results-api -n tekton-results -w + echo "Restarting Tekton Results Watcher" + oc rollout restart deployment/tekton-results-watcher -n tekton-results + oc rollout status deployment/tekton-results-watcher -n tekton-results -w +else + echo "TEKTON_RESULTS_S3_BUCKET_NAME env variable is not set or empty - skipping setting up Tekton Results to use S3" +fi + popd diff --git a/tests/load-tests/ci-scripts/setup-tekton-results-s3.sh b/tests/load-tests/ci-scripts/setup-tekton-results-s3.sh new file mode 100755 index 0000000000..60a38a8269 --- /dev/null +++ b/tests/load-tests/ci-scripts/setup-tekton-results-s3.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +if [ -z "${TEKTON_RESULTS_S3_BUCKET_NAME}" ]; then + echo "TEKTON_RESULTS_S3_BUCKET_NAME env variable is not set or empty - skipping setting up Tekton Results to use S3" +else + echo "Setting up Tekton Results to use S3" +fi + +export AWS_REGION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/aws_region) +export AWS_PROFILE=rhtap-perfscale +export AWS_DEFAULT_OUTPUT=json + +NS=tekton-results + +cli=oc +clin="$cli -n $NS" + +echo "Creating S3 bucket $TEKTON_RESULTS_S3_BUCKET_NAME" >&2 +if [ -z "$(aws s3api list-buckets | jq -rc '.Buckets[] | select(.Name =="'"$TEKTON_RESULTS_S3_BUCKET_NAME"'")')" ]; then + aws s3api create-bucket --bucket "$TEKTON_RESULTS_S3_BUCKET_NAME" --region="$AWS_REGION" --create-bucket-configuration LocationConstraint="$AWS_REGION" | jq -rc +else + echo "S3 bucket $TEKTON_RESULTS_S3_BUCKET_NAME already exists, skipping creation" +fi + +echo "Creating namepsace $NS" >&2 +$cli create namespace "$NS" --dry-run=client -o yaml | kubectl apply -f - + +echo "Creating S3 secret" >&2 +$clin create secret generic tekton-results-s3 \ + --from-literal=aws_access_key_id="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/aws_access_key_id)" \ + --from-literal=aws_secret_access_key="$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/aws_secret_access_key)" \ + --from-literal=aws_region="$AWS_REGION" \ + --from-literal=bucket="$TEKTON_RESULTS_S3_BUCKET_NAME" \ + --from-literal=endpoint="https://s3.$AWS_REGION.amazonaws.com" --dry-run=client -o yaml | $clin apply -f - diff --git a/tests/load-tests/ci-scripts/stage/collect-results.sh b/tests/load-tests/ci-scripts/stage/collect-results.sh index 2b1053dd90..3312bf3a54 100755 --- a/tests/load-tests/ci-scripts/stage/collect-results.sh +++ b/tests/load-tests/ci-scripts/stage/collect-results.sh @@ -4,7 +4,7 @@ set -o nounset set -o errexit set -o pipefail -source "$( dirname $0 )/../utils.sh" +source "$(dirname $0)/../utils.sh" echo "[$(date --utc -Ins)] Collecting load test results" @@ -27,14 +27,15 @@ find . -maxdepth 1 -type d -name 'collected-data' -exec cp -r {} "${ARTIFACT_DIR echo "[$(date --utc -Ins)] Setting up Python venv" { -python3 -m venv venv -set +u -source venv/bin/activate -set -u -python3 -m pip install -U pip -python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" -python3 -m pip install tabulate -python3 -m pip install matplotlib + python3 -m venv venv + set +u + source venv/bin/activate + set -u + python3 -m pip install -U pip + python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" + python3 -m pip install tabulate + python3 -m pip install matplotlib + python3 -m pip install 'tenacity<8.4.0' } &>"${ARTIFACT_DIR}/monitoring-setup.log" echo "[$(date --utc -Ins)] Create summary JSON with timings" @@ -51,12 +52,12 @@ echo "[$(date --utc -Ins)] Creating main status data file" STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json" status_data.py \ --status-data-file "${STATUS_DATA_FILE}" \ - --set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \ + --set "name=Konflux loadtest" "started=$(cat started)" "ended=$(cat ended)" \ --set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json" echo "[$(date --utc -Ins)] Adding monitoring data" -mstarted="$( date -d "$( cat started )" --utc -Iseconds )" -mended="$( date -d "$( cat ended )" --utc -Iseconds )" +mstarted="$(date -d "$(cat started)" --utc -Iseconds)" +mended="$(date -d "$(cat ended)" --utc -Iseconds)" mhost="https://$PROMETHEUS_HOST" mrawdir="${ARTIFACT_DIR}/monitoring-raw-data-dir/" mkdir -p "$mrawdir" @@ -81,20 +82,20 @@ else application_stub="${ARTIFACT_DIR}/collected-applications.appstudio.redhat.com" component_stub="${ARTIFACT_DIR}/collected-components.appstudio.redhat.com" - for uid in $( seq 1 $CONCURRENCY ); do + for uid in $(seq 1 $CONCURRENCY); do username="test-rhtap-$uid" - offline_token=$( cat users.json | jq --raw-output '.[] | select(.username == "'$username'").token' ) - api_server=$( cat users.json | jq --raw-output '.[] | select(.username == "'$username'").apiurl' ) - sso_server=$( cat users.json | jq --raw-output '.[] | select(.username == "'$username'").ssourl' ) - access_token=$( curl \ - --silent \ - --header "Accept: application/json" \ - --header "Content-Type: application/x-www-form-urlencoded" \ - --data-urlencode "grant_type=refresh_token" \ - --data-urlencode "client_id=cloud-services" \ - --data-urlencode "refresh_token=${offline_token}" \ - "${sso_server}" \ - | jq --raw-output ".access_token" ) + offline_token=$(cat users.json | jq --raw-output '.[] | select(.username == "'$username'").token') + api_server=$(cat users.json | jq --raw-output '.[] | select(.username == "'$username'").apiurl') + sso_server=$(cat users.json | jq --raw-output '.[] | select(.username == "'$username'").ssourl') + access_token=$(curl \ + --silent \ + --header "Accept: application/json" \ + --header "Content-Type: application/x-www-form-urlencoded" \ + --data-urlencode "grant_type=refresh_token" \ + --data-urlencode "client_id=cloud-services" \ + --data-urlencode "refresh_token=${offline_token}" \ + "${sso_server}" | + jq --raw-output ".access_token") login_log="${login_log_stub}-${username}.log" echo "Logging in as $username..." if ! oc login --token="$access_token" --server="$api_server" &>$login_log; then diff --git a/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh b/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh index c0ebfc3691..04a5a37ec7 100755 --- a/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh +++ b/tests/load-tests/ci-scripts/utility_scripts/runs-to-csv.sh @@ -55,8 +55,8 @@ echo "$headers" find "${1:-.}" -name load-test.json -print0 | sort | while IFS= read -r -d '' filename; do grep --quiet "XXXXX" "${filename}" && echo "WARNING placeholders found in ${filename}, removing" - sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "${filename}" \ - | jq --raw-output '[ + sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "${filename}" | + jq --raw-output '[ .metadata.env.BUILD_ID, .started, .ended, diff --git a/tests/load-tests/cluster_read_config.yaml b/tests/load-tests/cluster_read_config.yaml index de8d03677b..b8f6c8481e 100644 --- a/tests/load-tests/cluster_read_config.yaml +++ b/tests/load-tests/cluster_read_config.yaml @@ -200,3 +200,13 @@ {{ monitor_pod('openshift-pipelines', 'tekton-pipelines-controller', 15) }} {{ monitor_pod('tekton-results', 'tekton-results-watcher', 1, '-.*') }} {{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 1, '-.*') }} +{{ monitor_pod('tekton-results', 'tekton-results-api', 1, '-.*') }} +{{ monitor_pod_container('tekton-results', 'tekton-results-api', 'api', 1, '-.*') }} + +- name: measurements.tekton-results-watcher.watcher_workqueue_depth + monitoring_query: sum(watcher_workqueue_depth{job="tekton-results-watcher"}) + monitoring_step: 1 + +- name: measurements.tekton-results-watcher.watcher_reconcile_latency_bucket + monitoring_query: histogram_quantile(0.99, sum(rate(watcher_reconcile_latency_bucket{job="tekton-results-watcher"}[30m])) by (le) ) / 1000 + monitoring_step: 1 \ No newline at end of file diff --git a/tests/load-tests/run-max-concurrency.sh b/tests/load-tests/run-max-concurrency.sh index 53e4f9754e..f16b045915 100755 --- a/tests/load-tests/run-max-concurrency.sh +++ b/tests/load-tests/run-max-concurrency.sh @@ -15,31 +15,36 @@ load_test() { workdir=${1:-/tmp} threads=${2:-1} index=$(printf "%04d" "$threads") + echo + echo "=== RHTAP load test ===" + echo "Threads: $threads" + echo "Iteration: $(basename "$workdir")" + echo ## Enable CPU profiling in Tekton if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ]; then - echo "Starting CPU profiling with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do - pod="${p##*/}" - file="tekton-pipelines-controller.$pod.cpu-profile" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & - echo $! >"$workdir/$file.pid" - done + echo "Starting CPU profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" file="tekton-results-watcher.$pod.cpu-profile" oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! >"$workdir/$file.pid" + file=tekton-results-watcher.$pod.cpu-profile.mutex + oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/mutex?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & + echo $! >"$workdir/$file.pid" done - fi - ## Enable memory profiling in Tekton - if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then - echo "Starting memory profiling of Tekton controller with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do + echo "Starting CPU profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.memory-profile" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & + file=tekton-results-api.$pod.cpu-profile + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & + echo $! >"$workdir/$file.pid" + file=tekton-results-api.$pod.cpu-profile.mutex + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/mutex?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! >"$workdir/$file.pid" done + fi + ## Enable memory profiling in Tekton + if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then echo "Starting memory profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -47,6 +52,13 @@ load_test() { oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! >"$workdir/$file.pid" done + echo "Starting memory profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + file=tekton-results-api.$pod.memory-profile + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & + echo $! >"$workdir/$file.pid" + done fi rm -rvf "$workdir/load-test.json" rm -rvf "$workdir/load-test.log" @@ -96,25 +108,25 @@ load_test() { deactivate if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ] || [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then - echo "[$(date --utc -Ins)] Waiting for the Tekton profiling to finish up to ${TEKTON_PERF_PROFILE_CPU_PERIOD}s" - for pid_file in $(find "$workdir" -name 'tekton*.pid'); do + echo "Waiting for the Tekton profiling to finish up to ${TEKTON_PERF_PROFILE_CPU_PERIOD}s" + for pid_file in $(find $output_dir -name 'tekton*.pid'); do wait "$(cat "$pid_file")" rm -rvf "$pid_file" done - echo "[$(date --utc -Ins)] Getting Tekton controller goroutine dump" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do + echo "Getting Tekton results watcher goroutine dump" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" for i in 0 1 2; do - file="tekton-pipelines-controller.$pod.goroutine-dump-$i" - oc exec -n tekton-results "$p" -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof" + file="tekton-results-watcher.$pod.goroutine-dump-$i" + oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof" done done - echo "[$(date --utc -Ins)] Getting Tekton results watcher goroutine dump" - for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do + echo "Getting Tekton results api goroutine dump" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do pod="${p##*/}" for i in 0 1 2; do - file="tekton-results-watcher.$pod.goroutine-dump-$i" - oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof" + file="tekton-results-api.$pod.goroutine-dump-$i" + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL localhost:6060/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof" done done fi @@ -188,6 +200,7 @@ max_concurrency() { deactivate } &>"$OUTPUT_DIR/monitoring-setup.log" + iteration=${ITERATION_OFFSET:-0} for t in "${maxConcurrencySteps[@]}"; do iteration="$((iteration + 1))" if (("$t" > "$maxThreads")); then @@ -201,6 +214,8 @@ max_concurrency() { mkdir "${workdir}" load_test "$workdir" "$t" jq ".metadata.\"max-concurrency\".iteration = \"$(printf "%04d" "$iteration")\"" "$workdir/load-test.json" >"$OUTPUT_DIR/$$.json" && mv -f "$OUTPUT_DIR/$$.json" "$workdir/load-test.json" + oc logs -c watcher -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher --tail=-1 --prefix=true >"$workdir/tekton-results-watcher.logs" + oc logs -c api -n tekton-results -l app.kubernetes.io/name=tekton-results-api --tail=-1 --prefix=true >"$workdir/tekton-results-api.logs" workloadKPI=$(jq '.results.measurements.KPI.mean' "$workdir/load-test.json") workloadKPIerrors=$(jq '.results.measurements.KPI.errors' "$workdir/load-test.json") if [ -z "$workloadKPI" ] || [ -z "$workloadKPIerrors" ] || [ "$workloadKPI" = "null" ] || [ "$workloadKPIerrors" = "null" ]; then diff --git a/tests/load-tests/run.sh b/tests/load-tests/run.sh index 13150a251c..ce16c6ee54 100755 --- a/tests/load-tests/run.sh +++ b/tests/load-tests/run.sh @@ -8,30 +8,24 @@ USER_PREFIX=${USER_PREFIX:-testuser} ## Enable CPU profiling in Tekton if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ]; then - echo "Starting CPU profiling with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do + echo "Starting CPU profiling of Tekton results watcher with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.cpu-profile" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + file=tekton-results-watcher.$pod.cpu-profile + oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + echo $! >"$output_dir/$file.pid" + done + echo "Starting CPU profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + file=tekton-results-api.$pod.cpu-profile + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:6060/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! >"$output_dir/$file.pid" done - p=$(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name) - pod="${p##*/}" - file=tekton-results-watcher.$pod.cpu-profile - oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & - echo $! >"$output_dir/$file.pid" fi ## Enable memory profiling in Tekton if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then - file=tekton-pipelines-controller.memory-profile - echo "Starting memory profiling of Tekton controller with pprof" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do - pod="${p##*/}" - file="tekton-pipelines-controller.$pod.memory-profile" - oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & - echo $! >"$output_dir/$file.pid" - done echo "Starting memory profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -39,6 +33,13 @@ if [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & echo $! >"$output_dir/$file.pid" done + echo "Starting memory profiling of Tekton results API with pprof" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + file=tekton-results-api.$pod.memory-profile + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$output_dir/$file.pprof" & + echo $! >"$output_dir/$file.pid" + done fi ## Switch KubeScheduler Debugging on @@ -95,14 +96,6 @@ if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ] || [ "${TEKTON_PERF_ENA wait "$(cat "$pid_file")" rm -rvf "$pid_file" done - echo "Getting Tekton controller goroutine dump" - for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do - pod="${p##*/}" - for i in 0 1 2; do - file="tekton-pipelines-controller.$pod.goroutine-dump-$i" - oc exec -n tekton-results "$p" -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" - done - done echo "Getting Tekton results watcher goroutine dump" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" @@ -111,9 +104,15 @@ if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ] || [ "${TEKTON_PERF_ENA oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" done done + echo "Getting Tekton results api goroutine dump" + for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-api -o name); do + pod="${p##*/}" + for i in 0 1 2; do + file="tekton-results-api.$pod.goroutine-dump-$i" + oc exec -n tekton-results "$p" -c api -- bash -c "curl -SsL localhost:6060/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$output_dir/$file.pprof" + done + done fi - -## Stop collecting KubeScheduler log if [ -n "$KUBE_SCHEDULER_LOG_LEVEL" ]; then echo "Killing kube collector log collector" kill "$KUBE_SCHEDULER_LOG_PID"