feat(load-test): Use AWS s3 for tekton results in dev mode
Signed-off-by: Pavel Macík <[email protected]>
pmacik committed Jun 20, 2024
1 parent 72c5635 commit db43afe
Showing 12 changed files with 406 additions and 96 deletions.
29 changes: 15 additions & 14 deletions tests/load-tests/ci-scripts/collect-results.sh
@@ -6,8 +6,8 @@ set -o pipefail

# shellcheck disable=SC1090
source "/usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${1:-concurrent}"
source "$( dirname $0 )/utils.sh"
source "$( dirname $0 )/user-prefix.sh"
source "$(dirname $0)/utils.sh"
source "$(dirname $0)/user-prefix.sh"

echo "[$(date --utc -Ins)] Collecting load test results"

@@ -24,14 +24,15 @@ find . -maxdepth 1 -type d -name 'collected-data' -exec cp -r {} "${ARTIFACT_DIR

echo "[$(date --utc -Ins)] Setting up Python venv"
{
-python3 -m venv venv
-set +u
-source venv/bin/activate
-set -u
-python3 -m pip install -U pip
-python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core"
-python3 -m pip install tabulate
-python3 -m pip install matplotlib
+python3 -m venv venv
+set +u
+source venv/bin/activate
+set -u
+python3 -m pip install -U pip
+python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core"
+python3 -m pip install tabulate
+python3 -m pip install matplotlib
+python3 -m pip install 'tenacity<8.4.0'
} &>"${ARTIFACT_DIR}/monitoring-setup.log"

echo "[$(date --utc -Ins)] Create summary JSON with timings"
Expand All @@ -48,12 +49,12 @@ echo "[$(date --utc -Ins)] Creating main status data file"
STATUS_DATA_FILE="${ARTIFACT_DIR}/load-test.json"
status_data.py \
--status-data-file "${STATUS_DATA_FILE}" \
--set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \
--set "name=Konflux loadtest" "started=$(cat started)" "ended=$(cat ended)" \
--set-subtree-json "parameters.options=${ARTIFACT_DIR}/load-test-options.json" "results.measurements=${ARTIFACT_DIR}/load-test-timings.json"

echo "[$(date --utc -Ins)] Adding monitoring data"
mstarted="$( date -d "$( cat started )" --utc -Iseconds )"
mended="$( date -d "$( cat ended )" --utc -Iseconds )"
mstarted="$(date -d "$(cat started)" --utc -Iseconds)"
mended="$(date -d "$(cat ended)" --utc -Iseconds)"
mhost="https://$(oc -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')"
mrawdir="${ARTIFACT_DIR}/monitoring-raw-data-dir/"
mkdir -p "$mrawdir"
@@ -64,7 +65,7 @@ status_data.py \
--monitoring-end "$mended" \
--prometheus-host "$mhost" \
--prometheus-port 443 \
--prometheus-token "$( oc whoami -t )" \
--prometheus-token "$(oc whoami -t)" \
--monitoring-raw-data-dir "$mrawdir" \
&>"${ARTIFACT_DIR}/monitoring-collection.log"

6 changes: 5 additions & 1 deletion tests/load-tests/ci-scripts/load-test.sh
@@ -10,7 +10,9 @@ source "$( dirname $0 )/user-prefix.sh"

pushd "${2:-./tests/load-tests}"

-export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL
+source "./ci-scripts/user-prefix.sh"
+
+export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_USER GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL
QUAY_E2E_ORGANIZATION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/quay-org)
MY_GITHUB_ORG=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/github-org)

@@ -25,6 +27,8 @@ for kv in "${kvs[@]}"; do
done
echo >>"$rate_limits_csv"

echo -e "[INFO] Start tests with user: ${GITHUB_USER}"

while true; do
timestamp=$(printf "%s" "$(date -u +'%FT%T')")
echo -n "$timestamp" >>"$rate_limits_csv"
@@ -45,4 +45,26 @@

{{ monitor_pod('tekton-results', 'tekton-results-watcher', 20, '-.*') }}
{{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 20, '-.*') }}
{{ monitor_pod('tekton-results', 'tekton-results-api', 20, '-.*') }}
{{ monitor_pod_container('tekton-results', 'tekton-results-api', 'api', 20, '-.*') }}
{{ pv_stats('tekton-results', 'data-postgres-postgresql-0', 20) }}

- name: measurements.tekton-results-watcher.watcher_workqueue_depth
monitoring_query: sum(watcher_workqueue_depth{job="tekton-results-watcher"})
monitoring_step: 20

- name: measurements.tekton-results-watcher.watcher_reconcile_latency_bucket
monitoring_query: histogram_quantile(0.99, sum(rate(watcher_reconcile_latency_bucket{job="tekton-results-watcher"}[30m])) by (le) ) / 1000
monitoring_step: 20
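
# (The query above estimates p99 reconcile latency from Prometheus histogram
# buckets via histogram_quantile; the division by 1000 converts the bucket
# unit, assumed here to be milliseconds, into seconds.)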

- name: measurements.cluster_cpu_usage_seconds_total_rate
monitoring_query: sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=""})
monitoring_step: 20

- name: measurements.cluster_memory_usage_rss_total
monitoring_query: sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", container!=""})
monitoring_step: 20

- name: measurements.cluster_disk_throughput_total
monitoring_query: sum (rate(container_fs_reads_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]) + rate(container_fs_writes_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]))
monitoring_step: 20
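
# The monitoring_query entries above are plain PromQL and can be smoke-tested
# against the cluster's Thanos querier before a full run. A minimal sketch,
# assuming an active oc session and the same openshift-monitoring route these
# scripts already use:
#
#   host="https://$(oc -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')"
#   curl -sk -H "Authorization: Bearer $(oc whoami -t)" \
#     --data-urlencode 'query=sum(watcher_workqueue_depth{job="tekton-results-watcher"})' \
#     "$host/api/v1/query" | jq '.data.result'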
204 changes: 202 additions & 2 deletions tests/load-tests/ci-scripts/max-concurrency/collect-results.sh
@@ -15,6 +15,9 @@ csv_delim=";"
csv_delim_quoted="\"$csv_delim\""
dt_format='"%Y-%m-%dT%H:%M:%SZ"'

artifact_logs="${ARTIFACT_DIR}/logs"
artifact_pprof="${ARTIFACT_DIR}/pprof"

collect_artifacts() {
echo "[$(date --utc -Ins)] Collecting load test artifacts"
pwd
@@ -58,6 +61,19 @@ collect_monitoring_data() {
--prometheus-token "$(oc whoami -t)" \
-d &>"$monitoring_collection_log"

mkdir -p "$artifact_logs/ggm"
for file in $(find "$monitoring_collection_dir/" -maxdepth 1 -name "*.csv"); do
echo "Converting $file"
out="$artifact_logs/ggm/$(basename "$file")"
rm -rf "$out"
while read line; do
timestamp=$(echo "$line" | cut -d "," -f1)
value=$(echo "$line" | cut -d "," -f2)
echo "$(date -d "@$timestamp" "+%Y-%m-%dT%H:%M:%S.%N" --utc);$value" >>"$out"
done <<<"$(tail -n +2 "$file")" &
done
wait
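
# Note on the loop above: forking date(1) once per CSV row is slow on large
# files; an equivalent single-pass GNU awk sketch (same epoch-to-ISO
# conversion, fractional seconds dropped) would be:
#   awk -F, 'NR > 1 { printf "%s;%s\n", strftime("%Y-%m-%dT%H:%M:%S", $1, 1), $2 }' "$file"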

## Monitoring data per iteration
for iteration_dir in $(find "$ARTIFACT_DIR/iterations/" -type d -name 'iteration-*'); do
echo "[$(date --utc -Ins)] Collecting monitoring data for $iteration_dir"
@@ -77,6 +93,7 @@ collect_monitoring_data() {
--additional ./tests/load-tests/cluster_read_config.yaml \
--monitoring-start "$mstart" \
--monitoring-end "$mend" \
--monitoring-raw-data-dir "$monitoring_collection_dir" \
--prometheus-host "https://$mhost" \
--prometheus-port 443 \
--prometheus-token "$(oc whoami -t)" \
@@ -94,8 +111,8 @@ collect_tekton_profiling_data() {
for pprof_profile in $(find "$output_dir" -name "*.pprof"); do
if [ -s "$pprof_profile" ]; then
file=$(basename "$pprof_profile")
-go tool pprof -text "$pprof_profile" >"$ARTIFACT_DIR/pprof/$file.txt" || true
-go tool pprof -svg -output="$ARTIFACT_DIR/pprof/$file.svg" "$pprof_profile" || true
+go tool pprof -text "$pprof_profile" >"$artifact_pprof/$file.txt" || true
+go tool pprof -svg -output="$artifact_pprof/$file.svg" "$pprof_profile" || true
fi
done
fi
@@ -119,6 +136,7 @@ collect_scalability_data() {
${csv_delim}Threads\
${csv_delim}WorkloadKPI\
${csv_delim}Errors\
${csv_delim}Duration\
${csv_delim}UserAvgTime\
${csv_delim}UserMaxTime\
${csv_delim}CreateApplicationAvgTime\
@@ -151,6 +169,15 @@ ${csv_delim}ClusterPVCInUseAvg\
${csv_delim}TektonResultsWatcherMemoryMin\
${csv_delim}TektonResultsWatcherMemoryMax\
${csv_delim}TektonResultsWatcherMemoryRange\
${csv_delim}TektonResultsWatcherCPUMin\
${csv_delim}TektonResultsWatcherCPUMax\
${csv_delim}TektonResultsWatcherCPURange\
${csv_delim}TektonResultsWatcherWorkqueueDepthMin\
${csv_delim}TektonResultsWatcherWorkqueueDepthMax\
${csv_delim}TektonResultsWatcherWorkqueueDepthRange\
${csv_delim}TektonResultsWatcherReconcileLatencyBucketMin\
${csv_delim}TektonResultsWatcherReconcileLatencyBucketMax\
${csv_delim}TektonResultsWatcherReconcileLatencyBucketRange\
${tekton_results_watcher_pod_headers}\
${csv_delim}SchedulerPendingPodsCountAvg\
${csv_delim}TokenPoolRatePrimaryAvg\
@@ -217,6 +244,15 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.min | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.max | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.range | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.min | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.max | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".cpu.range | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.min | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.max | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_workqueue_depth.range | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.min | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.max | tostring) \
+ $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".watcher_reconcile_latency_bucket.range | tostring) \
${parked_go_routines_columns} \
+ $csv_delim_quoted + (.measurements.scheduler_pending_pods_count.mean | tostring) \
+ $csv_delim_quoted + (.measurements.token_pool_rate_primary.mean | tostring) \
@@ -272,7 +308,171 @@ collect_timestamp_csvs() {
}

echo "[$(date --utc -Ins)] Collecting max concurrency results"
jq_iso_8601_to_seconds="( \
(if \$d | contains(\"m\") and (endswith(\"ms\") | not) then (\$d | capture(\"(?<minutes>\\\\d+)m(?<seconds>\\\\d+\\\\.?(\\\\d+)?)s\") | (.minutes | tonumber * 60) + (.seconds | tonumber)) else 0 end) + \
(if \$d | (contains(\"m\") | not) and contains(\"s\") and (endswith(\"ms\") | not) and (endswith(\"µs\") | not) then (\$d | capture(\"(?<seconds>\\\\d+\\\\.\\\\d+)s\") | (.seconds | tonumber)) else 0 end) + \
(if \$d | endswith(\"ms\") then (\$d | split(\"ms\") | .[0] | tonumber / 1000) else 0 end) + \
(if \$d | endswith(\"µs\") then (\$d | split(\"µs\") | .[0] | tonumber / 1000000) else 0 end) \
) | tostring"

convert_go_duration_to_seconds() {
local duration=$1
local total_seconds=0

# Extract hours, minutes, seconds, milliseconds, and microseconds.
# The minutes pattern requires a digit or end-of-string after "m" so that
# "250ms" is not misread as 250 minutes, and bc -l keeps fractional results
# (plain bc would truncate the ms/µs divisions to 0).
if [[ $duration =~ ([0-9]*\.?[0-9]+)h ]]; then
total_seconds=$(bc -l <<<"$total_seconds + ${BASH_REMATCH[1]} * 3600")
fi
if [[ $duration =~ ([0-9]*\.?[0-9]+)m([0-9]|$) ]]; then
total_seconds=$(bc -l <<<"$total_seconds + ${BASH_REMATCH[1]} * 60")
fi
if [[ $duration =~ ([0-9]*\.?[0-9]+)s ]]; then
total_seconds=$(bc -l <<<"$total_seconds + ${BASH_REMATCH[1]}")
fi
if [[ $duration =~ ([0-9]*\.?[0-9]+)ms ]]; then
total_seconds=$(bc -l <<<"$total_seconds + ${BASH_REMATCH[1]} / 1000")
fi
if [[ $duration =~ ([0-9]*\.?[0-9]+)(µs|us) ]]; then
total_seconds=$(bc -l <<<"$total_seconds + ${BASH_REMATCH[1]} / 1000000")
fi

echo $total_seconds
}
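
# Quick sanity check (illustrative only, not part of the pipeline):
#   convert_go_duration_to_seconds "1h2m3s"  # -> 3723
#   convert_go_duration_to_seconds "1m30.5s" # -> 90.5
#   convert_go_duration_to_seconds "250ms"   # -> .25 (bc -l prints extended precision)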

collect_tekton_results_logs() {
echo "Collecting Tekton results logs..."
mkdir -p "$artifact_logs"
ggm=$artifact_logs/ggm
mkdir -p "$ggm"
oc logs -c api -n tekton-results -l "app.kubernetes.io/name=tekton-results-api" --prefix --tail=-1 >"$artifact_logs/tekton-results-api.log"
oc logs -c watcher -n tekton-results -l "app.kubernetes.io/name=tekton-results-watcher" --prefix --tail=-1 >"$artifact_logs/tekton-results-watcher.log"
oc logs -c minio -n tekton-results "pod/storage-pool-0-0" --prefix --tail=-1 >"$artifact_logs/tekton-result-storage.log"
ts_format='"%Y-%m-%dT%H:%M:%S"'

jq_cmd="(.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?<milliseconds>\\\\.\\\\d+)\") | .milliseconds) \
+ $csv_delim_quoted + ( \
.msg | capture(\"(?<id>GGM(\\\\d+)?) (?<type>.+) kind (?<kind>\\\\S*) ns (?<ns>\\\\S*) name (?<name>\\\\S*).* times? spent (?<duration>.*)\") \
| .id \
+ $csv_delim_quoted + (.type) \
+ $csv_delim_quoted + (.kind) \
+ $csv_delim_quoted + (.ns) \
+ $csv_delim_quoted + (.name) \
+ $csv_delim_quoted + (.duration) \
+ $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \
)"
component=tekton-results-api
metrics=("UpdateLog after handleReturn" "UpateLog after flush" "GRPC receive" "RBAC check" "get record" "create stream" "read stream")
for f in $(find $artifact_logs -type f -name "$component*.logs"); do
echo "Processing $f..."
grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json
jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true
for metric in "${metrics[@]}"; do
m="$(echo "$metric" | sed -e 's,[ /],_,g')"
grep "$metric"';' "$f.csv" >"$f.$m.csv"
done &
done
wait
for metric in "${metrics[@]}"; do
m="$(echo "$metric" | sed -e 's,[ /],_,g')"
find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv"
done

component=tekton-results-watcher
metrics=("streamLogs" "dynamic Reconcile" "tkn read" "tkn write" "log copy and write" "flush" "close/rcv")
jq_cmd="if .ts | tostring | contains(\"-\") then .ts | capture(\"(?<t>.*)Z\") | .t else (.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?<milliseconds>\\\\.\\\\d+)\") | .milliseconds) end \
+ ( \
.msg | capture(\"(?<id>GGM(\\\\d+)?) (?<type>.+)(?<! obj)( obj)? kind (?<kind>\\\\S*) obj ns (?<ns>\\\\S*) obj name (?<name>\\\\S*) times? spent (?<duration>.*)\") \
| $csv_delim_quoted + (.id) \
+ $csv_delim_quoted + (.type) \
+ $csv_delim_quoted + (.kind) \
+ $csv_delim_quoted + (.ns) \
+ $csv_delim_quoted + (.name) \
+ $csv_delim_quoted + (.duration) \
+ $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \
)"
for f in $(find $artifact_logs -type f -name "$component*.logs"); do
echo "Processing $f..."
grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json
jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true
for metric in "${metrics[@]}"; do
m="$(echo "$metric" | sed -e 's,[ /],_,g')"
grep "$metric"';' "$f.csv" >"$f.$m.csv"
done &
done
wait
for metric in "${metrics[@]}"; do
m="$(echo "$metric" | sed -e 's,[ /],_,g')"
find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv"
done

log_file=$(find "$output_dir" -name 'tekton-results-api.*.logs' | tail -n1)

i=16
output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv"
echo "Generating $output..."
rm -rvf "$output"
while read line; do
duration_ts=$(echo "$line" | sed -e "s,.* time spent \([^ ]\+\) ts \([^ ]\+\) totalSize \(.*\),\2;\1;\3,g")
IFS=";" read -ra tokens <<<"${duration_ts}"
echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]});${tokens[2]}" >>"$output"
done <<<"$(grep "GGMGGM$i" $log_file)"

for i in 17 18; do
output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv"
echo "Generating $output..."
rm -rvf "$output"
while read line; do
duration_ts=$(echo "$line" | sed -e "s,.* count \([^ ]\+\) time \([^ ]\+\) ts \([^ ]\+\).*,\3;\2;\1,g")
IFS=";" read -ra tokens <<<"${duration_ts}"
echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]});${tokens[2]}" >>"$output"
done <<<"$(grep "GGMGGM$i" $log_file)"
done

i=20
output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv"
echo "Generating $output..."
rm -rvf "$output"
while read line; do
duration_ts=$(echo "$line" | sed -e "s,.* runStream \([^ ]\+\) ts \([^ ]\+\),\2;\1,g")
IFS=";" read -ra tokens <<<"${duration_ts}"
echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output"
done <<<"$(grep "GGMGGM$i" $log_file)"

for i in 24 25; do
output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv"
echo "Generating $output..."
rm -rvf "$output"
while read line; do
duration_ts=$(echo "$line" | sed -e "s,.* Write data \([^ ]\+\) ts \([^ ]\+\),\2;\1,g")
IFS=";" read -ra tokens <<<"${duration_ts}"
echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output"
done <<<"$(grep "GGMGGM$i" $log_file)"
done

i=31
output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv"
echo "Generating $output..."
rm -rvf "$output"
while read line; do
duration_ts=$(echo "$line" | sed -e "s,.* WriteStatus \([^ ]\+\) ts \([^ ]\+\),\2;\1,g")
IFS=";" read -ra tokens <<<"${duration_ts}"
echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output"
done <<<"$(grep "GGMGGM$i" $log_file)"

i=33
output="$artifact_logs/ggm/$(basename "$log_file").ggmggm$i.csv"
echo "Generating $output..."
rm -rvf "$output"
while read line; do
duration_ts=$(echo "$line" | sed -e "s,.* handleStream \([^ ]\+\) ts \([^ ]\+\),\2;\1,g")
IFS=";" read -ra tokens <<<"${duration_ts}"
echo "$(date -d @"${tokens[0]}" --utc +"%Y-%m-%dT%H:%M:%S.%N");$(convert_go_duration_to_seconds ${tokens[1]})" >>"$output"
done <<<"$(grep "GGMGGM$i" $log_file)"
}

echo "Collecting max concurrency results..."
collect_artifacts || true
collect_tekton_results_logs || true
collect_timestamp_csvs || true
collect_monitoring_data || true
collect_scalability_data || true
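# Each collector is allowed to fail independently ("|| true") so that one
# broken stage does not abort the remaining artifact collection.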
8 changes: 8 additions & 0 deletions tests/load-tests/ci-scripts/restart-tekton-results.sh
@@ -0,0 +1,8 @@
#!/bin/bash

echo "Restarting Tekton Results API" oc rollout restart deployment/tekton-results-api -n tekton-results
oc rollout restart deployment/tekton-results-api -n tekton-results
oc rollout status deployment/tekton-results-api -n tekton-results -w
echo "Restarting Tekton Results Watcher"
oc rollout restart deployment/tekton-results-watcher -n tekton-results
oc rollout status deployment/tekton-results-watcher -n tekton-results -w
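
# Intended to be run with an active oc session against the target cluster,
# e.g. (hypothetical invocation):
#   ./tests/load-tests/ci-scripts/restart-tekton-results.sh
# The "oc rollout status ... -w" calls block until each rollout completes.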