Skip to content

Commit

Permalink
Merge pull request #429 from chronicleworks/feature/backport-liveness
Browse files Browse the repository at this point in the history
* Introduce integrated liveness check
  • Loading branch information
ryan-s-roberts authored May 14, 2024
2 parents 37a4466 + 434ddf6 commit b0fa6a8
Show file tree
Hide file tree
Showing 17 changed files with 865 additions and 294 deletions.
656 changes: 488 additions & 168 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ prost = "0.10" # common, sawtooth-protocol, sawtooth-tp: version = "0.10.0"
prost-build = "0.10.0"
prost-types = "0.11.2"
protobuf = "2.27.1"
metrics = "0.21.0"
metrics-exporter-prometheus = "0.12.1"
question = "0.2.2"
r2d2 = "0.8.9"
rand = { version = "0.8.5", features = ["getrandom"] }
Expand Down
23 changes: 23 additions & 0 deletions charts/chronicle/templates/check_metrics_available.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Probe scripts for the Chronicle container. The StatefulSet mounts this
# ConfigMap and runs the scripts with /bin/bash as exec probes. Both scripts
# read the Prometheus text endpoint on localhost:9000.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "common.names.fullname" . }}-scripts
data:
  first_depth_charge.sh: |
    #!/bin/bash
    # Readiness check: exit 1 until at least one depth charge round trip has
    # been recorded, otherwise fall through and exit 0.
    metrics=$(curl -s http://localhost:9000/metrics)
    # Sum every sample whose name starts with the metric prefix so that the
    # numeric test below always receives a single integer, even when the
    # exporter emits several matching lines. No match yields 0.
    count=$(echo "$metrics" \
      | grep '^depth_charge_round_trip_count' \
      | tr -d '\r' \
      | awk '{ total += $2 } END { printf "%d", total }')
    if [[ -z "$count" ]] || [[ "$count" -eq 0 ]]; then
      exit 1
    fi
  check_timeouts.sh: |
    #!/bin/bash
    # Liveness check: exit 1 as soon as any depth charge timeout has been
    # counted. A missing metric or unreachable endpoint is treated as healthy.
    metrics=$(curl -s http://localhost:9000/metrics)
    # Collapse all matching samples into one integer; see the note in
    # first_depth_charge.sh. No match yields 0.
    timeouts=$(echo "$metrics" \
      | grep '^depth_charge_timeouts' \
      | tr -d '\r' \
      | awk '{ total += $2 } END { printf "%d", total }')
    if [[ "$timeouts" =~ ^[0-9]+$ ]] && [[ "$timeouts" -ne 0 ]]; then
      echo "Non-zero depth_charge_timeouts detected: $timeouts"
      exit 1
    else
      echo "No non-zero depth_charge_timeouts detected."
      exit 0
    fi
133 changes: 31 additions & 102 deletions charts/chronicle/templates/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,16 @@ spec:
{{- end }}
serve-api \
--interface 0.0.0.0:{{ .Values.port }} \
{{- /* Each flag line ends with a backslash so the shell keeps reading one serve-api command. */ -}}
{{- if .Values.livenessProbe.enabled }}
--liveness-interval {{ .Values.livenessProbe.periodSeconds }} \
--liveness-deadline {{ .Values.livenessProbe.timeoutSeconds }} \
{{- end }}
{{- if .Values.auth.required }}
--require-auth \
{{- end }}
{{ include "chronicle.jwks-url.cli" . }}
{{ include "chronicle.userinfo-url.cli" . }}
{{ include "chronicle.id-claims" . }}
;
{{ include "chronicle.id-claims" . }};
env: {{ include "lib.safeToYaml" .Values.env | nindent 12 }}
- name: RUST_LOG
value: {{ .Values.logLevel }}
Expand All @@ -144,6 +147,26 @@ spec:
{{- end }}
{{- include "lib.safeToYaml" .Values.postgres.env | nindent 12 }}
resources: {{- include "lib.safeToYaml" .Values.resources | nindent 12 }}
{{- if .Values.livenessProbe.enabled }}
# Liveness: runs the mounted check_timeouts.sh with bash, first after one
# second and then every second; failureThreshold is 1, so a single failing
# run counts as a liveness failure.
# NOTE(review): timings are hard-coded here; .Values.livenessProbe.periodSeconds
# and .timeoutSeconds are passed to serve-api as --liveness-interval and
# --liveness-deadline rather than used for this probe - confirm this is intended.
livenessProbe:
exec:
command:
- /bin/bash
- /scripts/check_timeouts.sh
initialDelaySeconds: 1
periodSeconds: 1
failureThreshold: 1
{{- end}}
{{- if .Values.readinessProbe.enabled }}
# Readiness: runs the mounted first_depth_charge.sh with bash once per second,
# starting after one second, with failureThreshold set to 600.
readinessProbe:
exec:
command:
- /bin/bash
- /scripts/first_depth_charge.sh
initialDelaySeconds: 1
periodSeconds: 1
failureThreshold: 600
{{- end}}
volumeMounts:
- name: chronicle-config
mountPath: /etc/chronicle/config/
Expand All @@ -152,107 +175,10 @@ spec:
readOnly: true
- name: chronicle-data
mountPath: /var/lib/chronicle/store/
- name: check-metrics-available
mountPath: /scripts/
readOnly: true
{{- include "lib.volumeMounts" .Values.extraVolumeMounts | nindent 12 }}
{{- if .Values.livenessProbe.enabled }}
livenessProbe:
exec:
command:
- bash
- -c
- |
PROBE_ID="startup_$(LC_ALL=C tr -dc A-Za-z0-9 </dev/urandom | head -c 13)" &&
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ") &&
echo '[
{
"@id": "_:n1",
"@type": [
"http://btp.works/chronicleoperations/ns#ActivityExists"
],
"http://btp.works/chronicleoperations/ns#activityName": [
{
"@value": "'"$PROBE_ID"'"
}
],
"http://btp.works/chronicleoperations/ns#namespaceName": [
{
"@value": "{{ .Values.livenessProbe.namespaceName }}"
}
],
"http://btp.works/chronicleoperations/ns#namespaceUuid": [
{
"@value": "{{ .Values.livenessProbe.namespaceUuid }}"
}
]
}
]' > /tmp/import.json &&
echo "Probe ID: $PROBE_ID" &&
chronicle \
-c /etc/chronicle/config/config.toml \
--console-logging json \
--sawtooth tcp://{{ include "chronicle.sawtooth.service" . }}:{{ include "chronicle.sawtooth.sawcomp" . }} \
--remote-database \
--database-name {{ .Values.postgres.database }} \
--database-username {{ .Values.postgres.user }} \
--database-host {{ .Values.postgres.host }} \
{{- if not .Values.opa.enabled }}
--embedded-opa-policy \
{{- end }}
import {{ .Values.livenessProbe.namespaceName }} {{ .Values.livenessProbe.namespaceUuid }} < /tmp/import.json
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
{{- end }}
{{- if .Values.startUpProbe.enabled }}
startupProbe:
exec:
command:
- bash
- -c
- |
PROBE_ID="startup_$(LC_ALL=C tr -dc A-Za-z0-9 </dev/urandom | head -c 13)" &&
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ") &&
echo '[
{
"@id": "_:n1",
"@type": [
"http://btp.works/chronicleoperations/ns#ActivityExists"
],
"http://btp.works/chronicleoperations/ns#activityName": [
{
"@value": "'"$PROBE_ID"'"
}
],
"http://btp.works/chronicleoperations/ns#namespaceName": [
{
"@value": "{{ .Values.startUpProbe.namespaceName }}"
}
],
"http://btp.works/chronicleoperations/ns#namespaceUuid": [
{
"@value": "{{ .Values.startUpProbe.namespaceUuid }}"
}
]
}
]' > /tmp/import.json &&
echo "Probe ID: $PROBE_ID" &&
chronicle \
-c /etc/chronicle/config/config.toml \
--console-logging json \
--sawtooth tcp://{{ include "chronicle.sawtooth.service" . }}:{{ include "chronicle.sawtooth.sawcomp" . }} \
--remote-database \
--database-name {{ .Values.postgres.database }} \
--database-username {{ .Values.postgres.user }} \
--database-host {{ .Values.postgres.host }} \
{{- if not .Values.opa.enabled }}
--embedded-opa-policy \
{{- end }}
import {{ .Values.startUpProbe.namespaceName }} {{ .Values.startUpProbe.namespaceUuid }} < /tmp/import.json
initialDelaySeconds: {{ .Values.startUpProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.startUpProbe.periodSeconds }}
timeoutSeconds: {{ .Values.startUpProbe.timeoutSeconds }}
failureThreshold: {{ .Values.startUpProbe.failureThreshold }}
{{- end }}
volumes:
- name: chronicle-secrets
persistentVolumeClaim:
Expand All @@ -263,6 +189,9 @@ spec:
- name: chronicle-config
configMap:
name: {{ .Release.Name }}-chronicle-config
- name: check-metrics-available
configMap:
name: {{ include "common.names.fullname" . }}-scripts
{{- if not .Values.postgres.persistence.enabled }}
- name: "pgdata"
emptyDir: {}
Expand Down
12 changes: 5 additions & 7 deletions charts/chronicle/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,15 @@ auth:

## @md | `livenessProbe.enabled` | if true, enables the liveness probe | true |
livenessProbe:
enabled: false
enabled: true
## @md | `livenessProbe.timeoutSeconds` | number of seconds after which the probe times out | 20 |
timeoutSeconds: 20
## @md | `livenessProbe.periodSeconds` | how often (in seconds) to perform the probe | 60 |
periodSeconds: 60
## @md | `livenessProbe.failureThreshold` | when a probe fails, Kubernetes will try failureThreshold times before giving up | 1 |
failureThreshold: 1
## @md | `livenessProbe.namespaceName` | the Chronicle namespace in which the probe operates | default |
namespaceName: default
## @md | `livenessProbe.namespaceUuid` | the UUID of the Chronicle namespace in which the probe operates | fd717fd6-70f1-44c1-81de-287d5e101089 |
namespaceUuid: fd717fd6-70f1-44c1-81de-287d5e101089

## @md | `readinessProbe.enabled` | if true, enables the readiness probe | true |
readinessProbe:
enabled: true

## @md | `startUpProbe.enabled` | if true, enables the startup probe | true |
startUpProbe:
Expand Down
2 changes: 2 additions & 0 deletions crates/api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ opentelemetry = { workspace = true }
parking_lot = { workspace = true }
poem = { workspace = true }
r2d2 = { workspace = true }
metrics = { workspace = true }
metrics-exporter-prometheus = { workspace = true }
rand = { workspace = true }
rand_core = { workspace = true }
reqwest = { workspace = true }
Expand Down
1 change: 0 additions & 1 deletion crates/api/src/chronicle_graphql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use async_graphql::{
SimpleObject, Subscription, SubscriptionType,
};


use async_graphql_poem::{
GraphQL, GraphQLBatchRequest, GraphQLBatchResponse, GraphQLProtocol, GraphQLSubscription,
GraphQLWebSocket,
Expand Down
Loading

0 comments on commit b0fa6a8

Please sign in to comment.