From b94b03aa1e42a73f1a0fc3882150f29d92df8419 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Mon, 18 Nov 2024 22:15:22 +0100 Subject: [PATCH] Refine v1beta2 machine ready --- .../machine/machine_controller_status.go | 76 ++++++- .../machine/machine_controller_status_test.go | 191 +++++++++++++++++- util/conditions/v1beta2/merge_strategies.go | 12 +- util/conditions/v1beta2/options.go | 8 + util/conditions/v1beta2/summary_test.go | 24 ++- 5 files changed, 302 insertions(+), 9 deletions(-) diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index 4c5a1101ddbc..c3a9f7a7e76c 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -488,8 +488,8 @@ type machineConditionCustomMergeStrategy struct { func (c machineConditionCustomMergeStrategy) Merge(conditions []v1beta2conditions.ConditionWithOwnerInfo, conditionTypes []string) (status metav1.ConditionStatus, reason, message string, err error) { return v1beta2conditions.DefaultMergeStrategy( + // While machine is deleting, treat unknown conditions from external objects as info (it is ok that those objects have been deleted at this stage). v1beta2conditions.GetPriorityFunc(func(condition metav1.Condition) v1beta2conditions.MergePriority { - // While machine is deleting, treat unknown conditions from external objects as info (it is ok that those objects have been deleted at this stage). if !c.machine.DeletionTimestamp.IsZero() { if condition.Type == clusterv1.MachineBootstrapConfigReadyV1Beta2Condition && (condition.Reason == clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason || condition.Reason == clusterv1.MachineBootstrapConfigDoesNotExistV1Beta2Reason) { return v1beta2conditions.InfoMergePriority @@ -504,6 +504,9 @@ func (c machineConditionCustomMergeStrategy) Merge(conditions []v1beta2condition } return v1beta2conditions.GetDefaultMergePriorityFunc(c.negativePolarityConditionTypes...)(condition) }), + // Group readiness gates for control plane and etcd conditions when they have the same messages. + v1beta2conditions.SummaryMessageTransformFunc(transformControlPlaneAndEtcdConditions), + // Use custom reasons. v1beta2conditions.ComputeReasonFunc(v1beta2conditions.GetDefaultComputeMergeReasonFunc( clusterv1.MachineNotReadyV1Beta2Reason, clusterv1.MachineReadyUnknownV1Beta2Reason, @@ -512,6 +515,77 @@ func (c machineConditionCustomMergeStrategy) Merge(conditions []v1beta2condition ).Merge(conditions, conditionTypes) } +// transformControlPlaneAndEtcdConditions Group readiness gates for control plane conditions when they have the same messages. +// Note: the implementation is based on KCP conditions, but ideally other control plane implementation could +// take benefit from this optimization by naming conditions with APIServer, ControllerManager, Scheduler prefix. +// In future we might consider to do something similar for etcd conditions. +func transformControlPlaneAndEtcdConditions(messages []string) []string { + isControlPlaneCondition := func(c string) bool { + if strings.HasPrefix(c, "* APIServer") { + return true + } + if strings.HasPrefix(c, "* ControllerManager") { + return true + } + if strings.HasPrefix(c, "* Scheduler") { + return true + } + // Note. Etcd pod healthy is considered as part of control plane components in KCP + // because it is not part of the etcd cluster. + // Might be in future we want to make this check more strictly tight to KCP machines e.g. by checking the machine's owner; + // for now, we consider checking for the exact condition name as an acceptable trade off (same below). + if c == "* EtcdPodHealthy" { + return true + } + return false + } + + // Loop trough summary message. + out := []string{} + controlPlaneConditionsCount := 0 + controlPlaneMsg := "" + for _, m := range messages { + // Summary message are in the form of "* Condition: Message"; the following code + // figure out if this is a control plane condition. + sep := strings.Index(m, ":") + if sep == -1 { + out = append(out, m) + continue + } + c, msg := m[:sep], m[sep+1:] + if !isControlPlaneCondition(c) { + // If the condition isn't a control plane condition, add to the output the message as is. + out = append(out, m) + continue + } + + // If the condition is the first control plane condition we meet, add to the output + // a message replacing the condition name with a placeholder for the control plane components + if controlPlaneMsg == "" { + controlPlaneMsg = msg + out = append(out, fmt.Sprintf("* Control plane components:%s", msg)) + controlPlaneConditionsCount++ + continue + } + + // If the condition is not the first control plane condition we meet, if the message is + // different from the previous control plane condition, we can't group control plane components + // so we return the same list of messages we got in input. + // otherwise, continue looping into conditions. + if controlPlaneMsg != msg { + return messages + } + controlPlaneConditionsCount++ + } + + // If we met only 1 control plane component, return the same list of messages we got in input + // so we are going to show the condition name instead of the placeholder for the control plane components. + if controlPlaneConditionsCount == 1 { + return messages + } + return out +} + func setDeletingCondition(_ context.Context, machine *clusterv1.Machine, reconcileDeleteExecuted bool, deletingReason, deletingMessage string) { if machine.DeletionTimestamp.IsZero() { v1beta2conditions.Set(machine, metav1.Condition{ diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index edec3d4107d1..c60a736fc4ef 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -1221,6 +1221,121 @@ func TestDeletingCondition(t *testing.T) { } } +func TestTransformControlPlaneAndEtcdConditions(t *testing.T) { + testCases := []struct { + name string + messages []string + expectMessages []string + }{ + { + name: "no-op without control plane conditions", + messages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.InfrastructureReadyV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.MachineNodeHealthyV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.MachineDeletingV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.MachineHealthCheckSucceededV1Beta2Condition), + }, + expectMessages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.InfrastructureReadyV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.MachineNodeHealthyV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.MachineDeletingV1Beta2Condition), + fmt.Sprintf("* %s: Foo", clusterv1.MachineHealthCheckSucceededV1Beta2Condition), + }, + }, + { + name: "group control plane conditions when msg are all equal", + messages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* ControllerManagerSomething: Waiting for AWSMachine to report spec.providerID", + "* SchedulerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething: Waiting for AWSMachine to report spec.providerID", + }, + expectMessages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* Control plane components: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething: Waiting for AWSMachine to report spec.providerID", + }, + }, + { + name: "don't group control plane conditions when msg are not all equal", + messages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* ControllerManagerSomething: Some message", + "* SchedulerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething:Some message", + }, + expectMessages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* ControllerManagerSomething: Some message", + "* SchedulerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething:Some message", + }, + }, + { + name: "don't group control plane conditions when there is only one condition", + messages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething:Some message", + }, + expectMessages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething:Some message", + }, + }, + { + name: "Treat EtcdPodHealthy as a special case (it groups with control plane components)", + messages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* ControllerManagerSomething: Waiting for AWSMachine to report spec.providerID", + "* SchedulerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdPodHealthy: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething: Some message", + }, + expectMessages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* Control plane components: Waiting for AWSMachine to report spec.providerID", + "* EtcdSomething: Some message", + }, + }, + { + name: "Treat EtcdPodHealthy as a special case (it does not group with other etcd conditions)", + messages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* ControllerManagerSomething: Waiting for AWSMachine to report spec.providerID", + "* SchedulerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdPodHealthy: Some message", + "* EtcdSomething: Some message", + }, + expectMessages: []string{ + fmt.Sprintf("* %s: Foo", clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), + "* APIServerSomething: Waiting for AWSMachine to report spec.providerID", + "* ControllerManagerSomething: Waiting for AWSMachine to report spec.providerID", + "* SchedulerSomething: Waiting for AWSMachine to report spec.providerID", + "* EtcdPodHealthy: Some message", + "* EtcdSomething: Some message", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + got := transformControlPlaneAndEtcdConditions(tc.messages) + g.Expect(got).To(Equal(tc.expectMessages)) + }) + } +} + func TestSetReadyCondition(t *testing.T) { testCases := []struct { name string @@ -1417,9 +1532,7 @@ func TestSetReadyCondition(t *testing.T) { }, Spec: clusterv1.MachineSpec{ ReadinessGates: []clusterv1.MachineReadinessGate{ - { - ConditionType: "MyReadinessGate", - }, + {ConditionType: "MyReadinessGate"}, }, }, Status: clusterv1.MachineStatus{ @@ -1467,6 +1580,78 @@ func TestSetReadyCondition(t *testing.T) { Message: "* MyReadinessGate: Some message", }, }, + { + name: "Groups readiness gates for control plane components and etcd member when possible and there is more than one condition for each category", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSpec{ + ReadinessGates: []clusterv1.MachineReadinessGate{ + {ConditionType: "APIServerSomething"}, + {ConditionType: "ControllerManagerSomething"}, + {ConditionType: "EtcdSomething"}, + }, + }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.InfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: "APIServerSomething", + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Some control plane message", + }, + { + Type: "ControllerManagerSomething", + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Some control plane message", + }, + { + Type: "EtcdSomething", + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Some etcd message", + }, + { + Type: clusterv1.MachineDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotDeletingV1Beta2Reason, + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotReadyV1Beta2Reason, + Message: "* Control plane components: Some control plane message\n" + + "* EtcdSomething: Some etcd message", + }, + }, } for _, tc := range testCases { diff --git a/util/conditions/v1beta2/merge_strategies.go b/util/conditions/v1beta2/merge_strategies.go index 6a29e4577895..a2ad1ce5088f 100644 --- a/util/conditions/v1beta2/merge_strategies.go +++ b/util/conditions/v1beta2/merge_strategies.go @@ -70,8 +70,9 @@ type DefaultMergeStrategyOption interface { // DefaultMergeStrategyOptions allows to set options for the DefaultMergeStrategy behaviour. type DefaultMergeStrategyOptions struct { getPriorityFunc func(condition metav1.Condition) MergePriority - computeReasonFunc func(issueConditions []ConditionWithOwnerInfo, unknownConditions []ConditionWithOwnerInfo, infoConditions []ConditionWithOwnerInfo) string targetConditionHasPositivePolarity bool + computeReasonFunc func(issueConditions []ConditionWithOwnerInfo, unknownConditions []ConditionWithOwnerInfo, infoConditions []ConditionWithOwnerInfo) string + summaryMessageTransformFunc func([]string) []string } // ApplyOptions applies the given list options on these options, @@ -100,6 +101,7 @@ func DefaultMergeStrategy(opts ...DefaultMergeStrategyOption) MergeStrategy { targetConditionHasPositivePolarity: true, computeReasonFunc: GetDefaultComputeMergeReasonFunc(issuesReportedReason, unknownReportedReason, infoReportedReason), // NOTE: when no specific reason are provided, generic ones are used. getPriorityFunc: GetDefaultMergePriorityFunc(), + summaryMessageTransformFunc: nil, } strategyOpt.ApplyOptions(opts) @@ -107,6 +109,7 @@ func DefaultMergeStrategy(opts ...DefaultMergeStrategyOption) MergeStrategy { getPriorityFunc: strategyOpt.getPriorityFunc, computeReasonFunc: strategyOpt.computeReasonFunc, targetConditionHasPositivePolarity: strategyOpt.targetConditionHasPositivePolarity, + summaryMessageTransformFunc: strategyOpt.summaryMessageTransformFunc, } } @@ -186,8 +189,9 @@ const ( // defaultMergeStrategy defines the default merge strategy for Cluster API conditions. type defaultMergeStrategy struct { getPriorityFunc func(condition metav1.Condition) MergePriority - computeReasonFunc func(issueConditions []ConditionWithOwnerInfo, unknownConditions []ConditionWithOwnerInfo, infoConditions []ConditionWithOwnerInfo) string targetConditionHasPositivePolarity bool + computeReasonFunc func(issueConditions []ConditionWithOwnerInfo, unknownConditions []ConditionWithOwnerInfo, infoConditions []ConditionWithOwnerInfo) string + summaryMessageTransformFunc func([]string) []string } // Merge all conditions in input based on a strategy that surfaces issues first, then unknown conditions, then info (if none of issues and unknown condition exists). @@ -286,6 +290,10 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit messages = append(messages, m) } + if d.summaryMessageTransformFunc != nil { + messages = d.summaryMessageTransformFunc(messages) + } + message = strings.Join(messages, "\n") } diff --git a/util/conditions/v1beta2/options.go b/util/conditions/v1beta2/options.go index c0dcb1a5804d..c212fb90089b 100644 --- a/util/conditions/v1beta2/options.go +++ b/util/conditions/v1beta2/options.go @@ -154,3 +154,11 @@ type ComputeReasonFunc func(issueConditions []ConditionWithOwnerInfo, unknownCon func (f ComputeReasonFunc) ApplyToDefaultMergeStrategy(opts *DefaultMergeStrategyOptions) { opts.computeReasonFunc = f } + +// SummaryMessageTransformFunc defines a function to be used when computing the message for a summary condition returned by the DefaultMergeStrategy. +type SummaryMessageTransformFunc func([]string) []string + +// ApplyToDefaultMergeStrategy applies this configuration to the given DefaultMergeStrategy options. +func (f SummaryMessageTransformFunc) ApplyToDefaultMergeStrategy(opts *DefaultMergeStrategyOptions) { + opts.summaryMessageTransformFunc = f +} diff --git a/util/conditions/v1beta2/summary_test.go b/util/conditions/v1beta2/summary_test.go index eb12220e8eee..480fe4fa5b2e 100644 --- a/util/conditions/v1beta2/summary_test.go +++ b/util/conditions/v1beta2/summary_test.go @@ -17,6 +17,7 @@ limitations under the License. package v1beta2 import ( + "strings" "testing" . "github.com/onsi/gomega" @@ -27,6 +28,13 @@ import ( ) func TestSummary(t *testing.T) { + toLowerMsg := func(in []string) (out []string) { + out = make([]string, len(in)) + for i := range in { + out[i] = strings.ToLower(in[i]) + } + return + } tests := []struct { name string conditions []metav1.Condition @@ -163,13 +171,23 @@ func TestSummary(t *testing.T) { {Type: "!C", Status: metav1.ConditionFalse, Reason: "Reason-!C", Message: "Message-!C"}, // info }, conditionType: clusterv1.AvailableV1Beta2Condition, - options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}, CustomMergeStrategy{DefaultMergeStrategy(TargetConditionHasPositivePolarity(true), GetPriorityFunc(GetDefaultMergePriorityFunc("!C")))}}, + options: []SummaryOption{ + ForConditionTypes{"A", "B", "!C"}, + NegativePolarityConditionTypes{"!C"}, + CustomMergeStrategy{ + DefaultMergeStrategy( + TargetConditionHasPositivePolarity(true), + GetPriorityFunc(GetDefaultMergePriorityFunc("!C")), + SummaryMessageTransformFunc(toLowerMsg), + ), + }, + }, want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionTrue, // True because there are many info Reason: infoReportedReason, // Using a generic reason - Message: "* B: Message-B\n" + - "* !C: Message-!C", // messages from all the info conditions (empty messages are dropped) + Message: "* b: message-b\n" + + "* !c: message-!c", // messages from all the info conditions (empty messages are dropped), all lower case due to the summary message transform fun }, }, {