From f2d7c1f2b019034d0d194be7f9edc5d7a61fee94 Mon Sep 17 00:00:00 2001 From: Cheyu Wu Date: Sun, 29 Dec 2024 11:58:15 +0800 Subject: [PATCH] [Feature]: Add a new event type FailedToDeleteWorkerPodCollection (#2680) --- .../controllers/ray/raycluster_controller.go | 4 ++-- ray-operator/controllers/ray/utils/constant.go | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go index 1bfa402537..fcdad71a02 100644 --- a/ray-operator/controllers/ray/raycluster_controller.go +++ b/ray-operator/controllers/ray/raycluster_controller.go @@ -668,7 +668,7 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv if suspendStatus == rayv1.RayClusterSuspending || (!statusConditionGateEnabled && instance.Spec.Suspend != nil && *instance.Spec.Suspend) { if _, err := r.deleteAllPods(ctx, common.RayClusterAllPodsAssociationOptions(instance)); err != nil { - r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeletePod), + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeletePodCollection), "Failed deleting Pods due to suspension for RayCluster %s/%s, %v", instance.Namespace, instance.Name, err) return errstd.Join(utils.ErrFailedDeleteAllPods, err) @@ -779,7 +779,7 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv // Delete all workers if worker group is suspended and skip reconcile if worker.Suspend != nil && *worker.Suspend { if _, err := r.deleteAllPods(ctx, common.RayClusterGroupPodsAssociationOptions(instance, worker.GroupName)); err != nil { - r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteWorkerPod), + r.Recorder.Eventf(instance, corev1.EventTypeWarning, string(utils.FailedToDeleteWorkerPodCollection), "Failed deleting worker Pods for suspended group %s in RayCluster %s/%s, %v", worker.GroupName, instance.Namespace, instance.Name, err) return errstd.Join(utils.ErrFailedDeleteWorkerPod, err) } diff --git a/ray-operator/controllers/ray/utils/constant.go b/ray-operator/controllers/ray/utils/constant.go index ebb90c994e..3c97ca47a1 100644 --- a/ray-operator/controllers/ray/utils/constant.go +++ b/ray-operator/controllers/ray/utils/constant.go @@ -246,10 +246,11 @@ const ( FailedToDeleteHeadPod K8sEventType = "FailedToDeleteHeadPod" // Worker Pod event list - CreatedWorkerPod K8sEventType = "CreatedWorkerPod" - FailedToCreateWorkerPod K8sEventType = "FailedToCreateWorkerPod" - DeletedWorkerPod K8sEventType = "DeletedWorkerPod" - FailedToDeleteWorkerPod K8sEventType = "FailedToDeleteWorkerPod" + CreatedWorkerPod K8sEventType = "CreatedWorkerPod" + FailedToCreateWorkerPod K8sEventType = "FailedToCreateWorkerPod" + DeletedWorkerPod K8sEventType = "DeletedWorkerPod" + FailedToDeleteWorkerPod K8sEventType = "FailedToDeleteWorkerPod" + FailedToDeleteWorkerPodCollection K8sEventType = "FailedToDeleteWorkerPodCollection" // Redis Cleanup Job event list CreatedRedisCleanupJob K8sEventType = "CreatedRedisCleanupJob" @@ -271,8 +272,9 @@ const ( InvalidRayServiceSpec K8sEventType = "InvalidRayServiceSpec" // Generic Pod event list - DeletedPod K8sEventType = "DeletedPod" - FailedToDeletePod K8sEventType = "FailedToDeletePod" + DeletedPod K8sEventType = "DeletedPod" + FailedToDeletePod K8sEventType = "FailedToDeletePod" + FailedToDeletePodCollection K8sEventType = "FailedToDeletePodCollection" // Ingress event list CreatedIngress K8sEventType = "CreatedIngress"