Skip to content

Commit

Permalink
add unit tests
Browse files Browse the repository at this point in the history
Signed-off-by: Andrew Sy Kim <[email protected]>
  • Loading branch information
andrewsykim committed Dec 30, 2024
1 parent 6ddd641 commit cb3980c
Show file tree
Hide file tree
Showing 4 changed files with 522 additions and 13 deletions.
2 changes: 1 addition & 1 deletion docs/reference/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ _Appears in:_
| `clusterSelector` _object (keys:string, values:string)_ | clusterSelector is used to select running rayclusters by labels | | |
| `submitterConfig` _[SubmitterConfig](#submitterconfig)_ | Configurations of submitter k8s job. | | |
| `managedBy` _string_ | ManagedBy is an optional configuration for the controller or entity that manages a RayJob.<br />The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.<br />The kuberay-operator reconciles a RayJob which doesn't have this field at all or<br />the field value is the reserved string 'ray.io/kuberay-operator',<br />but delegates reconciling the RayJob with 'kueue.x-k8s.io/multikueue' to the Kueue.<br />The field is immutable. | | |
| `deletionPolicy` _[DeletionPolicy](#deletionpolicy)_ | deletionPolicy indicates what resources of the RayJob are deleted upon job completion.<br />Valid values are 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf' or 'None'.<br />If unset, deletion policy is based on 'spec.shutdownAfterJobFinishes'.<br />This field requires the RayJobDeletionPolicy feature gate to be enabled. | | |
| `deletionPolicy` _[DeletionPolicy](#deletionpolicy)_ | deletionPolicy indicates what resources of the RayJob are deleted upon job completion.<br />Valid values are 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf' or 'DeleteNone'.<br />If unset, deletion policy is based on 'spec.shutdownAfterJobFinishes'.<br />This field requires the RayJobDeletionPolicy feature gate to be enabled. | | |
| `entrypoint` _string_ | INSERT ADDITIONAL SPEC FIELDS - desired state of cluster<br />Important: Run "make" to regenerate code after modifying this file | | |
| `runtimeEnvYAML` _string_ | RuntimeEnvYAML represents the runtime environment configuration<br />provided as a multi-line YAML string. | | |
| `jobId` _string_ | If jobId is not set, a new jobId will be auto-generated. | | |
Expand Down
6 changes: 3 additions & 3 deletions ray-operator/apis/ray/v1/rayjob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ type DeletionPolicy string
const (
DeleteClusterDeletionPolicy DeletionPolicy = "DeleteCluster"
DeleteWorkersDeletionPolicy DeletionPolicy = "DeleteWorkers"
DeleteSelfDeltionPolicy DeletionPolicy = "DeleteSelf"
DeleteNoneDeletionPolicy DeletionPolicy = "None"
DeleteSelfDeletionPolicy DeletionPolicy = "DeleteSelf"
DeleteNoneDeletionPolicy DeletionPolicy = "DeleteNone"
)

type SubmitterConfig struct {
Expand Down Expand Up @@ -105,7 +105,7 @@ type RayJobSpec struct {
// +kubebuilder:validation:XValidation:rule="self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue']",message="the managedBy field value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'"
ManagedBy *string `json:"managedBy,omitempty"`
// deletionPolicy indicates what resources of the RayJob are deleted upon job completion.
// Valid values are 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf' or 'None'.
// Valid values are 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf' or 'DeleteNone'.
// If unset, deletion policy is based on 'spec.shutdownAfterJobFinishes'.
// This field requires the RayJobDeletionPolicy feature gate to be enabled.
// +kubebuilder:validation:XValidation:rule="self in ['DeleteCluster', 'DeleteWorkers', 'DeleteSelf', 'DeleteNone']",message="the deletionPolicy field value must be either 'DeleteCluster', 'DeleteWorkers', 'DeleteSelf', or 'DeleteNone'"
Expand Down
17 changes: 8 additions & 9 deletions ray-operator/controllers/ray/rayjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,9 +375,9 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
logger.Info("Deleting RayCluster", "RayCluster", rayJobInstance.Status.RayClusterName)
_, err = r.deleteClusterResources(ctx, rayJobInstance)
case rayv1.DeleteWorkersDeletionPolicy:
logger.Info("Scaling all worker replicas to 0", "RayCluster", rayJobInstance.Status.RayClusterName)
_, err = r.scaleWorkerReplicasToZero(ctx, rayJobInstance)
case rayv1.DeleteSelfDeltionPolicy:
logger.Info("Suspending all worker groups", "RayCluster", rayJobInstance.Status.RayClusterName)
err = r.suspendWorkerGroups(ctx, rayJobInstance)
case rayv1.DeleteSelfDeletionPolicy:
logger.Info("Deleting RayJob")
err = r.Client.Delete(ctx, rayJobInstance)
default:
Expand Down Expand Up @@ -655,29 +655,28 @@ func (r *RayJobReconciler) deleteClusterResources(ctx context.Context, rayJobIns
return isClusterDeleted, nil
}

func (r *RayJobReconciler) scaleWorkerReplicasToZero(ctx context.Context, rayJobInstance *rayv1.RayJob) (bool, error) {
func (r *RayJobReconciler) suspendWorkerGroups(ctx context.Context, rayJobInstance *rayv1.RayJob) error {
logger := ctrl.LoggerFrom(ctx)
clusterIdentifier := common.RayJobRayClusterNamespacedName(rayJobInstance)

cluster := rayv1.RayCluster{}
if err := r.Get(ctx, clusterIdentifier, &cluster); err != nil {
return false, err
return err
}

for i := range cluster.Spec.WorkerGroupSpecs {
cluster.Spec.WorkerGroupSpecs[i].Replicas = ptr.To[int32](0)
cluster.Spec.WorkerGroupSpecs[i].MinReplicas = ptr.To[int32](0)
cluster.Spec.WorkerGroupSpecs[i].Suspend = ptr.To[bool](true)
}

if err := r.Update(ctx, &cluster); err != nil {
r.Recorder.Eventf(rayJobInstance, corev1.EventTypeWarning, string(utils.FailedToUpdateRayCluster), "Failed to update cluster %s/%s: %v", cluster.Namespace, cluster.Name, err)
return false, err
return err
}

logger.Info("All worker groups for RayCluster has been scaled to 0", "RayCluster", clusterIdentifier)
r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, string(utils.UpdatedRayCluster), "Updated cluster %s/%s", cluster.Namespace, cluster.Name)

return true, nil
return nil
}

// SetupWithManager sets up the controller with the Manager.
Expand Down
Loading

0 comments on commit cb3980c

Please sign in to comment.