Skip to content

Commit

Permalink
chore: add default topologies for tests (#3769)
Browse files Browse the repository at this point in the history
  • Loading branch information
mykysha authored Dec 11, 2024
1 parent 03ae95e commit 3378beb
Show file tree
Hide file tree
Showing 18 changed files with 205 additions and 232 deletions.
4 changes: 1 addition & 3 deletions pkg/cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3772,9 +3772,7 @@ func TestSnapshotError(t *testing.T) {
features.SetFeatureGateDuringTest(t, features.TopologyAwareScheduling, true)
ctx, _ := utiltesting.ContextWithLog(t)

topology := *utiltesting.MakeTopology("default").
Levels(corev1.LabelHostname).
Obj()
topology := *utiltesting.MakeDefaultOneLevelTopology("default")
flavor := *utiltesting.MakeResourceFlavor("tas-default").
TopologyName("default").
Obj()
Expand Down
4 changes: 1 addition & 3 deletions pkg/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3815,9 +3815,7 @@ func TestScheduleForTAS(t *testing.T) {
Ready().
Obj(),
}
defaultSingleLevelTopology := *utiltesting.MakeTopology("tas-single-level").
Levels(corev1.LabelHostname).
Obj()
defaultSingleLevelTopology := *utiltesting.MakeDefaultOneLevelTopology("tas-single-level")
defaultTwoLevelTopology := *utiltesting.MakeTopology("tas-two-level").
Levels(tasRackLabel, corev1.LabelHostname).
Obj()
Expand Down
49 changes: 49 additions & 0 deletions pkg/util/testing/defaults.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package testing

import (
corev1 "k8s.io/api/core/v1"

kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1"
)

const (
// DefaultRackTopologyLevel is the node label used in tests as the
// rack-level topology key (a rack groups nodes within a block).
DefaultRackTopologyLevel = "cloud.provider.com/topology-rack"
// DefaultBlockTopologyLevel is the node label used in tests as the
// block-level topology key (the coarsest level above racks).
DefaultBlockTopologyLevel = "cloud.provider.com/topology-block"
)

// MakeDefaultOneLevelTopology creates a default topology with hostname level.
func MakeDefaultOneLevelTopology(name string) *kueuealpha.Topology {
	// A single-level topology: every node is its own topology domain,
	// keyed by the well-known hostname label.
	builder := MakeTopology(name).Levels(corev1.LabelHostname)
	return builder.Obj()
}

// MakeDefaultTwoLevelTopology creates a default topology with block and rack levels.
func MakeDefaultTwoLevelTopology(name string) *kueuealpha.Topology {
	// Levels are ordered coarsest-first: blocks contain racks.
	builder := MakeTopology(name).Levels(
		DefaultBlockTopologyLevel,
		DefaultRackTopologyLevel,
	)
	return builder.Obj()
}

// MakeDefaultThreeLevelTopology creates a default topology with block, rack and hostname levels.
func MakeDefaultThreeLevelTopology(name string) *kueuealpha.Topology {
	// Levels are ordered coarsest-first: blocks contain racks, racks
	// contain individual nodes (identified by the hostname label).
	builder := MakeTopology(name).Levels(
		DefaultBlockTopologyLevel,
		DefaultRackTopologyLevel,
		corev1.LabelHostname,
	)
	return builder.Obj()
}
8 changes: 3 additions & 5 deletions test/e2e/singlecluster/tas_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling", func() {
clusterQueue *kueue.ClusterQueue
)
ginkgo.BeforeEach(func() {
topology = testing.MakeTopology("hostname").Levels(
corev1.LabelHostname,
).Obj()
topology = testing.MakeDefaultOneLevelTopology("hostname")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

onDemandRF = testing.MakeResourceFlavor("on-demand").
Expand Down Expand Up @@ -157,7 +155,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling", func() {
localQueue *kueue.LocalQueue
)
ginkgo.BeforeEach(func() {
topology = testing.MakeTopology("hostname").Levels(corev1.LabelHostname).Obj()
topology = testing.MakeDefaultOneLevelTopology("hostname")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

onDemandRF = testing.MakeResourceFlavor("on-demand").
Expand Down Expand Up @@ -292,7 +290,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling", func() {
localQueue *kueue.LocalQueue
)
ginkgo.BeforeEach(func() {
topology = testing.MakeTopology("hostname").Levels(corev1.LabelHostname).Obj()
topology = testing.MakeDefaultOneLevelTopology("hostname")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

onDemandRF = testing.MakeResourceFlavor("on-demand").
Expand Down
24 changes: 9 additions & 15 deletions test/e2e/tas/job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,9 @@ import (
)

const (
instanceType = "tas-group"
tasNodeGroupLabel = "cloud.provider.com/node-group"
topologyLevelRack = "cloud.provider.com/topology-rack"
topologyLevelBlock = "cloud.provider.com/topology-block"
extraResource = "example.com/gpu"
instanceType = "tas-group"
tasNodeGroupLabel = "cloud.provider.com/node-group"
extraResource = "example.com/gpu"
)

var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
Expand All @@ -68,11 +66,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
clusterQueue *kueue.ClusterQueue
)
ginkgo.BeforeEach(func() {
topology = testing.MakeTopology("datacenter").Levels(
topologyLevelBlock,
topologyLevelRack,
corev1.LabelHostname,
).Obj()
topology = testing.MakeDefaultThreeLevelTopology("datacenter")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -110,7 +104,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
Limit(extraResource, "1").
Obj()
sampleJob = (&testingjob.JobWrapper{Job: *sampleJob}).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, topologyLevelRack).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, testing.DefaultRackTopologyLevel).
Image(util.E2eTestSleepImage, []string{"100ms"}).
Obj()
gomega.Expect(k8sClient.Create(ctx, sampleJob)).Should(gomega.Succeed())
Expand All @@ -134,7 +128,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
Limit(extraResource, "1").
Obj()
sampleJob = (&testingjob.JobWrapper{Job: *sampleJob}).
PodAnnotation(kueuealpha.PodSetPreferredTopologyAnnotation, topologyLevelRack).
PodAnnotation(kueuealpha.PodSetPreferredTopologyAnnotation, testing.DefaultRackTopologyLevel).
Image(util.E2eTestSleepImage, []string{"100ms"}).
Obj()
gomega.Expect(k8sClient.Create(ctx, sampleJob)).Should(gomega.Succeed())
Expand Down Expand Up @@ -187,7 +181,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
Limit(extraResource, "1").
Obj()
sampleJob = (&testingjob.JobWrapper{Job: *sampleJob}).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, topologyLevelBlock).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, testing.DefaultBlockTopologyLevel).
Image(util.E2eTestSleepImage, []string{"100ms"}).
Obj()
gomega.Expect(k8sClient.Create(ctx, sampleJob)).Should(gomega.Succeed())
Expand Down Expand Up @@ -241,7 +235,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
Limit(extraResource, "1").
Obj()
sampleJob = (&testingjob.JobWrapper{Job: *sampleJob}).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, topologyLevelBlock).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, testing.DefaultBlockTopologyLevel).
Image(util.E2eTestSleepImage, []string{"10ms"}).
Obj()
gomega.Expect(k8sClient.Create(ctx, sampleJob)).Should(gomega.Succeed())
Expand Down Expand Up @@ -270,7 +264,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Job", func() {
Limit(extraResource, "1").
Obj()
sampleJob = (&testingjob.JobWrapper{Job: *sampleJob}).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, topologyLevelBlock).
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, testing.DefaultBlockTopologyLevel).
Image(util.E2eTestSleepImage, []string{"60s"}).
Obj()
gomega.Expect(k8sClient.Create(ctx, sampleJob)).Should(gomega.Succeed())
Expand Down
8 changes: 2 additions & 6 deletions test/e2e/tas/jobset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for JobSet", func() {
clusterQueue *kueue.ClusterQueue
)
ginkgo.BeforeEach(func() {
topology = testing.MakeTopology("datacenter").Levels(
topologyLevelBlock,
topologyLevelRack,
corev1.LabelHostname,
).Obj()
topology = testing.MakeDefaultThreeLevelTopology("datacenter")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -106,7 +102,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for JobSet", func() {
Image: util.E2eTestSleepImage,
Args: []string{"60s"},
PodAnnotations: map[string]string{
kueuealpha.PodSetPreferredTopologyAnnotation: topologyLevelBlock,
kueuealpha.PodSetPreferredTopologyAnnotation: testing.DefaultBlockTopologyLevel,
},
},
).
Expand Down
8 changes: 3 additions & 5 deletions test/e2e/tas/mpijob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for MPIJob", func() {
}
gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())

topology = testing.MakeTopology("datacenter").
Levels(topologyLevelBlock, topologyLevelRack, corev1.LabelHostname).
Obj()
topology = testing.MakeDefaultThreeLevelTopology("datacenter")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -106,7 +104,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for MPIJob", func() {
ReplicaCount: launcherReplicas,
RestartPolicy: corev1.RestartPolicyOnFailure,
Annotations: map[string]string{
kueuealpha.PodSetPreferredTopologyAnnotation: topologyLevelRack,
kueuealpha.PodSetPreferredTopologyAnnotation: testing.DefaultRackTopologyLevel,
},
},
testingmpijob.MPIJobReplicaSpecRequirement{
Expand All @@ -116,7 +114,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for MPIJob", func() {
ReplicaCount: workerReplicas,
RestartPolicy: corev1.RestartPolicyOnFailure,
Annotations: map[string]string{
kueuealpha.PodSetPreferredTopologyAnnotation: topologyLevelBlock,
kueuealpha.PodSetPreferredTopologyAnnotation: testing.DefaultBlockTopologyLevel,
},
},
).
Expand Down
8 changes: 2 additions & 6 deletions test/e2e/tas/pod_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Pod group", func() {
clusterQueue *kueue.ClusterQueue
)
ginkgo.BeforeEach(func() {
topology = testing.MakeTopology("datacenter").Levels(
topologyLevelBlock,
topologyLevelRack,
corev1.LabelHostname,
).Obj()
topology = testing.MakeDefaultThreeLevelTopology("datacenter")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -93,7 +89,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for Pod group", func() {
Queue("test-queue").
Request(extraResource, "1").
Limit(extraResource, "1").
Annotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.provider.com/topology-block")
Annotation(kueuealpha.PodSetRequiredTopologyAnnotation, testing.DefaultBlockTopologyLevel)
podGroup := basePod.MakeIndexedGroup(numPods)

for _, pod := range podGroup {
Expand Down
8 changes: 3 additions & 5 deletions test/e2e/tas/pytorch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for PyTorchJob", func() {
}
gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())

topology = testing.MakeTopology("datacenter").
Levels(topologyLevelBlock, topologyLevelRack, corev1.LabelHostname).
Obj()
topology = testing.MakeDefaultThreeLevelTopology("datacenter")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -105,7 +103,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for PyTorchJob", func() {
ReplicaCount: masterReplicas,
RestartPolicy: kftraining.RestartPolicyOnFailure,
Annotations: map[string]string{
kueuealpha.PodSetPreferredTopologyAnnotation: topologyLevelRack,
kueuealpha.PodSetPreferredTopologyAnnotation: testing.DefaultRackTopologyLevel,
},
},
testingpytorchjob.PyTorchReplicaSpecRequirement{
Expand All @@ -115,7 +113,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for PyTorchJob", func() {
ReplicaCount: workerReplicas,
RestartPolicy: kftraining.RestartPolicyOnFailure,
Annotations: map[string]string{
kueuealpha.PodSetPreferredTopologyAnnotation: topologyLevelBlock,
kueuealpha.PodSetPreferredTopologyAnnotation: testing.DefaultBlockTopologyLevel,
},
},
).
Expand Down
6 changes: 2 additions & 4 deletions test/e2e/tas/statefulset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for StatefulSet", func() {
ns = &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{GenerateName: "e2e-tas-sts-"}}
gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())

topology = testing.MakeTopology("datacenter").
Levels(topologyLevelBlock, topologyLevelRack, corev1.LabelHostname).
Obj()
topology = testing.MakeDefaultThreeLevelTopology("datacenter")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -85,7 +83,7 @@ var _ = ginkgo.Describe("TopologyAwareScheduling for StatefulSet", func() {
Limit(extraResource, "1").
Replicas(replicas).
Queue(localQueue.Name).
PodTemplateSpecAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.provider.com/topology-block").
PodTemplateSpecAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, testing.DefaultBlockTopologyLevel).
Obj()
gomega.Expect(k8sClient.Create(ctx, sts)).Should(gomega.Succeed())

Expand Down
22 changes: 9 additions & 13 deletions test/integration/controller/jobs/jobset/jobset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1156,8 +1156,6 @@ var _ = ginkgo.Describe("JobSet controller interacting with scheduler", ginkgo.O
var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() {
const (
nodeGroupLabel = "node-group"
tasBlockLabel = "cloud.com/topology-block"
tasRackLabel = "cloud.com/topology-rack"
)

var (
Expand Down Expand Up @@ -1190,8 +1188,8 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
nodes = []corev1.Node{
*testingnode.MakeNode("b1r1").
Label(nodeGroupLabel, "tas").
Label(tasBlockLabel, "b1").
Label(tasRackLabel, "r1").
Label(testing.DefaultBlockTopologyLevel, "b1").
Label(testing.DefaultRackTopologyLevel, "r1").
StatusAllocatable(corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
Expand All @@ -1201,9 +1199,7 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
}
util.CreateNodes(ctx, k8sClient, nodes)

topology = testing.MakeTopology("default").Levels(
tasBlockLabel, tasRackLabel,
).Obj()
topology = testing.MakeDefaultTwoLevelTopology("default")
gomega.Expect(k8sClient.Create(ctx, topology)).Should(gomega.Succeed())

tasFlavor = testing.MakeResourceFlavor("tas-flavor").
Expand Down Expand Up @@ -1241,7 +1237,7 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
Parallelism: 1,
Completions: 1,
PodAnnotations: map[string]string{
kueuealpha.PodSetRequiredTopologyAnnotation: tasBlockLabel,
kueuealpha.PodSetRequiredTopologyAnnotation: testing.DefaultBlockTopologyLevel,
},
Image: util.E2eTestSleepImage,
Args: []string{"1ms"},
Expand All @@ -1252,7 +1248,7 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
Parallelism: 1,
Completions: 1,
PodAnnotations: map[string]string{
kueuealpha.PodSetPreferredTopologyAnnotation: tasRackLabel,
kueuealpha.PodSetPreferredTopologyAnnotation: testing.DefaultRackTopologyLevel,
},
Image: util.E2eTestSleepImage,
Args: []string{"1ms"},
Expand All @@ -1279,7 +1275,7 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
Name: "rj1",
Count: 1,
TopologyRequest: &kueue.PodSetTopologyRequest{
Required: ptr.To(tasBlockLabel),
Required: ptr.To(testing.DefaultBlockTopologyLevel),
PodIndexLabel: ptr.To(batchv1.JobCompletionIndexAnnotation),
SubGroupIndexLabel: ptr.To(jobsetapi.JobIndexKey),
SubGroupCount: ptr.To[int32](1),
Expand All @@ -1289,7 +1285,7 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
Name: "rj2",
Count: 1,
TopologyRequest: &kueue.PodSetTopologyRequest{
Preferred: ptr.To(tasRackLabel),
Preferred: ptr.To(testing.DefaultRackTopologyLevel),
PodIndexLabel: ptr.To(batchv1.JobCompletionIndexAnnotation),
SubGroupIndexLabel: ptr.To(jobsetapi.JobIndexKey),
SubGroupCount: ptr.To[int32](1),
Expand All @@ -1311,13 +1307,13 @@ var _ = ginkgo.Describe("JobSet controller when TopologyAwareScheduling enabled"
g.Expect(wl.Status.Admission.PodSetAssignments).Should(gomega.HaveLen(2))
g.Expect(wl.Status.Admission.PodSetAssignments[0].TopologyAssignment).Should(gomega.BeComparableTo(
&kueue.TopologyAssignment{
Levels: []string{tasBlockLabel, tasRackLabel},
Levels: []string{testing.DefaultBlockTopologyLevel, testing.DefaultRackTopologyLevel},
Domains: []kueue.TopologyDomainAssignment{{Count: 1, Values: []string{"b1", "r1"}}},
},
))
g.Expect(wl.Status.Admission.PodSetAssignments[1].TopologyAssignment).Should(gomega.BeComparableTo(
&kueue.TopologyAssignment{
Levels: []string{tasBlockLabel, tasRackLabel},
Levels: []string{testing.DefaultBlockTopologyLevel, testing.DefaultRackTopologyLevel},
Domains: []kueue.TopologyDomainAssignment{{Count: 1, Values: []string{"b1", "r1"}}},
},
))
Expand Down
Loading

0 comments on commit 3378beb

Please sign in to comment.