Skip to content

Commit

Permalink
feat: tpu_queued_resources_time_bound
Browse files Browse the repository at this point in the history
  • Loading branch information
gryczj committed Nov 12, 2024
1 parent bdfab57 commit 7270090
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 29 deletions.
3 changes: 0 additions & 3 deletions tpu/queuedResources/createQueuedResourceNetwork.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,6 @@ async function main(

// You can wait until TPU Node is READY,
// and check its status using getTpuVm() from `tpu_vm_get` sample.
console.log(
`Queued resource ${queuedResourceName} with specified network created.`
);
console.log(JSON.stringify(response));
}
await callCreateQueuedResourceNetwork();
Expand Down
3 changes: 0 additions & 3 deletions tpu/queuedResources/createQueuedResourceStartupScript.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,6 @@ async function main(

// You can wait until TPU Node is READY,
// and check its status using getTpuVm() from `tpu_vm_get` sample.
console.log(
`Queued resource ${queuedResourceName} with start-up script created.`
);
console.log(JSON.stringify(response));
}
await callCreateQueuedResourceStartupScript();
Expand Down
155 changes: 155 additions & 0 deletions tpu/queuedResources/createQueuedResourceTimeBound.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
 * Creates a TPU queued resource with a time-bound queueing policy and prints
 * the finished create operation's response as JSON.
 *
 * @param {string} nodeName - ID of the TPU node requested through the queued resource.
 * @param {string} queuedResourceName - ID of the queued resource to create.
 * @param {string} zone - Zone in which to create the node, e.g. 'europe-west4-a'.
 * @param {string} tpuType - Accelerator type of the node, e.g. 'v2-8'.
 * @param {string} tpuSoftwareVersion - Node runtime version, e.g. 'tpu-vm-tf-2.14.1'.
 * @returns {Promise<void>} Settles once the create operation has completed.
 */
async function main(
  nodeName,
  queuedResourceName,
  zone,
  tpuType,
  tpuSoftwareVersion
) {
  // [START tpu_queued_resources_time_bound]
  // Load the TPU library: the client plus the v2alpha1 message classes
  const tpuLibrary = require('@google-cloud/tpu');
  const {TpuClient} = tpuLibrary.v2alpha1;
  const {Node, NetworkConfig, QueuedResource} =
    tpuLibrary.protos.google.cloud.tpu.v2alpha1;

  // Create the client used for every request below
  const tpuClient = new TpuClient();

  /**
   * TODO(developer): Update/uncomment these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project
  // in which the queued resource will be created.
  const projectId = await tpuClient.getProjectId();

  // Name of the network the node should connect to.
  // The network must be assigned to your project.
  const networkName = 'compute-tpu-network';

  // Region of the network the node should connect to.
  const region = 'europe-west4';

  // Name of the queued resource.
  // queuedResourceName = 'queued-resource-1';

  // Name of the node.
  // nodeName = 'node-name-1';

  // Zone in which to create the node.
  // Supported TPU types per zone are listed at
  // https://cloud.google.com/tpu/docs/regions-zones
  // zone = 'europe-west4-a';

  // Accelerator type: selects the version and size of the node to create.
  // Supported accelerator types per TPU version are listed at
  // https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
  // tpuType = 'v2-8';

  // Software version: selects the node runtime to install.
  // See https://cloud.google.com/tpu/docs/runtimes
  // tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

  async function callCreateQueuedResourceTimeBound() {
    // Parent location shared by the node spec and the create request
    const parent = `projects/${projectId}/locations/${zone}`;

    // Network the node is attached to
    const networkConfig = new NetworkConfig({
      enableExternalIps: true,
      network: `projects/${projectId}/global/networks/${networkName}`,
      subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`,
    });

    // Node requested through the queued resource
    const node = new Node({
      name: nodeName,
      zone,
      acceleratorType: tpuType,
      runtimeVersion: tpuSoftwareVersion,
      networkConfig,
      queuedResource: `${parent}/queuedResources/${queuedResourceName}`,
    });

    // Time-bound queueing policy. This sample sets `validAfterDuration`;
    // the commented-out entries show the other time-bound fields.
    const queueingPolicy = new QueuedResource.QueueingPolicy({
      // Duration after which the resource should be allocated.
      // Expressed in seconds: hours * 3600.
      validAfterDuration: {
        seconds: 6 * 3600,
      },
      // How long the queued resource request remains valid.
      // Expressed in seconds: hours * 3600.
      // validUntilDuration: {
      //   seconds: 6 * 3600,
      // },
      // Point in time after which the resource should be allocated.
      // Expressed in seconds: new Date('YOUR_TIMESTAMP').getTime() / 1000
      // validAfterTime: {
      //   seconds: new Date('2024-10-25T11:45:00Z').getTime() / 1000,
      // },
      // Point in time before which the resource should be allocated.
      // Expressed in seconds: new Date('YOUR_TIMESTAMP').getTime() / 1000
      // validUntilTime: {
      //   seconds: new Date('2024-10-25T11:45:00Z').getTime() / 1000,
      // },
      // Allocation interval: `startTime` is the beginning of the interval
      // and `endTime` is its end.
      // Expressed in seconds: new Date('YOUR_TIMESTAMP').getTime() / 1000
      // validInterval: {
      //   startTime: {
      //     seconds: new Date('2024-10-25T15:45:00Z').getTime() / 1000,
      //   },
      //   endTime: {
      //     seconds: new Date('2024-10-26T11:45:00Z').getTime() / 1000,
      //   },
      // },
    });

    // Queued resource wrapping the single node spec and the policy
    const queuedResource = new QueuedResource({
      name: queuedResourceName,
      tpu: {
        nodeSpec: [{parent, node, nodeId: nodeName}],
      },
      queueingPolicy,
    });

    const [operation] = await tpuClient.createQueuedResource({
      parent,
      queuedResource,
      queuedResourceId: queuedResourceName,
    });

    // Block until the create operation has finished.
    const [createdResource] = await operation.promise();

    // You can wait until TPU Node is READY,
    // and check its status using getTpuVm() from `tpu_vm_get` sample.
    console.log(JSON.stringify(createdResource));
  }
  await callCreateQueuedResourceTimeBound();
  // [END tpu_queued_resources_time_bound]
}

// Script entry point: pass the command-line arguments (everything after
// `node <script>`) to main() positionally. On failure, log the error and set
// a non-zero exit code.
const cliArgs = process.argv.slice(2);
main(...cliArgs).catch(error => {
  console.error(error);
  process.exitCode = 1;
});
22 changes: 11 additions & 11 deletions tpu/test/createQueuedResourceNetwork.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,25 +48,25 @@ describe('TPU queued resource with specified network', async () => {
);
});

it('should create queued resource with specified network', () => {
it('should create queued resource', () => {
const networkConfig = {
network: `projects/${projectId}/global/networks/compute-tpu-network`,
subnetwork: `projects/${projectId}/regions/europe-west4/subnetworks/compute-tpu-network`,
enableExternalIps: true,
};

const response = execSync(
`node ./queuedResources/createQueuedResourceNetwork.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
{
cwd,
}
const response = JSON.parse(
execSync(
`node ./queuedResources/createQueuedResourceNetwork.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
{
cwd,
}
)
);

assert(
response.includes(
`Queued resource ${queuedResourceName} with specified network created.`
)
assert.deepEqual(
response.tpu.nodeSpec[0].node.networkConfig,
networkConfig
);
assert(response.includes(JSON.stringify(networkConfig)));
});
});
21 changes: 9 additions & 12 deletions tpu/test/createQueuedResourceStartupScript.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,21 @@ describe('TPU queued resource with start-up script', async () => {
);
});

it('should create queued resource with start-up script', () => {
it('should create queued resource', () => {
const metadata = {
'startup-script':
'#!/bin/bash\n echo "Hello World" > /var/log/hello.log\n sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1',
};

const response = execSync(
`node ./queuedResources/createQueuedResourceStartupScript.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
{
cwd,
}
);

assert(
response.includes(
`Queued resource ${queuedResourceName} with start-up script created.`
const response = JSON.parse(
execSync(
`node ./queuedResources/createQueuedResourceStartupScript.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
{
cwd,
}
)
);
assert(response.includes(JSON.stringify(metadata)));

assert.deepEqual(response.tpu.nodeSpec[0].node.metadata, metadata);
});
});
59 changes: 59 additions & 0 deletions tpu/test/createQueuedResourceTimeBound.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const path = require('path');
const assert = require('node:assert/strict');
const {after, describe, it} = require('mocha');
const cp = require('child_process');

/**
 * Runs a shell command synchronously and returns its stdout as a UTF-8 string.
 *
 * Caller-supplied options (for example `cwd`) are forwarded to
 * `child_process.execSync`; previously they were silently dropped because the
 * wrapper accepted only the command.
 *
 * @param {string} cmd - Command line to execute.
 * @param {object} [options] - Extra `child_process.execSync` options.
 * @returns {string} The command's stdout.
 */
const execSync = (cmd, options = {}) =>
  cp.execSync(cmd, {encoding: 'utf-8', ...options});
const cwd = path.join(__dirname, '..');

// Integration test for the createQueuedResourceTimeBound.js sample: runs the
// sample as a child process, parses the JSON it prints, and checks the
// queueing policy of the returned queued resource.
// The suite callback is synchronous because Mocha does not support `describe`
// callbacks that return a Promise.
describe('TPU time bound queued resource', () => {
  // Random numeric suffixes are appended to the resource names.
  const queuedResourceName = `queued-resource-time-bound-${Math.floor(Math.random() * 1000 + 1)}`;
  const nodeName = `node-time-bound-2a2b3c${Math.floor(Math.random() * 1000 + 1)}`;
  const zone = 'us-west4-a';
  const tpuType = 'v5litepod-1';
  const tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

  after(() => {
    // Delete queued resource
    execSync(
      `node ./queuedResources/forceDeleteQueuedResource.js ${queuedResourceName} ${zone}`,
      {
        cwd,
      }
    );
  });

  it('should create queued resource', () => {
    const response = JSON.parse(
      execSync(
        `node ./queuedResources/createQueuedResourceTimeBound.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
        {
          cwd,
        }
      )
    );

    const {queueingPolicy} = response;
    assert.ok(queueingPolicy);
    assert.ok(queueingPolicy.validAfterTime);

    // `assert(value, message)` only checks truthiness, and `typeof` always
    // yields a non-empty string, so the types must be compared explicitly.
    // NOTE(review): `nanos` is defaulted because a zero value may be omitted
    // from the serialized response - confirm against the API output.
    const {seconds, nanos = 0} = queueingPolicy.validAfterTime;
    // NOTE(review): assumes int64 `seconds` is serialized as a string in the
    // JSON printed by the sample - confirm against the API output.
    assert.strictEqual(typeof seconds, 'string');
    assert.strictEqual(typeof nanos, 'number');
  });
});

0 comments on commit 7270090

Please sign in to comment.