From 86ca369ffcdb3ed07944034294dc0ad3a43e8429 Mon Sep 17 00:00:00 2001 From: Jorge Aguilera Date: Sat, 9 Nov 2024 20:12:02 +0100 Subject: [PATCH] refactor job status query logic Signed-off-by: Jorge Aguilera --- .../nomad/executor/NomadService.groovy | 9 +- .../nomad/executor/NomadTaskHandler.groovy | 4 +- .../test/nextflow/nomad/NomadDSLSpec.groovy | 2 +- .../nomad/executor/NomadServiceSpec.groovy | 140 +----------------- .../src/testResources/allocations.json | 132 +++++++++++++++++ validation/run-all.sh | 10 +- 6 files changed, 149 insertions(+), 148 deletions(-) create mode 100644 plugins/nf-nomad/src/testResources/allocations.json diff --git a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy index bfc6954..a04ad2e 100644 --- a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy +++ b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadService.groovy @@ -28,6 +28,7 @@ import nextflow.nomad.builders.JobBuilder import nextflow.nomad.config.NomadConfig import nextflow.processor.TaskRun import nextflow.exception.ProcessSubmitException +import org.threeten.bp.OffsetDateTime import java.nio.file.Path @@ -125,11 +126,11 @@ class NomadService implements Closeable{ it.modifyIndex }?.last() : null TaskState currentState = last?.taskStates?.values()?.last() - log.debug "Task $jobId , state=${currentState.state}" - currentState + log.debug "Task $jobId , state=${currentState?.state}" + currentState ?: new TaskState(state: "unknown", failed: true, finishedAt: OffsetDateTime.now()) }catch(Exception e){ log.debug("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) - new TaskState(state: "unknow") + new TaskState(state: "unknown", failed: true, finishedAt: OffsetDateTime.now()) } } @@ -145,7 +146,7 @@ class NomadService implements Closeable{ job.status }catch (Exception e){ log.debug("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e) - "Unknow" + "unknown" } } diff --git a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy index 63d9931..87a52b5 100644 --- a/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy +++ b/plugins/nf-nomad/src/main/nextflow/nomad/executor/NomadTaskHandler.groovy @@ -78,7 +78,7 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask { if(isSubmitted()) { def state = taskState0() // include `terminated` state to allow the handler status to progress - if( state && ( "running" == state.state || "terminated" == state.state)){ + if( state && ( ["running","terminated"].contains(state.state))){ status = TaskStatus.RUNNING determineClientNode() return true @@ -93,7 +93,7 @@ class NomadTaskHandler extends TaskHandler implements FusionAwareTask { def state = taskState0() - final isFinished = state && state.finishedAt != null + final isFinished = state && (state.finishedAt != null || state.state == "unknow") log.debug "[NOMAD] checkIfCompleted task.name=$task.name; state=${state?.state} completed=$isFinished" diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy index 30b2f1c..193f3a5 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy @@ -110,7 +110,7 @@ class NomadDSLSpec extends Dsl2Spec{ if( recordedRequest.path.endsWith("/allocations")) { summary = true return new MockResponse().setResponseCode(200) - .setBody('{"Summary": {"test":{"Complete":1}}}').addHeader("Content-Type", "application/json") + .setBody(this.getClass().getResourceAsStream("/allocations.json").text).addHeader("Content-Type", "application/json") }else { return new MockResponse().setResponseCode(200) .setBody('{"Status": "dead"}').addHeader("Content-Type", "application/json") diff --git a/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy b/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy index 4bb8427..7fa442e 100644 --- a/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy +++ b/plugins/nf-nomad/src/test/nextflow/nomad/executor/NomadServiceSpec.groovy @@ -73,145 +73,11 @@ class NomadServiceSpec extends Specification{ recordedRequest.path == "/v1/job/theId/allocations" and: - state == "Unknown" + state.state == "unknown" when: mockWebServer.enqueue(new MockResponse() - .setBody(""" - [ - { - "ID": "ed344e0a-7290-d117-41d3-a64f853ca3c2", - "EvalID": "a9c5effc-2242-51b2-f1fe-054ee11ab189", - "Name": "example.cache[0]", - "NodeID": "cb1f6030-a220-4f92-57dc-7baaabdc3823", - "PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc", - "NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b", - "RescheduleTracker": { - "Events": [ - { - "PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc", - "PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e", - "RescheduleTime": 1517434161192946200, - "Delay": 5000000000 - } - ] - }, - "JobID": "example", - "TaskGroup": "cache", - "DesiredStatus": "run", - "DesiredDescription": "", - "ClientStatus": "running", - "ClientDescription": "", - "TaskStates": { - "redis": { - "State": "running", - "Failed": false, - "StartedAt": "2017-05-25T23:41:23.240184101Z", - "FinishedAt": "0001-01-01T00:00:00Z", - "Events": [ - { - "Type": "Received", - "Time": 1495755675956923000, - "FailsTask": false, - "RestartReason": "", - "SetupError": "", - "DriverError": "", - "ExitCode": 0, - "Signal": 0, - "Message": "", - "KillTimeout": 0, - "KillError": "", - "KillReason": "", - "StartDelay": 0, - "DownloadError": "", - "ValidationError": "", - "DiskLimit": 0, - "FailedSibling": "", - "VaultError": "", - "TaskSignalReason": "", - "TaskSignal": "", - "DriverMessage": "" - }, - { - "Type": "Task Setup", - "Time": 1495755675957466400, - "FailsTask": false, - "RestartReason": "", - "SetupError": "", - "DriverError": "", - "ExitCode": 0, - "Signal": 0, - "Message": "Building Task Directory", - "KillTimeout": 0, - "KillError": "", - "KillReason": "", - "StartDelay": 0, - "DownloadError": "", - "ValidationError": "", - "DiskLimit": 0, - "FailedSibling": "", - "VaultError": "", - "TaskSignalReason": "", - "TaskSignal": "", - "DriverMessage": "" - }, - { - "Type": "Driver", - "Time": 1495755675970286800, - "FailsTask": false, - "RestartReason": "", - "SetupError": "", - "DriverError": "", - "ExitCode": 0, - "Signal": 0, - "Message": "", - "KillTimeout": 0, - "KillError": "", - "KillReason": "", - "StartDelay": 0, - "DownloadError": "", - "ValidationError": "", - "DiskLimit": 0, - "FailedSibling": "", - "VaultError": "", - "TaskSignalReason": "", - "TaskSignal": "", - "DriverMessage": "Downloading image redis:7" - }, - { - "Type": "Started", - "Time": 1495755683227522000, - "FailsTask": false, - "RestartReason": "", - "SetupError": "", - "DriverError": "", - "ExitCode": 0, - "Signal": 0, - "Message": "", - "KillTimeout": 0, - "KillError": "", - "KillReason": "", - "StartDelay": 0, - "DownloadError": "", - "ValidationError": "", - "DiskLimit": 0, - "FailedSibling": "", - "VaultError": "", - "TaskSignalReason": "", - "TaskSignal": "", - "DriverMessage": "" - } - ] - } - }, - "CreateIndex": 9, - "ModifyIndex": 13, - "CreateTime": 1495755675944527600, - "ModifyTime": 1495755675944527600 - } - ] - - """) + .setBody(this.getClass().getResourceAsStream("/allocations.json").text) .addHeader("Content-Type", "application/json")); state = service.getTaskState("theId") @@ -222,7 +88,7 @@ class NomadServiceSpec extends Specification{ recordedRequest.path == "/v1/job/theId/allocations" and: - state == "running" + state.state == "running" } diff --git a/plugins/nf-nomad/src/testResources/allocations.json b/plugins/nf-nomad/src/testResources/allocations.json new file mode 100644 index 0000000..bd7b68e --- /dev/null +++ b/plugins/nf-nomad/src/testResources/allocations.json @@ -0,0 +1,132 @@ +[ + { + "ID": "ed344e0a-7290-d117-41d3-a64f853ca3c2", + "EvalID": "a9c5effc-2242-51b2-f1fe-054ee11ab189", + "Name": "example.cache[0]", + "NodeID": "cb1f6030-a220-4f92-57dc-7baaabdc3823", + "PreviousAllocation": "516d2753-0513-cfc7-57ac-2d6fac18b9dc", + "NextAllocation": "cd13d9b9-4f97-7184-c88b-7b451981616b", + "RescheduleTracker": { + "Events": [ + { + "PrevAllocID": "516d2753-0513-cfc7-57ac-2d6fac18b9dc", + "PrevNodeID": "9230cd3b-3bda-9a3f-82f9-b2ea8dedb20e", + "RescheduleTime": 1517434161192946200, + "Delay": 5000000000 + } + ] + }, + "JobID": "example", + "TaskGroup": "cache", + "DesiredStatus": "run", + "DesiredDescription": "", + "ClientStatus": "running", + "ClientDescription": "", + "TaskStates": { + "redis": { + "State": "running", + "Failed": false, + "StartedAt": "2017-05-25T23:41:23.240184101Z", + "FinishedAt": "0001-01-01T00:00:00Z", + "Events": [ + { + "Type": "Received", + "Time": 1495755675956923000, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "" + }, + { + "Type": "Task Setup", + "Time": 1495755675957466400, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "Building Task Directory", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "" + }, + { + "Type": "Driver", + "Time": 1495755675970286800, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "Downloading image redis:7" + }, + { + "Type": "Started", + "Time": 1495755683227522000, + "FailsTask": false, + "RestartReason": "", + "SetupError": "", + "DriverError": "", + "ExitCode": 0, + "Signal": 0, + "Message": "", + "KillTimeout": 0, + "KillError": "", + "KillReason": "", + "StartDelay": 0, + "DownloadError": "", + "ValidationError": "", + "DiskLimit": 0, + "FailedSibling": "", + "VaultError": "", + "TaskSignalReason": "", + "TaskSignal": "", + "DriverMessage": "" + } + ] + } + }, + "CreateIndex": 9, + "ModifyIndex": 13, + "CreateTime": 1495755675944527600, + "ModifyTime": 1495755675944527600 + } +] \ No newline at end of file diff --git a/validation/run-all.sh b/validation/run-all.sh index 8d41317..54bf01e 100755 --- a/validation/run-all.sh +++ b/validation/run-all.sh @@ -43,13 +43,15 @@ if [ "$SKIPLOCAL" == 0 ]; then -r dev -profile test,docker \ --outdir $(pwd)/nomad_temp/scratchdir/out - ./run-pipeline.sh -c basic/nextflow.config bactopia/bactopia \ - --accession SRX4563634 --coverage 100 --genome_size 2800000 \ - -profile test,docker --outdir $(pwd)/nomad_temp/scratchdir/bactopia/outdir \ - --datasets_cache $(pwd)/nomad_temp/scratchdir/bactopia/datasets +# Batctopia is failing with current version of nextflow due some bug in --max_cpus but cant find a fix +# ./run-pipeline.sh -c basic/nextflow.config bactopia/bactopia \ +# --accession SRX4563634 --coverage 100 --genome_size median --max_cpus 2 \ +# -profile test,docker --outdir $(pwd)/nomad_temp/scratchdir/bactopia/outdir \ +# --datasets_cache $(pwd)/nomad_temp/scratchdir/bactopia/datasets ./run-pipeline.sh -c secrets/nextflow.config secrets/main.nf + ./run-pipeline.sh -c tower/nextflow.config tower/main.nf else echo "skip local" fi