From cf296819c1b2f8cb86b0997f29a805ee2fb16f28 Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 15:14:05 +0200
Subject: [PATCH 01/10] support hpc

---
 CHANGELOG.md        |  4 ++++
 cloudos/__main__.py | 20 +++++++++++++++++---
 cloudos/_version.py |  2 +-
 cloudos/jobs/job.py |  6 +++++-
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 229c438..1694d19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 ## lifebit-ai/cloudos-cli: changelog
 
+## v2.8.0 (2024-0405)
+
+- Adds support for using the CloudOS HPC executor.
+
 ## v2.7.0 (2024-03-21)
 
 ### Feature
diff --git a/cloudos/__main__.py b/cloudos/__main__.py
index aed834f..27b6889 100644
--- a/cloudos/__main__.py
+++ b/cloudos/__main__.py
@@ -190,7 +190,7 @@ def queue():
               default='github')
 @click.option('--execution-platform',
               help='Name of the execution platform implemented in your CloudOS. Default=aws.',
-              type=click.Choice(['aws', 'azure']),
+              type=click.Choice(['aws', 'azure', 'hpc']),
               default='aws')
 @click.option('--cost-limit',
               help='Add a cost limit to your job. Default=30.0 (For no cost limit please use -1).',
               type=float,
@@ -249,9 +249,11 @@ def run(apikey,
     if instance_type == 'NONE_SELECTED':
         if execution_platform == 'aws':
             instance_type = 'c5.xlarge'
-        if execution_platform == 'azure':
+        elif execution_platform == 'azure':
             instance_type = 'Standard_D4as_v4'
-    if execution_platform == 'azure':
+        else:
+            instance_type = None
+    if execution_platform == 'azure' or execution_platform == 'hpc':
         batch = None
         spot = None
     elif ignite:
@@ -261,10 +263,22 @@ def run(apikey,
               'CloudOS\n')
     else:
         batch = True
+    if execution_platform == 'hpc':
+        print('\nHPC execution platform selected')
+        print('[Message] Please take into account that HPC execution does not support ' +
+              'the following parameters and all of them will be ignored:\n' +
+              '\t--resumable\n' +
+              '\t--job-queue\n' +
+              '\t--instance-type | --instance-disk | --spot | --cost-limit\n' +
+              '\t--storage-mode | --lustre-size\n' +
+              '\t--wdl-mainfile | --wdl-importsfile | --cromwell-token\n')
     if verbose:
         print('\t...Detecting workflow type')
     cl = Cloudos(cloudos_url, apikey, cromwell_token)
     workflow_type = cl.detect_workflow(workflow_name, workspace_id, verify_ssl)
+    if execution_platform == 'hpc' and workflow_type == 'wdl':
+        raise ValueError(f'The workflow {workflow_name} is a WDL workflow. ' +
+                         'WDL is not supported on the HPC execution platform.')
     if workflow_type == 'wdl':
         print('\tWDL workflow detected\n')
         if wdl_mainfile is None:
diff --git a/cloudos/_version.py b/cloudos/_version.py
index efc8b7b..f2df444 100644
--- a/cloudos/_version.py
+++ b/cloudos/_version.py
@@ -1 +1 @@
-__version__ = '2.7.0'
\ No newline at end of file
+__version__ = '2.8.0'
diff --git a/cloudos/jobs/job.py b/cloudos/jobs/job.py
index ebe7d2e..ffaaca5 100644
--- a/cloudos/jobs/job.py
+++ b/cloudos/jobs/job.py
@@ -382,7 +382,10 @@ def convert_nextflow_to_json(self,
         if storage_mode not in ['lustre', 'regular']:
             raise ValueError('Please, use either \'lustre\' or \'regular\' for --storage-mode ' +
                              f'{storage_mode} is not allowed')
-
+        if execution_platform == 'hpc':
+            hpc_id = '660fae20f93358ad61e0104b'
+        else:
+            hpc_id = None
         params = {
             "parameters": workflow_params,
             "project": project_id,
@@ -396,6 +399,7 @@ def convert_nextflow_to_json(self,
             },
             "cromwellCloudResources": cromwell_id,
             "executionPlatform": execution_platform,
+            "hpc": hpc_id,
             "storageSizeInGb": instance_disk,
             "execution": {
                 "computeCostLimit": cost_limit,

From 97f37c342ab41e3937a58b773506a80a0e8008ca Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 15:21:38 +0200
Subject: [PATCH 02/10] update pytest

---
 tests/test_data/convert_nextflow_to_json_params.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_data/convert_nextflow_to_json_params.json b/tests/test_data/convert_nextflow_to_json_params.json
index 9bec671..c0f841f 100644
--- a/tests/test_data/convert_nextflow_to_json_params.json
+++ b/tests/test_data/convert_nextflow_to_json_params.json
@@ -1 +1 @@
-{"parameters": [{"prefix": "--", "name": "reads", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data"}, {"prefix": "--", "name": "genome", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"}, {"prefix": "--", "name": "annot", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}}
+{"parameters": [{"prefix": "--", "name": "reads", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data"}, {"prefix": "--", "name": "genome", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"}, {"prefix": "--", "name": "annot", "parameterKind": "textValue", "textValue": "s3://lifebit-featured-datasets/pipelines/rnatoy-data/ggal_1_48850000_49020000.bed.gff"}], "project": "6054754029b82f0112762b9c", "workflow": "60b0ca54303ee601a69b42d1", "name": "new_job", "resumable": true, "batch": {"dockerLogin": false, "enabled": false, "jobQueue": null}, "cromwellCloudResources": null, "executionPlatform": "aws", "hpc": null, "storageSizeInGb": 500, "execution": {"computeCostLimit": -1, "optim": "test"}, "lusterFsxStorageSizeInGb": 1200, "storageMode": "regular", "revision": "", "profile": null, "spotInstances": {"instanceType": "c5.xlarge", "onDemandFallbackInstanceType": "c5.xlarge"}, "masterInstance": {"requestedInstance": {"type": "c5.xlarge", "asSpot": false}}}

From 4bdc5190d930da5af6e3f28a0b32293af766df34 Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 16:05:49 +0200
Subject: [PATCH 03/10] params changes

---
 cloudos/jobs/job.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cloudos/jobs/job.py b/cloudos/jobs/job.py
index ffaaca5..58dafb4 100644
--- a/cloudos/jobs/job.py
+++ b/cloudos/jobs/job.py
@@ -384,6 +384,8 @@ def convert_nextflow_to_json(self,
                              f'{storage_mode} is not allowed')
         if execution_platform == 'hpc':
             hpc_id = '660fae20f93358ad61e0104b'
+            resumable = False
+            cost_limit = -1
         else:
             hpc_id = None
         params = {
@@ -409,14 +411,15 @@ def convert_nextflow_to_json(self,
             "storageMode": storage_mode,
             "revision": revision_block,
             "profile": nextflow_profile,
-            instance: instance_type_block,
-            "masterInstance": {
+            instance: instance_type_block
+        }
+        if execution_platform != 'hpc':
+            params['masterInstance'] = {
                 "requestedInstance": {
                     "type": instance_type,
                     "asSpot": False
                 }
             }
-        }
         return params
 
     def send_job(self,

From 97e4071be7091773ac39d2d4f6c0f9aebe265069 Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 16:33:19 +0200
Subject: [PATCH 04/10] adding a param for hpc_id

---
 cloudos/__main__.py                            |  8 ++++++++
 cloudos/jobs/job.py                            | 17 +++++++++--------
 .../test_jobs/test_convert_nextflow_to_json.py |  3 +++
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/cloudos/__main__.py b/cloudos/__main__.py
index 27b6889..de2eaaf 100644
--- a/cloudos/__main__.py
+++ b/cloudos/__main__.py
@@ -192,6 +192,10 @@ def queue():
               help='Name of the execution platform implemented in your CloudOS. Default=aws.',
               type=click.Choice(['aws', 'azure', 'hpc']),
               default='aws')
+@click.option('--hpc-id',
+              help=('ID of your HPC, only applicable when execution-platform=hpc. ' +
+                    'Default=660fae20f93358ad61e0104b'),
+              default='660fae20f93358ad61e0104b')
 @click.option('--cost-limit',
               help='Add a cost limit to your job. Default=30.0 (For no cost limit please use -1).',
               type=float,
@@ -238,6 +242,7 @@ def run(apikey,
         cromwell_token,
         repository_platform,
         execution_platform,
+        hpc_id,
         cost_limit,
         verbose,
         request_interval,
@@ -265,6 +270,8 @@ def run(apikey,
         batch = True
     if execution_platform == 'hpc':
         print('\nHPC execution platform selected')
+        if hpc_id is None:
+            raise ValueError('Please specify your HPC ID using the --hpc-id parameter')
         print('[Message] Please take into account that HPC execution does not support ' +
               'the following parameters and all of them will be ignored:\n' +
               '\t--resumable\n' +
@@ -339,6 +346,7 @@ def run(apikey,
                           storage_mode=storage_mode,
                           lustre_size=lustre_size,
                           execution_platform=execution_platform,
+                          hpc_id=hpc_id,
                           workflow_type=workflow_type,
                           cromwell_id=cromwell_id,
                           cost_limit=cost_limit,
diff --git a/cloudos/jobs/job.py b/cloudos/jobs/job.py
index 58dafb4..b3fd916 100644
--- a/cloudos/jobs/job.py
+++ b/cloudos/jobs/job.py
@@ -199,6 +199,7 @@ def convert_nextflow_to_json(self,
                                  storage_mode,
                                  lustre_size,
                                  execution_platform,
+                                 hpc_id,
                                  workflow_type,
                                  cromwell_id,
                                  cost_limit):
@@ -248,8 +249,10 @@ def convert_nextflow_to_json(self,
         lustre_size : int
             The lustre storage to be used when --storage-mode=lustre, in GB. It should
             be 1200 or a multiple of it.
-        execution_platform : string ['aws'|'azure']
+        execution_platform : string ['aws'|'azure'|'hpc']
             The execution platform implemented in your CloudOS.
+        hpc_id : string
+            The ID of your HPC in CloudOS.
         workflow_type : str
             The type of workflow to run. Either 'nextflow' or 'wdl'.
         cromwell_id : str
@@ -382,12 +385,6 @@ def convert_nextflow_to_json(self,
         if storage_mode not in ['lustre', 'regular']:
             raise ValueError('Please, use either \'lustre\' or \'regular\' for --storage-mode ' +
                              f'{storage_mode} is not allowed')
-        if execution_platform == 'hpc':
-            hpc_id = '660fae20f93358ad61e0104b'
-            resumable = False
-            cost_limit = -1
-        else:
-            hpc_id = None
         params = {
             "parameters": workflow_params,
             "project": project_id,
@@ -439,6 +436,7 @@ def send_job(self,
                  storage_mode='regular',
                  lustre_size=1200,
                  execution_platform='aws',
+                 hpc_id=None,
                  workflow_type='nextflow',
                  cromwell_id=None,
                  cost_limit=30.0,
@@ -485,8 +483,10 @@ def send_job(self,
         lustre_size : int
             The lustre storage to be used when --storage-mode=lustre, in GB. It should
             be 1200 or a multiple of it.
-        execution_platform : string ['aws'|'azure']
+        execution_platform : string ['aws'|'azure'|'hpc']
             The execution platform implemented in your CloudOS.
+        hpc_id : string
+            The ID of your HPC in CloudOS.
         workflow_type : str
             The type of workflow to run. Either 'nextflow' or 'wdl'.
         cromwell_id : str
@@ -531,6 +531,7 @@ def send_job(self,
                                               storage_mode,
                                               lustre_size,
                                               execution_platform,
+                                              hpc_id,
                                               workflow_type,
                                               cromwell_id,
                                               cost_limit)
diff --git a/tests/test_jobs/test_convert_nextflow_to_json.py b/tests/test_jobs/test_convert_nextflow_to_json.py
index e48341c..46fdc27 100644
--- a/tests/test_jobs/test_convert_nextflow_to_json.py
+++ b/tests/test_jobs/test_convert_nextflow_to_json.py
@@ -24,6 +24,7 @@
     "storage_mode": 'regular',
     "lustre_size": 1200,
     "execution_platform": "aws",
+    "hpc_id": None,
     "workflow_type": 'nextflow',
     "cromwell_id": None,
     "cost_limit": -1
@@ -50,6 +51,7 @@ def test_convert_nextflow_to_json_output_correct():
         storage_mode=param_dict["storage_mode"],
         lustre_size=param_dict["lustre_size"],
         execution_platform=param_dict["execution_platform"],
+        hpc_id=param_dict["hpc_id"],
         workflow_type=param_dict["workflow_type"],
         cromwell_id=param_dict["cromwell_id"],
         cost_limit=param_dict["cost_limit"]
@@ -81,6 +83,7 @@ def test_convert_nextflow_to_json_badly_formed_config():
         storage_mode=param_dict["storage_mode"],
         lustre_size=param_dict["lustre_size"],
         execution_platform=param_dict["execution_platform"],
+        hpc_id=param_dict["hpc_id"],
         workflow_type=param_dict["workflow_type"],
         cromwell_id=param_dict["cromwell_id"],
         cost_limit=param_dict["cost_limit"]

From 6619634f734251ab530e24af9f7da219307cb2fa Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 16:42:03 +0200
Subject: [PATCH 05/10] update docs

---
 README.md | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3a804db..02c297f 100644
--- a/README.md
+++ b/README.md
@@ -141,9 +141,12 @@ Options:
                                   backwards compatibility.
   --repository-platform TEXT      Name of the repository platform of the
                                   workflow. Default=github.
-  --execution-platform [aws|azure]
+  --execution-platform [aws|azure|hpc]
                                   Name of the execution platform implemented
                                   in your CloudOS. Default=aws.
+  --hpc-id TEXT                   ID of your HPC, only applicable when
+                                  execution-platform=hpc.
+                                  Default=660fae20f93358ad61e0104b
   --cost-limit FLOAT              Add a cost limit to your job. Default=30.0
                                   (For no cost limit please use -1).
   --verbose                       Whether to print information messages or
@@ -318,6 +321,35 @@ cloudos job run \
     --execution-platform azure
 ```
 
+#### HPC execution support
+
+CloudOS is also prepared to use an HPC compute infrastructure. For such cases, you will need to take into account the following for your job submissions using the `cloudos job run` command:
+
+- Use the following parameter: `--execution-platform hpc`.
+- Indicate the HPC ID in your CloudOS using: `--hpc-id XXXX`.
+
+Example command:
+
+```bash
+cloudos job run \
+    --cloudos-url $CLOUDOS \
+    --apikey $MY_API_KEY \
+    --workspace-id $WORKSPACE_ID \
+    --project-name "$PROJECT_NAME" \
+    --workflow-name $WORKFLOW_NAME \
+    --job-config $JOB_PARAMS \
+    --execution-platform hpc \
+    --hpc_id $YOUR_HPC_ID
+```
+
+Please note that HPC execution does not support the following parameters and all of them will be ignored:
+
+- `--resumable`
+- `--job-queue`
+- `--instance-type` | `--instance-disk` | `--spot` | `--cost-limit`
+- `--storage-mode` | `--lustre-size`
+- `--wdl-mainfile` | `--wdl-importsfile` | `--cromwell-token`
+
 #### Check job status
 
 To check the status of a submitted job, just use the suggested command:

From b05366ad6f2c15606b9f895e58a7c018b5dda2d7 Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 16:58:44 +0200
Subject: [PATCH 06/10] minor readme update

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 02c297f..b9a98bb 100644
--- a/README.md
+++ b/README.md
@@ -326,7 +326,7 @@ cloudos job run \
 CloudOS is also prepared to use an HPC compute infrastructure. For such cases, you will need to take into account the following for your job submissions using the `cloudos job run` command:
 
 - Use the following parameter: `--execution-platform hpc`.
-- Indicate the HPC ID in your CloudOS using: `--hpc-id XXXX`.
+- Indicate the HPC ID using: `--hpc-id XXXX`.
 
 Example command:
 

From 6525e9d16d8c77cbf54e0865e0cd0b7db717bd05 Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 17:49:32 +0200
Subject: [PATCH 07/10] removing wdl params

---
 cloudos/__main__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cloudos/__main__.py b/cloudos/__main__.py
index de2eaaf..3aa07ec 100644
--- a/cloudos/__main__.py
+++ b/cloudos/__main__.py
@@ -279,6 +279,8 @@ def run(apikey,
               '\t--instance-type | --instance-disk | --spot | --cost-limit\n' +
               '\t--storage-mode | --lustre-size\n' +
               '\t--wdl-mainfile | --wdl-importsfile | --cromwell-token\n')
+        wdl_mainfile = None
+        wdl_importsfile = None
     if verbose:
         print('\t...Detecting workflow type')
     cl = Cloudos(cloudos_url, apikey, cromwell_token)

From f90363670fa2b64f02789267921d0625b1e53aee Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 17:57:03 +0200
Subject: [PATCH 08/10] default storage mode

---
 cloudos/__main__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cloudos/__main__.py b/cloudos/__main__.py
index 3aa07ec..9643a05 100644
--- a/cloudos/__main__.py
+++ b/cloudos/__main__.py
@@ -281,6 +281,7 @@ def run(apikey,
               '\t--wdl-mainfile | --wdl-importsfile | --cromwell-token\n')
         wdl_mainfile = None
         wdl_importsfile = None
+        storage_mode = 'regular'
     if verbose:
         print('\t...Detecting workflow type')
     cl = Cloudos(cloudos_url, apikey, cromwell_token)

From 35e22bc5c26d3e7412ea3eea43fcf63bb9b2034c Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 18:53:29 +0200
Subject: [PATCH 09/10] mini-typo in readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b9a98bb..58c2c64 100644
--- a/README.md
+++ b/README.md
@@ -339,7 +339,7 @@ cloudos job run \
     --workflow-name $WORKFLOW_NAME \
     --job-config $JOB_PARAMS \
     --execution-platform hpc \
-    --hpc_id $YOUR_HPC_ID
+    --hpc-id $YOUR_HPC_ID
 ```
 
 Please note that HPC execution does not support the following parameters and all of them will be ignored:

From 5d361a5f053d3ccbb46a344e2cfd2376c8f4ba5d Mon Sep 17 00:00:00 2001
From: dapineyro
Date: Fri, 5 Apr 2024 21:36:16 +0200
Subject: [PATCH 10/10] review suggestions

---
 CHANGELOG.md        | 2 +-
 README.md           | 2 +-
 cloudos/__main__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1694d19..c61eccf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 ## lifebit-ai/cloudos-cli: changelog
 
-## v2.8.0 (2024-0405)
+## v2.8.0 (2024-04-05)
 
 - Adds support for using the CloudOS HPC executor.
 
diff --git a/README.md b/README.md
index 58c2c64..dd3020a 100644
--- a/README.md
+++ b/README.md
@@ -145,7 +145,7 @@ Options:
                                   Name of the execution platform implemented
                                   in your CloudOS. Default=aws.
   --hpc-id TEXT                   ID of your HPC, only applicable when
-                                  execution-platform=hpc.
+                                  --execution-platform=hpc.
                                   Default=660fae20f93358ad61e0104b
   --cost-limit FLOAT              Add a cost limit to your job. Default=30.0
                                   (For no cost limit please use -1).
diff --git a/cloudos/__main__.py b/cloudos/__main__.py
index 9643a05..f33597a 100644
--- a/cloudos/__main__.py
+++ b/cloudos/__main__.py
@@ -193,7 +193,7 @@ def queue():
               type=click.Choice(['aws', 'azure', 'hpc']),
               default='aws')
 @click.option('--hpc-id',
-              help=('ID of your HPC, only applicable when execution-platform=hpc. ' +
+              help=('ID of your HPC, only applicable when --execution-platform=hpc. ' +
                     'Default=660fae20f93358ad61e0104b'),
               default='660fae20f93358ad61e0104b')
 @click.option('--cost-limit',
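
Taken together, the series exposes the new HPC option both on the CLI (`--execution-platform hpc` plus `--hpc-id`, as documented in the README hunks above) and on the Python `Job.send_job` API. Below is a minimal, hedged sketch of the Python call path; only the `execution_platform` and `hpc_id` arguments of `send_job` are confirmed by these patches, while the `Job` constructor arguments and the config path are assumptions inferred from the CLI options and may differ from the actual class:

```python
# Usage sketch for the HPC support introduced in this patch series.
# Assumption: the Job constructor accepts these keyword arguments; only
# send_job(execution_platform=..., hpc_id=...) is confirmed by the diffs.
from cloudos.jobs.job import Job

job = Job(cloudos_url='https://cloudos.lifebit.ai',  # assumed constructor args
          apikey='YOUR_API_KEY',
          cromwell_token=None,
          workspace_id='YOUR_WORKSPACE_ID',
          project_name='YOUR_PROJECT',
          workflow_name='YOUR_WORKFLOW')

# Confirmed by this series: execution_platform='hpc' selects the CloudOS HPC
# executor and hpc_id picks the target HPC (the ID below is the CLI default).
job_id = job.send_job(job_config='job_params.config',  # hypothetical config path
                      execution_platform='hpc',
                      hpc_id='660fae20f93358ad61e0104b')
print(f'Job sent to the CloudOS HPC executor: {job_id}')
```

Note that, per patches 07 and 08, WDL-specific inputs and the storage mode are silently reset for HPC runs, so a WDL workflow or `--storage-mode lustre` combined with `--execution-platform hpc` will not behave as it does on AWS.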