Add an `output_file_name` parameter to the CLAIMED filter component (image 0.5).

Review notes (this text precedes the first file header and is ignored by
`git apply` / `patch`):
  * filter.yaml: component name and image are `filter` / `claimed-filter:0.5`,
    matching filter.cwl and filter.job.yaml, instead of being derived from the
    last parameter name (`output_file_name`).
  * filter.cwl: `output_file_name` is optional (`string?`) and has no default;
    `default: None` is the YAML *string* "None" and would be passed through as
    a literal file name.
  * filter.ipynb: `output_file_name` falls back to `file_name` when unset, so
    existing callers keep the previous overwrite-in-place behaviour instead of
    `df.to_csv(None)` silently writing nothing.
  * Leading whitespace of context lines was reconstructed from a
    whitespace-collapsed copy of this patch; verify against the target files.
    The `index` blob hashes refer to the pre-review content.

diff --git a/component-library/filter/filter.cwl b/component-library/filter/filter.cwl
index 5f3105e0..393e9757 100644
--- a/component-library/filter/filter.cwl
+++ b/component-library/filter/filter.cwl
@@ -6,7 +6,7 @@ baseCommand: "claimed"
 inputs:
   component:
     type: string
-    default: romeokienzler/claimed-filter:0.1
+    default: romeokienzler/claimed-filter:0.5
     inputBinding:
       position: 1
       prefix: --component
@@ -28,6 +28,11 @@ inputs:
     inputBinding:
       position: 4
       prefix: --file_name
+  output_file_name:
+    type: string?
+    inputBinding:
+      position: 5
+      prefix: --output_file_name
 
 
 outputs: []
diff --git a/component-library/filter/filter.ipynb b/component-library/filter/filter.ipynb
index cb1958e6..7c0cfefd 100644
--- a/component-library/filter/filter.ipynb
+++ b/component-library/filter/filter.ipynb
@@ -49,7 +49,10 @@
     "predicate = os.environ.get('predicate')\n",
     "\n",
     "# file name / path of the CSV file to read\n",
-    "file_name = os.environ.get('file_name')"
+    "file_name = os.environ.get('file_name')\n",
+    "\n",
+    "# output file name / path of the CSV file to write\n",
+    "output_file_name = os.environ.get('output_file_name', file_name)"
    ]
   },
   {
@@ -59,6 +62,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "logging.debug('Opening file...')\n",
     "df = pd.read_csv(file_name)"
    ]
   },
@@ -69,6 +73,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "logging.debug('Filtering...')\n",
     "exec('df = df[' + predicate + ']')"
    ]
   },
@@ -79,7 +84,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df.to_csv(file_name, index=False)"
+    "logging.debug('Writing file...')\n",
+    "df.to_csv(output_file_name, index=False)\n",
+    "logging.debug('Done')"
    ]
   }
  ],
@@ -99,7 +106,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.6"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/component-library/filter/filter.job.yaml b/component-library/filter/filter.job.yaml
index 31862b36..c57df545 100644
--- a/component-library/filter/filter.job.yaml
+++ b/component-library/filter/filter.job.yaml
@@ -7,7 +7,7 @@ spec:
     spec:
       containers:
       - name: filter
-        image: romeokienzler/claimed-filter:0.1
+        image: romeokienzler/claimed-filter:0.5
         workingDir: /opt/app-root/src/
         command: ["/opt/app-root/bin/ipython","filter.py"]
         env:
@@ -17,6 +17,8 @@ spec:
           value: value_of_predicate
         - name: file_name
           value: value_of_file_name
+        - name: output_file_name
+          value: value_of_output_file_name
       restartPolicy: OnFailure
       imagePullSecrets:
         - name: image_pull_secret
\ No newline at end of file
diff --git a/component-library/filter/filter.yaml b/component-library/filter/filter.yaml
index 4a3080da..8e38ba7f 100644
--- a/component-library/filter/filter.yaml
+++ b/component-library/filter/filter.yaml
@@ -1,10 +1,11 @@
-name: file_name
+name: filter
 description: "# filter Filters rows based on predicate on pandas data frame Example 'predicate=~metadata.filename.str.contains('.gz') ' => filters all rows where column 'filename' contains '.gz' – CLAIMED V0.1"
 
 inputs:
 - {name: log_level, type: String, description: "update log level", default: "INFO"}
 - {name: predicate, type: String, description: "predicate (as described in documentation of the component)"}
 - {name: file_name, type: String, description: "file name / path of the CSV file to read"}
+- {name: output_file_name, type: String, description: "output file name / path of the CSV file to write"}
 
 
 outputs:
@@ -12,12 +13,13 @@ outputs:
 
 implementation:
     container:
-        image: romeokienzler/claimed-file_name:0.1
+        image: romeokienzler/claimed-filter:0.5
         command:
         - sh
         - -ec
         - |
-          ipython ./filter.py log_level="${0}" predicate="${1}" file_name="${2}"
+          ipython ./filter.py log_level="${0}" predicate="${1}" file_name="${2}" output_file_name="${3}"
         - {inputValue: log_level}
         - {inputValue: predicate}
         - {inputValue: file_name}
+        - {inputValue: output_file_name}