Skip to content

Commit

Permalink
Merge pull request #248 from romeokienzler/main
Browse files Browse the repository at this point in the history
push filter operator to latest CLAIMED spec
  • Loading branch information
romeokienzler authored Jan 22, 2024
2 parents 2c1239e + d7f1300 commit 53b9d4c
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 19 deletions.
33 changes: 33 additions & 0 deletions component-library/filter/filter.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CWL description of the CLAIMED "filter" operator.
# The tool invokes the `claimed` command and passes every input below as a
# `--<name> <value>` command-line argument, ordered by `position`.
cwlVersion: v1.2
class: CommandLineTool

baseCommand: claimed

inputs:
  # Component reference handed to `claimed` via --component.
  component:
    type: string
    default: romeokienzler/claimed-filter:0.1
    inputBinding:
      position: 1
      prefix: --component
  # Log level forwarded to the component.
  log_level:
    type: string
    default: "INFO"
    inputBinding:
      position: 2
      prefix: --log_level
  # Predicate forwarded to the component.
  # NOTE: the default is the literal string "None" (YAML has no `None`
  # keyword), so `--predicate None` is passed when no value is supplied.
  predicate:
    type: string
    default: "None"
    inputBinding:
      position: 3
      prefix: --predicate
  # File name forwarded to the component.
  # NOTE: same literal-string "None" default as `predicate`.
  file_name:
    type: string
    default: "None"
    inputBinding:
      position: 4
      prefix: --file_name

# The tool declares no CWL outputs.
outputs: []
38 changes: 19 additions & 19 deletions component-library/filter/filter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,51 +5,51 @@
"id": "arabic-honey",
"metadata": {},
"source": [
"# Filters rows based on predicate on pandas data frame\n",
"Example \"predicate=~metadata.filename.str.contains('.gz') \" => filters all rows where column \"filename\" contains '.gz\""
"# filter"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fabulous-israeli",
"cell_type": "markdown",
"id": "ebe4a7f0",
"metadata": {},
"outputs": [],
"source": [
"# @param predicate (as described in documentation of the component)\n",
"# @param file_name csv file name"
"Filters rows based on predicate on pandas data frame\n",
"Example \"predicate=~metadata.filename.str.contains('.gz') \" => filters all rows where column \"filename\" contains '.gz\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "upset-affair",
"id": "fabulous-israeli",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"predicate = os.environ.get('predicate')\n",
"file_name = os.environ.get('file_name', 'metadata.csv')"
"!pip install pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "incomplete-monkey",
"id": "d92abd54",
"metadata": {},
"outputs": [],
"source": [
"!pip3 install pandas==1.2.1"
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "auburn-wonder",
"id": "upset-affair",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
"# predicate (as described in documentation of the component)\n",
"predicate = os.environ.get('predicate')\n",
"\n",
"# file name / path of the CSV file to read\n",
"file_name = os.environ.get('file_name')"
]
},
{
Expand All @@ -59,7 +59,7 @@
"metadata": {},
"outputs": [],
"source": [
"metadata = pd.read_csv(file_name)"
"df = pd.read_csv(file_name)"
]
},
{
Expand All @@ -69,7 +69,7 @@
"metadata": {},
"outputs": [],
"source": [
"exec('metadata = metadata[' + predicate + ']')"
"exec('df = df[' + predicate + ']')"
]
},
{
Expand All @@ -79,7 +79,7 @@
"metadata": {},
"outputs": [],
"source": [
"metadata.to_csv(file_name, index=False)"
"df.to_csv(file_name, index=False)"
]
}
],
Expand Down
22 changes: 22 additions & 0 deletions component-library/filter/filter.job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Kubernetes Job that runs filter.py with ipython inside the
# claimed-filter image.
# NOTE(review): the `value_of_*` entries and `image_pull_secret` look like
# template placeholders to be substituted before applying - confirm.
apiVersion: batch/v1
kind: Job
metadata:
  name: filter
spec:
  template:
    spec:
      containers:
        - name: filter
          image: romeokienzler/claimed-filter:0.1
          workingDir: /opt/app-root/src/
          command: ["/opt/app-root/bin/ipython", "filter.py"]
          # Parameters are handed to the script as environment variables.
          env:
            - name: log_level
              value: value_of_log_level
            - name: predicate
              value: value_of_predicate
            - name: file_name
              value: value_of_file_name
      restartPolicy: OnFailure
      # NOTE(review): Kubernetes object names may not contain underscores,
      # so this name must be replaced with a real Secret name.
      imagePullSecrets:
        - name: image_pull_secret
23 changes: 23 additions & 0 deletions component-library/filter/filter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Kubeflow Pipelines component definition for the CLAIMED "filter" operator.
# The component is named after the operator (not after one of its inputs),
# and the image matches the one referenced by filter.cwl and filter.job.yaml.
name: filter
# The example predicate references `df`, the data frame name used by the
# notebook code that evaluates the predicate.
description: >-
  # filter Filters rows based on predicate on pandas data frame
  Example 'predicate=~df.filename.str.contains('.gz') ' =>
  filters all rows where column 'filename' contains '.gz' – CLAIMED V0.1

inputs:
  - name: log_level
    type: String
    description: "update log level"
    default: "INFO"
  - name: predicate
    type: String
    description: "predicate (as described in documentation of the component)"
  - name: file_name
    type: String
    description: "file name / path of the CSV file to read"

# The component declares no outputs; an explicit empty list avoids a null.
outputs: []

implementation:
  container:
    image: romeokienzler/claimed-filter:0.1
    command:
      - sh
      - -ec
      # The three inputValue entries below become $0, $1 and $2 of this
      # inline script, in that order.
      - |
        ipython ./filter.py log_level="${0}" predicate="${1}" file_name="${2}"
      - {inputValue: log_level}
      - {inputValue: predicate}
      - {inputValue: file_name}

0 comments on commit 53b9d4c

Please sign in to comment.