def _scan_format_keywords(fmtstr):
    """Best-effort regex scan for keywords in a (possibly malformed) template.

    Used only as a fallback when the template cannot be parsed by
    ``string.Formatter``. It recognises ``{name}``, attribute/item lookups,
    ``!r``/``!s``/``!a`` conversions and at most one nested field in the
    format spec.
    """
    identifier = r"[a-zA-Z_]\w*"
    attribute = rf"\.{identifier}"
    item = r"\[\w+\]"
    # Example: var.attr[key][0].attr2 (capture "var")
    field_with_lookups = f"({identifier})(?:{attribute}|{item})*"
    conversion = "(?:![rsa])"
    nobrace = "[^{}]*"
    # Example: 0{pads[hex]}x (capture "pads")
    fmtspec = f"{nobrace}(?:{{({identifier}){nobrace}}}{nobrace})?"
    full_field = f"{{{field_with_lookups}{conversion}?(?::{fmtspec})?}}"

    # Each match is a (field keyword, spec keyword) tuple; an unused group
    # shows up as "", which is dropped from the result.
    return set().union(*re.findall(full_field, fmtstr)) - {""}


def parse_format_string(fmtstr):
    """Parse an argstr format string and return all keywords used in it.

    Parameters
    ----------
    fmtstr : str
        A ``str.format``-style template, e.g. ``"{a.attr[key]!r:0{width}}"``.

    Returns
    -------
    set of str
        The keyword (named) arguments the template refers to, including
        keywords nested inside format specs. Attribute and item lookups are
        stripped (``"{a.b[0]}"`` yields ``"a"``). Positional fields
        (``"{}"``, ``"{0}"``) and escaped braces (``"{{a}}"``) contribute
        nothing.

    Notes
    -----
    Templates that ``string.Formatter`` rejects (for example an unbalanced
    ``{``) do not raise; they are scanned with a best-effort regex instead.
    """
    # Local import keeps this function independent of the module's import block.
    from string import Formatter

    identifier = r"[a-zA-Z_]\w*"

    def collect(template, found):
        for _, field_name, format_spec, _ in Formatter().parse(template):
            if field_name is None:
                continue  # trailing literal text, no replacement field
            # The argument name is everything before the first lookup.
            first = re.split(r"[.\[]", field_name, maxsplit=1)[0]
            if re.fullmatch(identifier, first):
                found.add(first)
            if format_spec:
                # Format specs may themselves contain replacement fields.
                collect(format_spec, found)
        return found

    try:
        return collect(fmtstr, set())
    except ValueError:
        # Malformed template: fall back to the lenient scan rather than fail.
        return _scan_format_keywords(fmtstr)


def test_parse_format_string1():
    """A single one-letter field yields that keyword."""
    assert parse_format_string("{a}") == {"a"}
def test_parse_format_string2():
    """A multi-character keyword is returned whole."""
    template = "{abc}"
    expected = {"abc"}
    assert parse_format_string(template) == expected


def test_parse_format_string3():
    """A keyword nested in the format spec is reported too."""
    template = "{a:{b}}"
    expected = {"a", "b"}
    assert parse_format_string(template) == expected


def test_parse_format_string4():
    """An item lookup on the nested keyword is stripped."""
    template = "{a:{b[2]}}"
    expected = {"a", "b"}
    assert parse_format_string(template) == expected


def test_parse_format_string5():
    """Chained attribute/item lookups reduce to the base keywords."""
    template = "{a.xyz[somekey].abc:{b[a][b].d[0]}}"
    expected = {"a", "b"}
    assert parse_format_string(template) == expected


def test_parse_format_string6():
    """Literal spec text and a spaced item key do not hide the keywords."""
    template = "{a:05{b[a 2][b].e}}"
    expected = {"a", "b"}
    assert parse_format_string(template) == expected


def test_parse_format_string7():
    """Every field of a realistic multi-field argstr is collected."""
    template = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
    expected = {"a1_field", "b2_field", "c3_field", "d4_field"}
    assert parse_format_string(template) == expected