diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb index 5d949df16..dd34664e2 100644 --- a/docs/source/tutorial/shell.ipynb +++ b/docs/source/tutorial/shell.ipynb @@ -11,54 +11,70 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Command-line template\n", + "## Command-line templates\n", "\n", - "Define a shell-task specification using a command template string. Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. Outputs are differentiated by the `out|` prefix." + "Shell task specs can be defined from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be a quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "\n", + "Cp = shell.define(\"cp <in_file> <out|destination>\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. 
Outputs are differentiated by the `out|` prefix.\n", + "\n", + "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[outarg(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file'), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n" - ] - }, - { - "ename": "TypeError", - "evalue": "cp.__init__() got an unexpected keyword argument 'in_file'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(list_fields(Cp))\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# Parameterise the task spec\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m cp \u001b[38;5;241m=\u001b[39m \u001b[43mCp\u001b[49m\u001b[43m(\u001b[49m\u001b[43min_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./out.txt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
15\u001b[0m \u001b[38;5;66;03m# Print the cmdline to be run to double check\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(cp\u001b[38;5;241m.\u001b[39mcmdline)\n", - "\u001b[0;31mTypeError\u001b[0m: cp.__init__() got an unexpected keyword argument 'in_file'" + "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/out.txt\n", + "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/out.txt'): 'Contents to be copied'\n" ] } ], "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", "from pydra.design import shell\n", "from pydra.engine.helpers import list_fields\n", "\n", - "test_file = \"./in.txt\"\n", + "# Make a test file to copy\n", + "test_dir = Path(mkdtemp())\n", + "test_file = test_dir / \"in.txt\"\n", "with open(test_file, \"w\") as f:\n", - " f.write(\"this is a test file\\n\")\n", - "\n", - "# Define the shell-command task specification\n", - "Cp = shell.define(\"cp \")\n", + " f.write(\"Contents to be copied\")\n", "\n", "# Parameterise the task spec\n", - "cp = Cp(in_file=test_file, out_file=\"./out.txt\")\n", + "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", "\n", "# Print the cmdline to be run to double check\n", - "print(cp.cmdline)\n", + "print(f\"Command-line to be run: {cp.cmdline}\")\n", "\n", "# Run the shell-comand task\n", - "cp()" + "result = cp()\n", + "\n", + "print(\n", + " f\"Contents of copied file ('{result.output.destination}'): \"\n", + " f\"'{Path(result.output.destination).read_text()}'\"\n", + ")" ] }, { @@ -70,9 +86,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/in.txt True\n" + ] + } + ], "source": [ "cp = 
Cp(in_file=test_file)\n", "print(cp.cmdline)" diff --git a/pydra/design/shell.py b/pydra/design/shell.py index b654acd77..544bc30ff 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -189,6 +189,7 @@ class outarg(Out, arg): """ path_template: str | None = attrs.field(default=None) + keep_extension: bool = attrs.field(default=False) @path_template.validator def _validate_path_template(self, attribute, value): @@ -198,6 +199,15 @@ def _validate_path_template(self, attribute, value): f"({self.default!r}) is provided" ) + @keep_extension.validator + def _validate_keep_extension(self, attribute, value): + """Reject a truthy keep_extension when path_template is None, raising ValueError.""" + if value and self.path_template is None: + raise ValueError( + f"keep_extension ({value!r}) can only be provided when path_template " + f"is provided" + ) + @dataclass_transform( kw_only_default=True, @@ -465,7 +475,7 @@ def parse_command_line_template( outputs = {} parts = template.split() executable = [] - for i, part in enumerate(parts, start=1): + for i, part in enumerate(parts): if part.startswith("<") or part.startswith("-"): break executable.append(part) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 339ae2ba6..7b1e127ad 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -10,7 +10,7 @@ from contextlib import contextmanager import attr from fileformats.core import FileSet -from pydra.engine.helpers import is_lazy, attrs_values +from pydra.engine.helpers import is_lazy, attrs_values, list_fields logger = logging.getLogger("pydra") @@ -114,17 +114,18 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): k = k.split(".")[1] inputs_dict_st[k] = inputs_dict_st[k][v] - from .specs import attrs_fields + from pydra.design import shell # Collect templated inputs for which all requirements are satisfied. 
fields_templ = [ field - for field in attrs_fields(inputs) - if field.metadata.get("output_file_template") + for field in list_fields(inputs) + if isinstance(field, shell.outarg) + and field.path_template and getattr(inputs, field.name) is not False and all( - getattr(inputs, required_field) is not attr.NOTHING - for required_field in field.metadata.get("requires", ()) + getattr(inputs, required_field) is not None + for required_field in field.requires ) ] @@ -151,8 +152,7 @@ def template_update_single( """ # if input_dict_st with state specific value is not available, # the dictionary will be created from inputs object - from pydra.utils.typing import TypeParser # noqa - from pydra.engine.specs import OUTPUT_TEMPLATE_TYPES + from pydra.utils.typing import TypeParser, OUTPUT_TEMPLATE_TYPES # noqa if inputs_dict_st is None: inputs_dict_st = attrs_values(inputs) @@ -200,9 +200,23 @@ def _template_formatting(field, inputs, inputs_dict_st): returning a list of formatted templates in that case. Allowing for multiple input values used in the template as longs as there is no more than one file (i.e. 
File, PathLike or string with extensions) + + Parameters + ---------- + field : pydra.engine.helpers.Field + field with a template + inputs : pydra.engine.helpers.Input + inputs object + inputs_dict_st : dict + dictionary with values from inputs object + + Returns + ------- + formatted : str or list + formatted template """ # if a template is a function it has to be run first with the inputs as the only arg - template = field.metadata["output_file_template"] + template = field.path_template if callable(template): template = template(inputs) @@ -219,9 +233,8 @@ def _template_formatting(field, inputs, inputs_dict_st): def _string_template_formatting(field, template, inputs, inputs_dict_st): - from .specs import MultiInputObj, MultiOutputFile + from pydra.utils.typing import MultiInputObj, MultiOutputFile - keep_extension = field.metadata.get("keep_extension", True) inp_fields = re.findall(r"{\w+}", template) inp_fields_fl = re.findall(r"{\w+:[0-9.]+f}", template) inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_fl] @@ -281,17 +294,25 @@ def _string_template_formatting(field, template, inputs, inputs_dict_st): formatted_value.append( _element_formatting( - template, val_dict_el, file_template, keep_extension=keep_extension + template, + val_dict_el, + file_template, + keep_extension=field.keep_extension, ) ) else: formatted_value = _element_formatting( - template, val_dict, file_template, keep_extension=keep_extension + template, val_dict, file_template, keep_extension=field.keep_extension ) return formatted_value -def _element_formatting(template, values_template_dict, file_template, keep_extension): +def _element_formatting( + template: str, + values_template_dict: dict[str, ty.Any], + file_template: str, + keep_extension: bool, +): """Formatting a single template for a single element (if a list). 
Taking into account that a file used in the template (file_template) and the template itself could have file extensions diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 22331afcc..e18cbaada 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -494,7 +494,7 @@ def from_task( ) # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides - if isinstance(fld, shell.outarg) and is_set(getattr(task.inputs, fld.name)): + if isinstance(fld, shell.outarg) and is_set(getattr(task.spec, fld.name)): resolved_value = getattr(task.spec, fld.name) elif is_set(fld.default): resolved_value = cls._resolve_default_value(fld, task.output_dir) @@ -691,10 +691,20 @@ def _command_args( else: if name in modified_inputs: pos_val = self._command_pos_args( - field, value, output_dir, root=root + field=field, + value=value, + inputs=inputs, + root=root, + output_dir=output_dir, ) else: - pos_val = self._command_pos_args(field, value, output_dir, inputs) + pos_val = self._command_pos_args( + field=field, + value=value, + output_dir=output_dir, + inputs=inputs, + root=root, + ) if pos_val: pos_args.append(pos_val) @@ -755,7 +765,7 @@ def _command_pos_args( # Shift negatives down to allow args to be -1 field.position += 1 if field.position >= 0 else -1 - if value: + if value and isinstance(value, str): if root: # values from templates value = value.replace(str(output_dir), f"{root}{output_dir}")