Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

otk/utils: introduce Annotated* objects #280

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 253 additions & 0 deletions src/otk/annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import copy
import os
import pathlib
from typing import TypeVar, Generic, Any


class AnnotatedBase:
""" Implements a base class for annotated objects.
The annotations are basically a dict[str, any] of properties to attach to the class.
"""

# base directory to calculate relative paths (easier to read)
_basedir = os.path.curdir

def __init__(self) -> None:
self._otk_annotations: dict[str, Any] = {}
if isinstance(self, list):
for idx, value in enumerate(self):
self[idx] = self.deep_convert(value)
elif isinstance(self, dict):
for key, value in self.items():
self[key] = self.deep_convert(value)

def get_annotation(self, key: str, default: Any | None = None) -> Any | None:
if key in self._otk_annotations:
return self._otk_annotations[key]
return default

def set_annotation(self, key: str, val: Any) -> Any:
self._otk_annotations[key] = val

def get_annotations(self) -> dict[str, Any]:
"""Returns all annotations"""
return self._otk_annotations

def set_annotations(self, annotations: dict[str, Any]) -> None:
""" set all annotations """
self._otk_annotations = annotations

@classmethod
def deep_convert(cls, data: Any) -> Any:
"""Recursively convert nested dicts and lists into AnnotatedDict and HiddenAttrList."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT this is only needed when we we load data from the externals where it comes in via json. might be worth a comment. For the yaml the values are constructed from the ground up, so if we hook into the constructoin we always have fully Annoated* things.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I think this might be obsolete when also all tests use annotated objects when interacting with internals. I wanted to collect feedback first before changing all tests.

ret = data
if isinstance(data, AnnotatedBase):
return ret
if isinstance(data, dict):
ret = AnnotatedDict({key: cls.deep_convert(value) for key, value in data.items()})
elif isinstance(data, list):
ret = AnnotatedList([cls.deep_convert(item) for item in data])
elif isinstance(data, str):
ret = AnnotatedStr(data)
elif isinstance(data, bool):
ret = AnnotatedBool(data)
elif isinstance(data, int):
ret = AnnotatedInt(data)
elif isinstance(data, float):
ret = AnnotatedFloat(data)

# note: "complex" is not implemented as it's not used in YAML

return ret

@classmethod
def deep_dump(cls, data: Any) -> Any:
""" Converting to builtin types is usually not necessary.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT this can be done in a slightly simpler way with a custom json encoder, c.f. https://github.com/osbuild/otk/compare/main...mvo5:annotations-2?expand=1#diff-7a465d7d1dc04950d6fb0e7f8e2a314b952542824dd3ef0145b7dc00947f67d8R65 - it is only needed for custom bools (which is a bit sad)

This special conversion is mainly for json.dumps(). json library needs "bool" to pass the
condition "<var> is True" which does not apply to AnnotatedBool,
so we'll just convert all
"""
ret = data
if isinstance(data, AnnotatedDict):
ret = {key: cls.deep_dump(value) for key, value in data.items()}
elif isinstance(data, AnnotatedList):
ret = [cls.deep_dump(item) for item in data]
elif isinstance(data, AnnotatedBool):
ret = data.value
elif isinstance(data, AnnotatedFloat):
ret = float(data)
elif isinstance(data, AnnotatedInt):
ret = int(data)
elif isinstance(data, AnnotatedStr):
ret = str(data)

return ret

def add_source_attributes(self, key_data, prefix=""):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This add a tight coupling of AnnotatedBase to an yaml node, i.e. Annotated node now needs to know details of yaml nodes, having the helper outside would reduce this coupling.

line_number = key_data.start_mark.line + 1
line_number_end = key_data.end_mark.line + 1
column = key_data.start_mark.column + 1
column_end = key_data.end_mark.column + 1
filename = os.path.relpath(key_data.start_mark.name, self._basedir)
self.set_annotation(f"{prefix}src", f"{filename}:{line_number}")
self.set_annotation(f"{prefix}filename", filename)
self.set_annotation(f"{prefix}line_number", line_number)
self.set_annotation(f"{prefix}line_number_end", line_number_end)
self.set_annotation(f"{prefix}column", column)
self.set_annotation(f"{prefix}column_end", column_end)

def __add__(self, o):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

afaict the various *add is only used in a test, could we YAGNI it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it will also be used when implementing more otk operations?

ret = self.deep_convert(super().__add__(o)) # pylint: disable=E1101
ret.set_annotations(self.squash_annotations([self, o]))
return ret

def __iadd__(self, o):
return self.__add__(o)

def __radd__(self, o):
return self.deep_convert(o).__add__(self)

def __deepcopy__(self, memo):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to avoid this, this is a pretty complex function and if I remove it only the "FAILED test/test_example.py::test_errors[/home/mvogt/devel/otk/test/data/error/01-circular.yaml] - AssertionError: assert 'circular include detected:\ntest/data/error/01-circ..." for a "real-life" test fails, could this be archived in another way maybe (e.g. via something like otk_dup() ?)

if isinstance(self, AnnotatedDict):
cls = self.__class__
result = cls.__new__(cls)
memo[id(self)] = result

for k, v in self.items():
result[k] = copy.deepcopy(v, memo)

elif isinstance(self, AnnotatedList):
cls = self.__class__
result = cls.__new__(cls)
memo[id(self)] = result

for v in self:
result.append(copy.deepcopy(v, memo))
elif isinstance(self, (AnnotatedStr, AnnotatedInt, AnnotatedFloat, AnnotatedBool, AnnotatedPath)):
result = copy.copy(self)
else:
result = None # just for the linter - that should never happen

result.set_annotations(self.get_annotations().copy())

return result

@classmethod
def squash_annotations(cls, annotation_list: list) -> dict:
""" just aggregates all annotations in the list.

new annotation keys get appended to the resulting dict

duplicate annotations will get converted to string
(if you need an exception for a key please implement here)

for "src" there is an exception
this is expected to be in the form "filename:line_number"
and will be sanely unified
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How will this be used in the real code? If I remove it I only see the unit test failing but no test that shows how this is used in a otk document

Comment on lines +137 to +148
Copy link
Member

@achilleas-k achilleas-k Nov 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a rather poor docstring for a method that isn't very obvious or easy to read.

The first line starting with "just" doesn't help. "Aggregates all annotations in the list" would be a good first line for it, the "just" makes it sound like things will be simple, but the function body suggests otherwise. But I would also add "into a dictionary keyed by [something]".

"New annotation keys get appended to the dict". Isn't every key here new? How can there be old annotation keys? This looks like a pure function (it's annotated as a classmethod but it's actually static right? cls isn't used).

"Duplicate annotations will get converted to a string". Does this mean unique annotations are different types? What does that mean for the result?

"(if you need an exception for a key please implement here)". I would change this to:
"Exceptions for specific keys are implemented here."

and then follow it with:

Key exceptions:
- src: ...

But also, I'm not entirely clear what the standard behaviour is like so I don't really understand what the exception is excepting.

ret: dict[str, Any] = {}
files: dict[str, str] = {}
for item in annotation_list:
if not isinstance(item, AnnotatedBase):
continue
annotations = item.get_annotations()
for k in annotations.keys():
if k == "src":
src = annotations[k].split(":")
if src[0] in files:
files[src[0]] += ", " + src[1]
else:
files[src[0]] = src[1]
else:
if k in ret:
ret[k] = str(ret[k]) + str(annotations[k])
else:
ret[k] = annotations[k]

ret["src"] = "\n".join([f"* {k}:{v}" for k, v in files.items()])

return ret


AnnotatedDictKeyT = TypeVar('AnnotatedDictKeyT')
AnnotatedDictVarT = TypeVar('AnnotatedDictVarT')

AnnotatedListT = TypeVar('AnnotatedListT')


class AnnotatedList(Generic[AnnotatedListT], AnnotatedBase, list):
def __init__(self, seq: list[Any] | None = None) -> None:
if seq is None:
seq = []
list.__init__(self, seq)
AnnotatedBase.__init__(self)


class AnnotatedDict(Generic[AnnotatedDictKeyT, AnnotatedDictVarT], AnnotatedBase, dict):
def __init__(self, seq: dict[Any, Any] | None = None, **kwargs: dict[Any, Any]) -> None:
if seq is None:
seq = {}
dict.__init__(self, seq, **kwargs)
AnnotatedBase.__init__(self)


class AnnotatedPath(AnnotatedBase, pathlib.Path):
def __init__(self, *args: Any, **kwargs: Any) -> None:
pathlib.Path.__init__(self, *args, **kwargs)
AnnotatedBase.__init__(self)

def fspath_with_include(self) -> str:
src = None
filename = os.path.relpath(os.fspath(self), self._basedir)
try:
src = self.get_annotation("src")
except KeyError:
pass
if src is not None:
return f"{filename} (included from {src})"
return filename


class AnnotatedStr(AnnotatedBase, str):
def __init__(self, *args: str) -> None: # pylint: disable=W0613
str.__init__(self)
AnnotatedBase.__init__(self)


class AnnotatedInt(AnnotatedBase, int):
def __init__(self, *args: int) -> None: # pylint: disable=W0613
int.__init__(self)
AnnotatedBase.__init__(self)

def __str__(self): # pylint: disable=E0307
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these str helper used? When I remove them I see no test failing (this and the others)

ret = AnnotatedStr(int.__str__(self))
ret.set_annotations(self.get_annotations())
return ret


class AnnotatedFloat(AnnotatedBase, float):
def __init__(self, *args: float) -> None: # pylint: disable=W0613
float.__init__(self)
AnnotatedBase.__init__(self)

def __str__(self): # pylint: disable=E0307
ret = AnnotatedStr(float.__str__(self))
ret.set_annotations(self.get_annotations())
return ret


class AnnotatedBool(AnnotatedBase):
""" only bool can't be "subclassed" """

def __init__(self, value: Any) -> None:
self.value: bool = bool(value)
AnnotatedBase.__init__(self)

def __str__(self): # pylint: disable=E0307
ret = AnnotatedStr(bool.__str__(self))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this actually work (it may not matter as we maybe do not need it, see above) but if we do, shouldn't this be something like ret = AnnotatedStr(str(self.value)) or something?

ret.set_annotations(self.get_annotations())
return ret

@property # type: ignore[misc]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not do that, instead just:

diff --git a/src/otk/transform.py b/src/otk/transform.py
index 49513d8..6d7901f 100644
--- a/src/otk/transform.py
+++ b/src/otk/transform.py
@@ -30,7 +30,7 @@ from .error import (
 from .external import call
 from .traversal import State
 from .annotation import AnnotatedDict, AnnotatedList, AnnotatedPath, AnnotatedBase, AnnotatedStr, AnnotatedInt, \
-    AnnotatedFloat
+    AnnotatedFloat, AnnotatedBool
 
 log = logging.getLogger(__name__)
 
@@ -116,7 +116,7 @@ def resolve(ctx: Context, state: State, data: Any) -> Any:
         return resolve_list(ctx, state, data)
     if isinstance(data, AnnotatedStr):
         return resolve_str(ctx, state, data)
-    if isinstance(data, (int, float, bool, type(None))):
+    if isinstance(data, (int, float, AnnotatedBool, type(None))):
         return data
 
     raise ParseTypeError(f"could not look up {type(data)} in resolvers", state)

and remove it, this class is not "bool"; things like "AnnoatedBool(false) is False" will not work so pretending to be seems dangerous (False is a singelton).

def __class__(self):
return bool
16 changes: 3 additions & 13 deletions src/otk/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List

from . import __version__
from .annotation import AnnotatedPath
from .document import Omnifest

log = logging.getLogger(__name__)
Expand All @@ -24,17 +25,12 @@ def run(argv: List[str]) -> int:
handlers=[logging.StreamHandler()],
)

if arguments.version:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change looks unrelated (maybe it's own PR?)

print(f"otk {__version__}")
return 0

if arguments.command == "compile":
return compile(arguments)
if arguments.command == "validate":
return validate(arguments)

parser.print_help()
return 2
raise RuntimeError("Unknown subcommand")


def _process(arguments: argparse.Namespace, dry_run: bool) -> int:
Expand Down Expand Up @@ -106,12 +102,6 @@ def parser_create() -> argparse.ArgumentParser:
default=0,
help="Sets verbosity. Can be passed multiple times to be more verbose.",
)
parser.add_argument(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also looks unrelated?

"-V",
"--version",
action="store_true",
help="Print version and exit."
)
parser.add_argument(
"-W",
"--warn",
Expand All @@ -121,7 +111,7 @@ def parser_create() -> argparse.ArgumentParser:
)

# get a subparser action
subparsers = parser.add_subparsers(dest="command", required=False, metavar="command")
subparsers = parser.add_subparsers(dest="command", required=True, metavar="command")

parser_compile = subparsers.add_parser("compile", help="Compile an omnifest.")
parser_compile.add_argument(
Expand Down
16 changes: 11 additions & 5 deletions src/otk/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from abc import ABC, abstractmethod
from typing import Any, Optional

from .annotation import AnnotatedDict, AnnotatedBase
from .constant import VALID_VAR_NAME_RE
from .error import (ParseError,
TransformVariableIndexRangeError,
Expand All @@ -20,7 +21,7 @@
def validate_var_name(name):
for part in name.split("."):
if not re.fullmatch(VALID_VAR_NAME_RE, part):
raise ParseError(f"invalid variable part '{part}' in '{name}', allowed {VALID_VAR_NAME_RE}")
raise ParseError(f"invalid variable part '{part}' in '{name}', allowed {VALID_VAR_NAME_RE}", annotated=name)


class Context(ABC):
Expand All @@ -45,7 +46,7 @@ class CommonContext(Context):
warn_duplicated_defs: bool
_target_requested: str
_version: Optional[int]
_variables: dict[str, Any]
_variables: AnnotatedDict[str, Any]

def __init__(
self,
Expand All @@ -54,7 +55,7 @@ def __init__(
warn_duplicated_defs: bool = False,
) -> None:
self._version = None
self._variables = {}
self._variables = AnnotatedDict()
self._target_requested = target_requested
self.warn_duplicated_defs = warn_duplicated_defs

Expand Down Expand Up @@ -82,14 +83,19 @@ def _maybe_log_var_override(self, cur_var_scope, parts, value):

def define(self, name: str, value: Any) -> None:
log.debug("defining %r", name)

if not isinstance(value, AnnotatedBase):
# mainly for edge cases and tests
value = AnnotatedBase.deep_convert(value)

validate_var_name(name)

cur_var_scope = self._variables
parts = name.split(".")
for i, part in enumerate(parts[:-1]):
if not isinstance(cur_var_scope.get(part), dict):
self._maybe_log_var_override(cur_var_scope, parts[:i+1], {".".join(parts[i+1:]): value})
cur_var_scope[part] = {}
cur_var_scope[part] = AnnotatedDict()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this area needs more work (and a test), I played with it in https://github.com/osbuild/otk/compare/main...mvo5:annotations-2?expand=1#diff-ad6f996bbd08d84913b570b67a441454f57432e4129c7b8601e0454652501e0fR1 and I think we will need to tweak the way we define dicts in process_defines to make this fully work (and my branch is full of hacks)

cur_var_scope = cur_var_scope[part]
self._maybe_log_var_override(cur_var_scope, parts, value)
cur_var_scope[parts[-1]] = value
Expand Down Expand Up @@ -121,7 +127,7 @@ def variable(self, name: str) -> Any:

return value

def merge_defines(self, name: str, defines: dict[str, Any]) -> None:
def merge_defines(self, name: str, defines: Any) -> None:
if name == "":
self._variables.update(defines)
else:
Expand Down
Loading