-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
otk/utils: introduce Annotated* objects #280
base: main
Are you sure you want to change the base?
Changes from all commits
2499d72
5fb3563
dc2afea
95798b8
5acf377
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
import copy | ||
import os | ||
import pathlib | ||
import sys | ||
from typing import TypeVar, Generic, Any, Optional | ||
|
||
|
||
class AnnotatedBase: | ||
""" Implements a base class for annotated objects. | ||
The annotations are basically a dict[str, any] of properties to attach to the class. | ||
""" | ||
|
||
# base directory to calculate relative paths (easier to read) | ||
_basedir = os.path.curdir | ||
|
||
def __init__(self) -> None: | ||
self._otk_annotations: dict[str, Any] = {} | ||
if isinstance(self, list): | ||
for idx, value in enumerate(self): | ||
self[idx] = self.deep_convert(value) | ||
elif isinstance(self, dict): | ||
for key, value in self.items(): | ||
self[key] = self.deep_convert(value) | ||
|
||
def get_annotation(self, key: str, default: Any = None) -> Any: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand the desire to write a nice generic way to do annotations. At the same time, going more into YAGNI/KISS land might be beneficial here, AFAICT we only have a single annotation that we care about ("src") and the rest is there because we might need them later. We could probably archive the same via a single otk annotation that becomes just a dataclass so we can still expand it. My strawman would be to have I played around with this branch a bit in a version and I think just using a |
||
if key in self._otk_annotations: | ||
return self._otk_annotations[key] | ||
return default | ||
|
||
def set_annotation(self, key: str, val: Any) -> Any: | ||
self._otk_annotations[key] = val | ||
|
||
def get_annotations(self) -> dict[str, Any]: | ||
"""Returns all annotations""" | ||
return self._otk_annotations | ||
|
||
def set_annotations(self, annotations: dict[str, Any]) -> None: | ||
""" set all annotations """ | ||
self._otk_annotations = annotations | ||
|
||
@classmethod | ||
def deep_convert(cls, data: Any) -> Any: | ||
"""Recursively convert nested dicts and lists into AnnotatedDict and HiddenAttrList.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAICT this is only needed when we we load data from the externals where it comes in via json. might be worth a comment. For the yaml the values are constructed from the ground up, so if we hook into the constructoin we always have fully Annoated* things. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, I think this might be obsolete when also all tests use annotated objects when interacting with internals. I wanted to collect feedback first before changing all tests. |
||
ret = data | ||
if isinstance(data, AnnotatedBase): | ||
return ret | ||
if isinstance(data, dict): | ||
ret = AnnotatedDict({key: cls.deep_convert(value) for key, value in data.items()}) | ||
elif isinstance(data, list): | ||
ret = AnnotatedList([cls.deep_convert(item) for item in data]) | ||
elif isinstance(data, str): | ||
ret = AnnotatedStr(data) | ||
elif isinstance(data, bool): | ||
ret = AnnotatedBool(data) | ||
elif isinstance(data, int): | ||
ret = AnnotatedInt(data) | ||
elif isinstance(data, float): | ||
ret = AnnotatedFloat(data) | ||
|
||
# note: "complex" is not implemented as it's not used in YAML | ||
|
||
return ret | ||
|
||
@classmethod | ||
def deep_dump(cls, data: Any) -> Any: | ||
""" Converting to builtin types is usually not necessary. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAICT this can be done in a slightly simpler way with a custom json encoder, c.f. https://github.com/osbuild/otk/compare/main...mvo5:annotations-2?expand=1#diff-7a465d7d1dc04950d6fb0e7f8e2a314b952542824dd3ef0145b7dc00947f67d8R65 - it is only needed for custom bools (which is a bit sad) |
||
This special conversion is mainly for json.dumps(). json library needs "bool" to pass the | ||
condition "<var> is True" which does not apply to AnnotatedBool, | ||
so we'll just convert all | ||
""" | ||
ret = data | ||
if isinstance(data, AnnotatedDict): | ||
ret = {key: cls.deep_dump(value) for key, value in data.items()} | ||
elif isinstance(data, AnnotatedList): | ||
ret = [cls.deep_dump(item) for item in data] | ||
elif isinstance(data, AnnotatedBool): | ||
ret = data.value | ||
elif isinstance(data, AnnotatedFloat): | ||
ret = float(data) | ||
elif isinstance(data, AnnotatedInt): | ||
ret = int(data) | ||
elif isinstance(data, AnnotatedStr): | ||
ret = str(data) | ||
|
||
return ret | ||
|
||
def add_source_attributes(self, key_data, prefix=""): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This add a tight coupling of AnnotatedBase to an yaml node, i.e. Annotated node now needs to know details of yaml nodes, having the helper outside would reduce this coupling. |
||
line_number = key_data.start_mark.line + 1 | ||
line_number_end = key_data.end_mark.line + 1 | ||
column = key_data.start_mark.column + 1 | ||
column_end = key_data.end_mark.column + 1 | ||
filename = os.path.relpath(key_data.start_mark.name, self._basedir) | ||
self.set_annotation(f"{prefix}src", f"{filename}:{line_number}") | ||
self.set_annotation(f"{prefix}filename", filename) | ||
self.set_annotation(f"{prefix}line_number", line_number) | ||
self.set_annotation(f"{prefix}line_number_end", line_number_end) | ||
self.set_annotation(f"{prefix}column", column) | ||
self.set_annotation(f"{prefix}column_end", column_end) | ||
|
||
def __add__(self, o): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. afaict the various *add is only used in a test, could we YAGNI it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it will also be used when implementing more otk operations? |
||
ret = self.deep_convert(super().__add__(o)) # pylint: disable=E1101 | ||
ret.set_annotations(self.squash_annotations([self, o])) | ||
return ret | ||
|
||
def __iadd__(self, o): | ||
return self.__add__(o) | ||
|
||
def __radd__(self, o): | ||
return self.deep_convert(o).__add__(self) | ||
|
||
def __deepcopy__(self, memo): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a way to avoid this, this is a pretty complex function and if I remove it only the "FAILED test/test_example.py::test_errors[/home/mvogt/devel/otk/test/data/error/01-circular.yaml] - AssertionError: assert 'circular include detected:\ntest/data/error/01-circ..." for a "real-life" test fails, could this be archived in another way maybe (e.g. via something like |
||
if isinstance(self, AnnotatedDict): | ||
cls = self.__class__ | ||
result = cls.__new__(cls) | ||
memo[id(self)] = result | ||
|
||
for k, v in self.items(): | ||
result[k] = copy.deepcopy(v, memo) | ||
|
||
elif isinstance(self, AnnotatedList): | ||
cls = self.__class__ | ||
result = cls.__new__(cls) | ||
memo[id(self)] = result | ||
|
||
for v in self: | ||
result.append(copy.deepcopy(v, memo)) | ||
elif isinstance(self, (AnnotatedStr, AnnotatedInt, AnnotatedFloat, AnnotatedBool, AnnotatedPath)): | ||
result = copy.copy(self) | ||
else: | ||
result = None # just for the linter - that should never happen | ||
|
||
result.set_annotations(self.get_annotations().copy()) | ||
|
||
return result | ||
|
||
@classmethod | ||
def squash_annotations(cls, annotation_list: list) -> dict: | ||
""" just aggregates all annotations in the list. | ||
|
||
new annotation keys get appended to the resulting dict | ||
|
||
duplicate annotations will get converted to string | ||
(if you need an exception for a key please implement here) | ||
|
||
for "src" there is an exception | ||
this is expected to be in the form "filename:line_number" | ||
and will be sanely unified | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How will this be used in the real code? If I remove it I only see the unit test failing but no test that shows how this is used in a otk document
Comment on lines
+137
to
+148
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a rather poor docstring for a method that isn't very obvious or easy to read. The first line starting with "just" doesn't help. "Aggregates all annotations in the list" would be a good first line for it, the "just" makes it sound like things will be simple, but the function body suggests otherwise. But I would also add "into a dictionary keyed by [something]". "New annotation keys get appended to the dict". Isn't every key here new? How can there be old annotation keys? This looks like a pure function (it's annotated as a classmethod but it's actually static right? "Duplicate annotations will get converted to a string". Does this mean unique annotations are different types? What does that mean for the result? "(if you need an exception for a key please implement here)". I would change this to: and then follow it with:
But also, I'm not entirely clear what the standard behaviour is like so I don't really understand what the exception is excepting. |
||
ret: dict[str, Any] = {} | ||
files: dict[str, str] = {} | ||
for item in annotation_list: | ||
if not isinstance(item, AnnotatedBase): | ||
continue | ||
annotations = item.get_annotations() | ||
for k in annotations.keys(): | ||
if k == "src": | ||
src = annotations[k].split(":") | ||
if src[0] in files: | ||
files[src[0]] += ", " + src[1] | ||
else: | ||
files[src[0]] = src[1] | ||
else: | ||
if k in ret: | ||
ret[k] = str(ret[k]) + str(annotations[k]) | ||
else: | ||
ret[k] = annotations[k] | ||
|
||
ret["src"] = "\n".join([f"* {k}:{v}" for k, v in files.items()]) | ||
|
||
return ret | ||
|
||
|
||
AnnotatedDictKeyT = TypeVar('AnnotatedDictKeyT') | ||
AnnotatedDictVarT = TypeVar('AnnotatedDictVarT') | ||
|
||
AnnotatedListT = TypeVar('AnnotatedListT') | ||
|
||
|
||
class AnnotatedList(Generic[AnnotatedListT], AnnotatedBase, list): | ||
def __init__(self, seq: Optional[list[Any]] = None) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change (and the other type annotation changes in this commit) doesn't seem relevant to the commit and its message. |
||
if seq is None: | ||
seq = [] | ||
list.__init__(self, seq) | ||
AnnotatedBase.__init__(self) | ||
|
||
|
||
class AnnotatedDict(Generic[AnnotatedDictKeyT, AnnotatedDictVarT], AnnotatedBase, dict): | ||
def __init__(self, seq: Optional[dict[Any, Any]] = None, **kwargs: dict[Any, Any]) -> None: | ||
if seq is None: | ||
seq = {} | ||
dict.__init__(self, seq, **kwargs) | ||
AnnotatedBase.__init__(self) | ||
|
||
|
||
class AnnotatedPathBase(AnnotatedBase): | ||
def fspath_with_include(self) -> str: | ||
src = None | ||
filename = os.path.relpath(os.fspath(str(self)), self._basedir) | ||
try: | ||
src = self.get_annotation("src") | ||
except KeyError: | ||
pass | ||
if src is not None: | ||
return f"{filename} (included from {src})" | ||
return filename | ||
|
||
|
||
if sys.version_info >= (3, 10): | ||
class AnnotatedPath(AnnotatedPathBase, pathlib.Path): | ||
def __init__(self, *args: Any, **kwargs: Any) -> None: | ||
pathlib.Path.__init__(self, *args, **kwargs) | ||
AnnotatedPathBase.__init__(self) | ||
else: | ||
class AnnotatedPath(AnnotatedPathBase, pathlib.PosixPath): # pylint: disable=abstract-method | ||
def __init__(self, *args: Any) -> None: # pylint: disable=W0613 | ||
pathlib.PosixPath.__init__(self) | ||
AnnotatedPathBase.__init__(self) | ||
Comment on lines
+208
to
+217
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a common behaviour we can use here instead of having potentially different behaviour when switching Python versions? Does using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The implementation of pathlib changed in those python versions and only has to be handled explicitly by code that directly calls the "init" method which you usually don't do but in this case it's needed as we inherit here and the object doesn't seem to be properly instantiated otherwise. |
||
|
||
|
||
class AnnotatedStr(AnnotatedBase, str): | ||
def __init__(self, *args: str) -> None: # pylint: disable=W0613 | ||
str.__init__(self) | ||
AnnotatedBase.__init__(self) | ||
|
||
|
||
class AnnotatedInt(AnnotatedBase, int): | ||
def __init__(self, *args: int) -> None: # pylint: disable=W0613 | ||
int.__init__(self) | ||
AnnotatedBase.__init__(self) | ||
|
||
def __str__(self): # pylint: disable=E0307 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these str helper used? When I remove them I see no test failing (this and the others) |
||
ret = AnnotatedStr(int.__str__(self)) | ||
ret.set_annotations(self.get_annotations()) | ||
return ret | ||
|
||
|
||
class AnnotatedFloat(AnnotatedBase, float): | ||
def __init__(self, *args: float) -> None: # pylint: disable=W0613 | ||
float.__init__(self) | ||
AnnotatedBase.__init__(self) | ||
|
||
def __str__(self): # pylint: disable=E0307 | ||
ret = AnnotatedStr(float.__str__(self)) | ||
ret.set_annotations(self.get_annotations()) | ||
return ret | ||
|
||
|
||
class AnnotatedBool(AnnotatedBase): | ||
""" only bool can't be "subclassed" """ | ||
|
||
def __init__(self, value: Any) -> None: | ||
self.value: bool = bool(value) | ||
AnnotatedBase.__init__(self) | ||
|
||
def __str__(self): # pylint: disable=E0307 | ||
ret = AnnotatedStr(bool.__str__(self)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this actually work (it may not matter as we maybe do not need it, see above) but if we do, shouldn't this be something like |
||
ret.set_annotations(self.get_annotations()) | ||
return ret | ||
|
||
@property # type: ignore[misc] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would not do that, instead just:
and remove it, this class is not "bool"; things like "AnnoatedBool(false) is False" will not work so pretending to be seems dangerous (False is a singelton). |
||
def __class__(self): | ||
return bool |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
import argparse | ||
import logging | ||
import pathlib | ||
import sys | ||
from typing import List | ||
|
||
from . import __version__ | ||
from .annotation import AnnotatedPath | ||
from .document import Omnifest | ||
|
||
log = logging.getLogger(__name__) | ||
|
@@ -24,17 +24,12 @@ def run(argv: List[str]) -> int: | |
handlers=[logging.StreamHandler()], | ||
) | ||
|
||
if arguments.version: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change looks unrelated (maybe it's own PR?) |
||
print(f"otk {__version__}") | ||
return 0 | ||
|
||
if arguments.command == "compile": | ||
return compile(arguments) | ||
if arguments.command == "validate": | ||
return validate(arguments) | ||
|
||
parser.print_help() | ||
return 2 | ||
raise RuntimeError("Unknown subcommand") | ||
|
||
|
||
def _process(arguments: argparse.Namespace, dry_run: bool) -> int: | ||
|
@@ -43,9 +38,9 @@ def _process(arguments: argparse.Namespace, dry_run: bool) -> int: | |
dst = sys.stdout if arguments.output is None else open(arguments.output, "w", encoding="utf-8") | ||
|
||
if arguments.input is None: | ||
path = pathlib.Path(f"/proc/self/fd/{sys.stdin.fileno()}") | ||
path = AnnotatedPath(f"/proc/self/fd/{sys.stdin.fileno()}") | ||
else: | ||
path = pathlib.Path(arguments.input) | ||
path = AnnotatedPath(arguments.input) | ||
|
||
# First pass of resolving the otk file is "shallow", it will not run | ||
# externals and not resolve anything under otk.target.* | ||
|
@@ -106,12 +101,6 @@ def parser_create() -> argparse.ArgumentParser: | |
default=0, | ||
help="Sets verbosity. Can be passed multiple times to be more verbose.", | ||
) | ||
parser.add_argument( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also looks unrelated? |
||
"-V", | ||
"--version", | ||
action="store_true", | ||
help="Print version and exit." | ||
) | ||
parser.add_argument( | ||
"-W", | ||
"--warn", | ||
|
@@ -121,7 +110,7 @@ def parser_create() -> argparse.ArgumentParser: | |
) | ||
|
||
# get a subparser action | ||
subparsers = parser.add_subparsers(dest="command", required=False, metavar="command") | ||
subparsers = parser.add_subparsers(dest="command", required=True, metavar="command") | ||
|
||
parser_compile = subparsers.add_parser("compile", help="Compile an omnifest.") | ||
parser_compile.add_argument( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems unrelated to the PR(?) and could be it's own?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in my setup this is needed to run pytest, it could be split but I need to merge this first because otherwise it fails to commit.
I'll to a separate PR, merge and rebase :-D