Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_index function for parsing NDIndex from string #91

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,15 @@ dmypy.json

# Pyre type checker
.pyre/

# jetbrains ide stuff
*.iml
.idea/

# vscode ide stuff
*.code-workspace
.history/
.vscode/

# project scratch dir
/scratch/
67 changes: 67 additions & 0 deletions ndindex/literal_eval_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import ast
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a module level docstring


def literal_eval_index(node_or_string):
"""
"Safely" (needs validation) evaluate an expression node or a string containing
a (limited subset) of valid numpy index or slice expressions.
"""
if isinstance(node_or_string, str):
node_or_string = ast.parse('dummy[{}]'.format(node_or_string.lstrip(" \t")) , mode='eval')
if isinstance(node_or_string, ast.Expression):
node_or_string = node_or_string.body
if isinstance(node_or_string, ast.Subscript):
node_or_string = node_or_string.slice

def _raise_malformed_node(node):
raise ValueError(f'malformed node or string: {node!r}')

# from cpy37, should work until they remove ast.Num (not until cpy310)
def _convert_num(node):
if isinstance(node, ast.Constant):
if isinstance(node.value, (int, float, complex)):
return node.value
elif isinstance(node, ast.Num):
# ast.Num was removed from ast grammar in cpy38
return node.n
_raise_malformed_node(node)
def _convert_signed_num(node):
if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
operand = _convert_num(node.operand)
if isinstance(node.op, ast.UAdd):
return + operand
else:
return - operand
return _convert_num(node)

def _convert(node):
if isinstance(node, ast.Tuple):
return tuple(map(_convert, node.elts))
elif isinstance(node, ast.List):
return list(map(_convert, node.elts))
elif isinstance(node, ast.Slice):
return slice(
_convert(node.lower) if node.lower is not None else None,
_convert(node.upper) if node.upper is not None else None,
_convert(node.step) if node.step is not None else None,
)
elif isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == 'slice' and node.keywords == []:
# support for parsing slices written out as 'slice(...)' objects
return slice(*map(_convert, node.args))
elif isinstance(node, ast.NameConstant) and node.value is None:
# support for literal None in slices, eg 'slice(None, ...)'
return None
elif isinstance(node, ast.Ellipsis):
# support for three dot '...' ellipsis syntax
return ...
elif isinstance(node, ast.Name) and node.id == 'Ellipsis':
# support for 'Ellipsis' ellipsis syntax
return ...
elif isinstance(node, ast.Index):
# ast.Index was removed from ast grammar in cpy39
return _convert(node.value)
elif isinstance(node, ast.ExtSlice):
# ast.ExtSlice was removed from ast grammar in cpy39
return tuple(map(_convert, node.dims))

return _convert_signed_num(node)
return _convert(node_or_string)
69 changes: 69 additions & 0 deletions ndindex/tests/test_literal_eval_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import ast
from hypothesis import given, example
from hypothesis.strategies import one_of
import pytest

from ..literal_eval_index import literal_eval_index
from .helpers import ellipses, ints, slices, tuples, _doesnt_raise

Tuples = tuples(one_of(
ellipses(),
ints(),
slices(),
)).filter(_doesnt_raise)

ndindexStrs = one_of(
ellipses(),
ints(),
slices(),
Tuples,
).map(lambda x: f'{x}')

class _Dummy:
def __getitem__(self, x):
return x
_dummy = _Dummy()

@example('3')
@example('-3')
@example('...')
@example('Ellipsis')
@example('+3')
@example('3:4')
@example('3:-4')
@example('3, 5, 14, 1')
@example('3, -5, 14, -1')
@example('3:15, 5, 14:99, 1')
@example('3:15, -5, 14:-99, 1')
@example(':15, -5, 14:-99:3, 1')
@example('3:15, -5, :, [1,2,3]')
@example('slice(None)')
@example('slice(None, None)')
@example('slice(None, None, None)')
@example('slice(14)')
@example('slice(12, 14)')
@example('slice(12, 72, 14)')
@example('slice(-12, -72, 14)')
@example('3:15, -5, slice(12, -14), (1,2,3)')
@example('..., -5, slice(12, -14), (1,2,3)')
@example('3:15, -5, slice(12, -14), (1,2,3), Ellipsis')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are not valid indices, at least as far as ndindex is concerned. Nested tuples are not allowed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did see

https://github.com/Quansight/ndindex/blob/9e5399fb20f5800474aa2bf081e8323cfeaeabf4/ndindex/tuple.py#L65

Why did you decide to be more restrictive about this syntax than numpy?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason is that I wanted to avoid confusion from both Tuple(1, 2) and Tuple((1, 2)) being valid and meaning different things. See #17.

I also had thought that using a tuple to represent an array was deprecated, but now I can't find that. Certainly there is ambiguity even at the NumPy level. If you expect a tuple to act like a fancy index, then you will be surprised that a[array([1, 2])] will give a different thing than a[(1, 2)].

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Particularly this comment #17 (comment). Perhaps this should be added to the docs. I do have https://quansight.github.io/ndindex/type-confusion.html#tuple, but that is more about best practices and doesn't really discuss why things are the way they are.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, so it will be a little difficult to implement disallowing nested tuples indexes, since there are a bunch of places where we do need to parse ast.Tuple regardless. I think the way to do this will be to include some kind of guard in the ast.Tuple handling clause of _convert that will raise an error if it gets called twice. So far I've got:

class _Guard:
    def __init__(self):
        self.val = False

    def __call__(self):
        if self.val:
            return True
        else:
            self.val = True
            return False

and then

    _nested_tuple_guard = _Guard()
    def _convert(node):
        if isinstance(node, ast.Tuple):
            if _nested_tuple_guard():
                raise ValueError(f'tuples inside of tuple indices are not supported: {node!r}')

            return tuple(map(_convert, node.elts))
    ...

There's probably a cleaner way to implement the guard

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the simpler way is to not worry about nonsense combinations here, and let ndindex() handle it at the end.

@given(ndindexStrs)
def test_literal_eval_index_hypothesis(ixStr):
assert eval(f'_dummy[{ixStr}]') == literal_eval_index(ixStr)

def test_literal_eval_index_malformed_raise():
with pytest.raises(ValueError):
# we don't allow the bitwise not unary op
ixStr = '~3'
literal_eval_index(ixStr)

def test_literal_eval_index_ensure_coverage():
# ensure full coverage, regarless of cpy version and accompanying changes to the ast grammar
for node in (
ast.Constant(7),
ast.Num(7),
ast.Index(ast.Constant(7)),
):
assert literal_eval_index(node) == 7

assert literal_eval_index(ast.ExtSlice((ast.Constant(7), ast.Constant(7), ast.Constant(7)))) == (7, 7, 7)
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
"sympy",
],
tests_require=[
'pytest',
'hypothesis',
"pytest",
"pytest-flakes",
"pytest-tornasync",
"hypothesis",
],
classifiers=[
"Programming Language :: Python :: 3",
Expand Down