Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/#398-expand-exposed-type-parameter #2296

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions taipy/core/config/data_node_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from datetime import timedelta
from typing import Any, Callable, Dict, List, Optional, Union

import numpy
import pandas

from taipy.common.config import Config
from taipy.common.config._config import _Config
from taipy.common.config.common._config_blocker import _ConfigBlocker
Expand Down Expand Up @@ -71,11 +74,15 @@ class DataNodeConfig(Section):
_EXPOSED_TYPE_PANDAS = "pandas"
_EXPOSED_TYPE_MODIN = "modin" # Deprecated in favor of pandas since 3.1.0
_EXPOSED_TYPE_NUMPY = "numpy"
_EXPOSED_TYPE_PANDAS_DATAFRAME = pandas.DataFrame
_EXPOSED_TYPE_NUMPY_NDARRAY = numpy.ndarray
_DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS

_ALL_EXPOSED_TYPES = [
_EXPOSED_TYPE_PANDAS,
_EXPOSED_TYPE_NUMPY,
_EXPOSED_TYPE_PANDAS_DATAFRAME,
_EXPOSED_TYPE_NUMPY_NDARRAY,
]

_OPTIONAL_ENCODING_PROPERTY = "encoding"
Expand Down
11 changes: 8 additions & 3 deletions taipy/core/data/_tabular_datanode_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class _TabularDataNodeMixin(object):
_EXPOSED_TYPE_PANDAS = "pandas"
_EXPOSED_TYPE_MODIN = "modin" # Deprecated in favor of pandas since 3.1.0
_VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY]
_EXPOSED_TYPE_PANDAS_DATAFRAME = pd.DataFrame
_EXPOSED_TYPE_NUMPY_NDARRAY = np.ndarray

def __init__(self, **kwargs) -> None:
self._decoder: Union[Callable, Any]
Expand All @@ -44,11 +46,14 @@ def __init__(self, **kwargs) -> None:
if callable(custom_encoder):
self._encoder = custom_encoder


def _convert_data_to_dataframe(self, exposed_type: Any, data: Any) -> Union[pd.DataFrame, pd.Series]:
if exposed_type == self._EXPOSED_TYPE_PANDAS and isinstance(data, (pd.DataFrame, pd.Series)):
if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME] and isinstance(
data, (pd.DataFrame, pd.Series)
):
return data
elif exposed_type == self._EXPOSED_TYPE_NUMPY and isinstance(data, np.ndarray):
elif exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY] and isinstance(
data, np.ndarray
):
return pd.DataFrame(data)
elif isinstance(data, list) and not isinstance(exposed_type, str):
return pd.DataFrame.from_records([self._encoder(row) for row in data])
Expand Down
4 changes: 2 additions & 2 deletions taipy/core/data/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
path = self._path

properties = self.properties
if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
return self._read_as_pandas_dataframe(path=path)
if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
return self._read_as_numpy(path=path)
return self._read_as(path=path)

Expand Down
14 changes: 9 additions & 5 deletions taipy/core/data/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def write_with_column_names(self, data: Any, columns: List[str] = None, editor_i

@staticmethod
def _check_exposed_type(exposed_type):
if isinstance(exposed_type, str):
if isinstance(exposed_type, str) or exposed_type in [pd.DataFrame, np.ndarray]:
_TabularDataNodeMixin._check_exposed_type(exposed_type)
elif isinstance(exposed_type, list):
for t in exposed_type:
Expand All @@ -154,18 +154,18 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
path = self._path

exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
if exposed_type == self._EXPOSED_TYPE_PANDAS:
if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
return self._read_as_pandas_dataframe(path=path)
if exposed_type == self._EXPOSED_TYPE_NUMPY:
if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
return self._read_as_numpy(path=path)
return self._read_as(path=path)

# Read one sheet either as NumPy data or as pandas data, depending on
# `sheet_exposed_type`; returns None when neither branch matches.
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. Each `==` condition is presumably the removed line and
# the `in [...]` condition right after it its replacement -- confirm against the
# merged file.
# NOTE(review): `sheet_exposed_type` is annotated `str`, but the `in [...]`
# conditions also compare it with `_EXPOSED_TYPE_NUMPY_NDARRAY` /
# `_EXPOSED_TYPE_PANDAS_DATAFRAME`, which presumably hold the classes
# `np.ndarray` / `pd.DataFrame`; the annotation may be too narrow -- TODO confirm.
def _read_sheet_with_exposed_type(
self, path: str, sheet_exposed_type: str, sheet_name: str
) -> Optional[Union[np.ndarray, pd.DataFrame]]:
if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY:
if sheet_exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
return self._read_as_numpy(path, sheet_name)
elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS:
elif sheet_exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
return self._read_as_pandas_dataframe(path, sheet_name) # type: ignore
# Neither a NumPy nor a pandas exposed type matched.
return None

Expand Down Expand Up @@ -202,6 +202,10 @@ def _read_as(self, path: str):
sheet_exposed_type = exposed_type.get(sheet_name, self._EXPOSED_TYPE_PANDAS)
elif isinstance(exposed_type, List):
sheet_exposed_type = exposed_type[i]
elif exposed_type == np.ndarray:
sheet_exposed_type = self._EXPOSED_TYPE_NUMPY
elif exposed_type == pd.DataFrame:
sheet_exposed_type = self._EXPOSED_TYPE_PANDAS

if isinstance(sheet_exposed_type, str):
sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name)
Expand Down
5 changes: 2 additions & 3 deletions taipy/core/data/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,9 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
return self._do_read_from_path(path, properties[self._EXPOSED_TYPE_PROPERTY], kwargs)

# Dispatch a read of `path` on `exposed_type`: pandas-family values go to
# `_read_as_pandas_dataframe`, NumPy-family values to `_read_as_numpy`, and
# anything else to `_read_as`; `kwargs` is forwarded unchanged in every case.
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. Each `==` condition is presumably the removed line and
# the `in [...]` condition right after it its replacement -- confirm against the
# merged file.
# NOTE(review): `exposed_type` is annotated `str`, but the `in [...]` conditions
# also compare it with `_EXPOSED_TYPE_PANDAS_DATAFRAME` /
# `_EXPOSED_TYPE_NUMPY_NDARRAY`, which presumably hold classes rather than
# strings -- TODO confirm whether the annotation should be widened.
def _do_read_from_path(self, path: str, exposed_type: str, kwargs: Dict) -> Any:
if exposed_type == self._EXPOSED_TYPE_PANDAS:
if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
return self._read_as_pandas_dataframe(path, kwargs)
if exposed_type == self._EXPOSED_TYPE_NUMPY:
if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
return self._read_as_numpy(path, kwargs)
# Fallback for every other exposed type.
return self._read_as(path, kwargs)

Expand All @@ -249,4 +249,3 @@ def _append(self, data: Any):

def _write(self, data: Any):
    """Write ``data`` by delegating to ``_write_with_kwargs`` with no extra keyword arguments."""
    self._write_with_kwargs(data)

52 changes: 52 additions & 0 deletions tests/core/data/test_csv_data_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,58 @@ def test_pandas_exposed_type(self):
dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
assert isinstance(dn.read(), pd.DataFrame)

def test_pandas_dataframe_exposed_type(self):
    """A CSV data node whose exposed type is ``pd.DataFrame`` reads as a DataFrame."""
    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": pd.DataFrame}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, pd.DataFrame)

def test_pandas_dataframe_exposed_type_a(self):
    """A CSV data node given ``pandas.DataFrame`` (via ``import pandas``) reads as a DataFrame."""
    import pandas

    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": pandas.DataFrame}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, pandas.DataFrame)

def test_pandas_dataframe_exposed_type_b(self):
    """A CSV data node given ``DataFrame`` (via ``from pandas import DataFrame``) reads as a DataFrame."""
    from pandas import DataFrame

    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": DataFrame}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, DataFrame)

def test_pandas_dataframe_exposed_type_c(self):
    """A CSV data node given the aliased import ``DataFrame as DF`` reads as a DataFrame."""
    from pandas import DataFrame as DF

    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": DF}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, DF)

def test_numpy_ndarray_exposed_type(self):
    """A CSV data node whose exposed type is ``np.ndarray`` reads as a NumPy array."""
    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": np.ndarray}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, np.ndarray)

def test_numpy_ndarray_exposed_type_a(self):
    """A CSV data node given ``numpy.ndarray`` (via ``import numpy``) reads as a NumPy array."""
    import numpy

    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": numpy.ndarray}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, numpy.ndarray)

def test_numpy_ndarray_exposed_type_b(self):
    """A CSV data node given ``ndarray`` (via ``from numpy import ndarray``) reads as a NumPy array."""
    from numpy import ndarray

    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": ndarray}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, ndarray)

def test_numpy_ndarray_exposed_type_c(self):
    """A CSV data node given the aliased import ``ndarray as nd_array`` reads as a NumPy array."""
    from numpy import ndarray as nd_array

    sample_dir = pathlib.Path(__file__).parent.resolve()
    csv_path = os.path.join(sample_dir, "data_sample/example.csv")
    node_properties = {"path": csv_path, "exposed_type": nd_array}
    data_node = CSVDataNode("foo", Scope.SCENARIO, properties=node_properties)
    loaded = data_node.read()
    assert isinstance(loaded, nd_array)

def test_raise_error_invalid_exposed_type(self):
path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
with pytest.raises(InvalidExposedType):
Expand Down
94 changes: 94 additions & 0 deletions tests/core/data/test_excel_data_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,100 @@ def test_pandas_exposed_type(self):
data = dn.read()
assert isinstance(data, pd.DataFrame)

def test_pandas_dataframe_exposed_type(self):
    """An Excel data node keeps ``pd.DataFrame`` as exposed type and reads the sheet as a DataFrame."""
    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": pd.DataFrame, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == pd.DataFrame
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, pd.DataFrame)

def test_pandas_dataframe_exposed_type_a(self):
    """An Excel data node given ``pandas.DataFrame`` (via ``import pandas``) reads the sheet as a DataFrame."""
    import pandas

    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": pandas.DataFrame, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == pandas.DataFrame
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, pandas.DataFrame)

def test_pandas_dataframe_exposed_type_b(self):
    """An Excel data node given ``DataFrame`` (via ``from pandas import DataFrame``) reads as a DataFrame."""
    from pandas import DataFrame

    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": DataFrame, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == DataFrame
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, DataFrame)

def test_pandas_dataframe_exposed_type_c(self):
    """An Excel data node given the aliased import ``DataFrame as DF`` reads the sheet as a DataFrame."""
    from pandas import DataFrame as DF

    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": DF, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == DF
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, DF)

def test_numpy_ndarray_exposed_type(self):
    """An Excel data node keeps ``np.ndarray`` as exposed type and reads the sheet as a NumPy array."""
    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": np.ndarray, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == np.ndarray
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, np.ndarray)

def test_numpy_ndarray_exposed_type_a(self):
    """An Excel data node given ``numpy.ndarray`` (via ``import numpy``) reads the sheet as a NumPy array."""
    import numpy

    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": numpy.ndarray, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == numpy.ndarray
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, numpy.ndarray)

def test_numpy_ndarray_exposed_type_b(self):
    """An Excel data node given ``ndarray`` (via ``from numpy import ndarray``) reads as a NumPy array."""
    from numpy import ndarray

    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": ndarray, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == ndarray
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, ndarray)

def test_numpy_ndarray_exposed_type_c(self):
    """An Excel data node given the aliased import ``ndarray as nd_array`` reads as a NumPy array."""
    from numpy import ndarray as nd_array

    xlsx_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
    node_properties = {"default_path": xlsx_path, "exposed_type": nd_array, "sheet_name": "Sheet1"}
    excel_node = ExcelDataNode("foo", Scope.SCENARIO, properties=node_properties)
    # The class object itself must be stored as the property value.
    assert excel_node.properties["exposed_type"] == nd_array
    sheet_data = excel_node.read()
    assert isinstance(sheet_data, nd_array)

def test_complex_exposed_type_dict(self):
# ["Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_4.xlsx")
Expand Down
14 changes: 14 additions & 0 deletions tests/core/data/test_parquet_data_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ def test_create(self):
assert isinstance(dn_1, ParquetDataNode)
assert dn_1.properties["exposed_type"] == MyCustomObject

parquet_dn_config_2 = Config.configure_parquet_data_node(
id="bar", default_path=path, compression=compression, exposed_type=np.ndarray
)
dn_2 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_2, None, None)
assert isinstance(dn_2, ParquetDataNode)
assert dn_2.properties["exposed_type"] == np.ndarray

parquet_dn_config_3 = Config.configure_parquet_data_node(
id="bar", default_path=path, compression=compression, exposed_type=pd.DataFrame
)
dn_3 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_3, None, None)
assert isinstance(dn_3, ParquetDataNode)
assert dn_3.properties["exposed_type"] == pd.DataFrame

with pytest.raises(InvalidConfigurationId):
dn = ParquetDataNode("foo bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})

Expand Down