diff --git a/taipy/core/config/data_node_config.py b/taipy/core/config/data_node_config.py index 8edc6ce17..12c15301a 100644 --- a/taipy/core/config/data_node_config.py +++ b/taipy/core/config/data_node_config.py @@ -14,6 +14,9 @@ from datetime import timedelta from typing import Any, Callable, Dict, List, Optional, Union +import numpy +import pandas + from taipy.common.config import Config from taipy.common.config._config import _Config from taipy.common.config.common._config_blocker import _ConfigBlocker @@ -71,11 +74,15 @@ class DataNodeConfig(Section): _EXPOSED_TYPE_PANDAS = "pandas" _EXPOSED_TYPE_MODIN = "modin" # Deprecated in favor of pandas since 3.1.0 _EXPOSED_TYPE_NUMPY = "numpy" + _EXPOSED_TYPE_PANDAS_DATAFRAME = pandas.DataFrame + _EXPOSED_TYPE_NUMPY_NDARRAY = numpy.ndarray _DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS _ALL_EXPOSED_TYPES = [ _EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY, + _EXPOSED_TYPE_PANDAS_DATAFRAME, + _EXPOSED_TYPE_NUMPY_NDARRAY, ] _OPTIONAL_ENCODING_PROPERTY = "encoding" diff --git a/taipy/core/data/_tabular_datanode_mixin.py b/taipy/core/data/_tabular_datanode_mixin.py index 54e5d0494..5be52f0e5 100644 --- a/taipy/core/data/_tabular_datanode_mixin.py +++ b/taipy/core/data/_tabular_datanode_mixin.py @@ -26,6 +26,8 @@ class _TabularDataNodeMixin(object): _EXPOSED_TYPE_PANDAS = "pandas" _EXPOSED_TYPE_MODIN = "modin" # Deprecated in favor of pandas since 3.1.0 _VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY] + _EXPOSED_TYPE_PANDAS_DATAFRAME = pd.DataFrame + _EXPOSED_TYPE_NUMPY_NDARRAY = np.ndarray def __init__(self, **kwargs) -> None: self._decoder: Union[Callable, Any] @@ -44,11 +46,14 @@ def __init__(self, **kwargs) -> None: if callable(custom_encoder): self._encoder = custom_encoder - def _convert_data_to_dataframe(self, exposed_type: Any, data: Any) -> Union[pd.DataFrame, pd.Series]: - if exposed_type == self._EXPOSED_TYPE_PANDAS and isinstance(data, (pd.DataFrame, pd.Series)): + if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME] and isinstance( + data, (pd.DataFrame, pd.Series) + ): return data - elif exposed_type == self._EXPOSED_TYPE_NUMPY and isinstance(data, np.ndarray): + elif exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY] and isinstance( + data, np.ndarray + ): return pd.DataFrame(data) elif isinstance(data, list) and not isinstance(exposed_type, str): return pd.DataFrame.from_records([self._encoder(row) for row in data]) diff --git a/taipy/core/data/csv.py b/taipy/core/data/csv.py index f61e1b1b5..151658489 100644 --- a/taipy/core/data/csv.py +++ b/taipy/core/data/csv.py @@ -134,9 +134,9 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any: path = self._path properties = self.properties - if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS: + if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]: return self._read_as_pandas_dataframe(path=path) - if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY: + if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]: return self._read_as_numpy(path=path) return self._read_as(path=path) diff --git a/taipy/core/data/excel.py b/taipy/core/data/excel.py index f7bb0ddfd..0922a83ac 100644 --- a/taipy/core/data/excel.py +++ b/taipy/core/data/excel.py @@ -137,7 +137,7 @@ def write_with_column_names(self, data: Any, columns: List[str] = None, editor_i @staticmethod def _check_exposed_type(exposed_type): - if isinstance(exposed_type, str): + if isinstance(exposed_type, str) or exposed_type in [pd.DataFrame, np.ndarray]: _TabularDataNodeMixin._check_exposed_type(exposed_type) elif isinstance(exposed_type, list): for t in exposed_type: @@ -154,18 +154,18 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any: path = self._path exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY] - if exposed_type == self._EXPOSED_TYPE_PANDAS: + if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]: return self._read_as_pandas_dataframe(path=path) - if exposed_type == self._EXPOSED_TYPE_NUMPY: + if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]: return self._read_as_numpy(path=path) return self._read_as(path=path) def _read_sheet_with_exposed_type( self, path: str, sheet_exposed_type: str, sheet_name: str ) -> Optional[Union[np.ndarray, pd.DataFrame]]: - if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY: + if sheet_exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]: return self._read_as_numpy(path, sheet_name) - elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS: + elif sheet_exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]: return self._read_as_pandas_dataframe(path, sheet_name) # type: ignore return None @@ -202,6 +202,10 @@ def _read_as(self, path: str): sheet_exposed_type = exposed_type.get(sheet_name, self._EXPOSED_TYPE_PANDAS) elif isinstance(exposed_type, List): sheet_exposed_type = exposed_type[i] + elif exposed_type == np.ndarray: + sheet_exposed_type = self._EXPOSED_TYPE_NUMPY + elif exposed_type == pd.DataFrame: + sheet_exposed_type = self._EXPOSED_TYPE_PANDAS if isinstance(sheet_exposed_type, str): sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name) diff --git a/taipy/core/data/parquet.py b/taipy/core/data/parquet.py index 698aae58a..395e825f3 100644 --- a/taipy/core/data/parquet.py +++ b/taipy/core/data/parquet.py @@ -227,9 +227,9 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any: return self._do_read_from_path(path, properties[self._EXPOSED_TYPE_PROPERTY], kwargs) def _do_read_from_path(self, path: str, exposed_type: str, kwargs: Dict) -> Any: - if exposed_type == self._EXPOSED_TYPE_PANDAS: + if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]: return self._read_as_pandas_dataframe(path, kwargs) - if exposed_type == self._EXPOSED_TYPE_NUMPY: + if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]: return self._read_as_numpy(path, kwargs) return self._read_as(path, kwargs) @@ -249,4 +249,3 @@ def _append(self, data: Any): def _write(self, data: Any): self._write_with_kwargs(data) - diff --git a/tests/core/data/test_csv_data_node.py b/tests/core/data/test_csv_data_node.py index 360fd62a5..4dfd04b8f 100644 --- a/tests/core/data/test_csv_data_node.py +++ b/tests/core/data/test_csv_data_node.py @@ -157,6 +157,58 @@ def test_pandas_exposed_type(self): dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"}) assert isinstance(dn.read(), pd.DataFrame) + def test_pandas_dataframe_exposed_type(self): + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": pd.DataFrame}) + assert isinstance(dn.read(), pd.DataFrame) + + def test_pandas_dataframe_exposed_type_a(self): + import pandas + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": pandas.DataFrame}) + assert isinstance(dn.read(), pandas.DataFrame) + + def test_pandas_dataframe_exposed_type_b(self): + from pandas import DataFrame + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": DataFrame}) + assert isinstance(dn.read(), DataFrame) + + def test_pandas_dataframe_exposed_type_c(self): + from pandas import DataFrame as DF + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": DF}) + assert isinstance(dn.read(), DF) + + def test_numpy_ndarray_exposed_type(self): + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": np.ndarray}) + assert isinstance(dn.read(), np.ndarray) + + def test_numpy_ndarray_exposed_type_a(self): + import numpy + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": numpy.ndarray}) + assert isinstance(dn.read(), numpy.ndarray) + + def test_numpy_ndarray_exposed_type_b(self): + from numpy import ndarray + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": ndarray}) + assert isinstance(dn.read(), ndarray) + + def test_numpy_ndarray_exposed_type_c(self): + from numpy import ndarray as nd_array + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") + dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": nd_array}) + assert isinstance(dn.read(), nd_array) + def test_raise_error_invalid_exposed_type(self): path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv") with pytest.raises(InvalidExposedType): diff --git a/tests/core/data/test_excel_data_node.py b/tests/core/data/test_excel_data_node.py index 734f72b2d..febba8205 100644 --- a/tests/core/data/test_excel_data_node.py +++ b/tests/core/data/test_excel_data_node.py @@ -302,6 +302,100 @@ def test_pandas_exposed_type(self): data = dn.read() assert isinstance(data, pd.DataFrame) + def test_pandas_dataframe_exposed_type(self): + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", + Scope.SCENARIO, + properties={"default_path": path, "exposed_type": pd.DataFrame, "sheet_name": "Sheet1"}, + ) + assert dn.properties["exposed_type"] == pd.DataFrame + data = dn.read() + assert isinstance(data, pd.DataFrame) + + def test_pandas_dataframe_exposed_type_a(self): + import pandas + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", + Scope.SCENARIO, + properties={"default_path": path, "exposed_type": pandas.DataFrame, "sheet_name": "Sheet1"}, + ) + assert dn.properties["exposed_type"] == pandas.DataFrame + data = dn.read() + assert isinstance(data, pandas.DataFrame) + + def test_pandas_dataframe_exposed_type_b(self): + from pandas import DataFrame + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", + Scope.SCENARIO, + properties={"default_path": path, "exposed_type": DataFrame, "sheet_name": "Sheet1"}, + ) + assert dn.properties["exposed_type"] == DataFrame + data = dn.read() + assert isinstance(data, DataFrame) + + def test_pandas_dataframe_exposed_type_c(self): + from pandas import DataFrame as DF + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", + Scope.SCENARIO, + properties={"default_path": path, "exposed_type": DF, "sheet_name": "Sheet1"}, + ) + assert dn.properties["exposed_type"] == DF + data = dn.read() + assert isinstance(data, DF) + + def test_numpy_ndarray_exposed_type(self): + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": np.ndarray, "sheet_name": "Sheet1"} + ) + assert dn.properties["exposed_type"] == np.ndarray + data = dn.read() + assert isinstance(data, np.ndarray) + + def test_numpy_ndarray_exposed_type_a(self): + import numpy + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", + Scope.SCENARIO, + properties={"default_path": path, "exposed_type": numpy.ndarray, "sheet_name": "Sheet1"}, + ) + assert dn.properties["exposed_type"] == numpy.ndarray + data = dn.read() + assert isinstance(data, numpy.ndarray) + + def test_numpy_ndarray_exposed_type_b(self): + from numpy import ndarray + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": ndarray, "sheet_name": "Sheet1"} + ) + assert dn.properties["exposed_type"] == ndarray + data = dn.read() + assert isinstance(data, ndarray) + + def test_numpy_ndarray_exposed_type_c(self): + from numpy import ndarray as nd_array + + path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx") + dn = ExcelDataNode( + "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": nd_array, "sheet_name": "Sheet1"} + ) + assert dn.properties["exposed_type"] == nd_array + data = dn.read() + assert isinstance(data, nd_array) + def test_complex_exposed_type_dict(self): # ["Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"] path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_4.xlsx") diff --git a/tests/core/data/test_parquet_data_node.py b/tests/core/data/test_parquet_data_node.py index 02dd247b6..109ae3c90 100644 --- a/tests/core/data/test_parquet_data_node.py +++ b/tests/core/data/test_parquet_data_node.py @@ -98,6 +98,20 @@ def test_create(self): assert isinstance(dn_1, ParquetDataNode) assert dn_1.properties["exposed_type"] == MyCustomObject + parquet_dn_config_2 = Config.configure_parquet_data_node( + id="bar", default_path=path, compression=compression, exposed_type=np.ndarray + ) + dn_2 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_2, None, None) + assert isinstance(dn_2, ParquetDataNode) + assert dn_2.properties["exposed_type"] == np.ndarray + + parquet_dn_config_3 = Config.configure_parquet_data_node( + id="bar", default_path=path, compression=compression, exposed_type=pd.DataFrame + ) + dn_3 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_3, None, None) + assert isinstance(dn_3, ParquetDataNode) + assert dn_3.properties["exposed_type"] == pd.DataFrame + with pytest.raises(InvalidConfigurationId): dn = ParquetDataNode("foo bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})