Avaiga · trgiangdo · Dec 24, 2024 · Dec 2, 2024 · Dec 2, 2024 · Dec 2, 2024
diff --git a/Pipfile b/Pipfile
@@ -36,6 +36,7 @@ boto3 = "==1.29.1"
 watchdog = "==4.0.0"
 charset-normalizer = "==3.3.2"
 numpy = "<2.0.0"
+pre-commit = "*"
 
 [dev-packages]
 freezegun = "*"

diff --git a/git b/git
diff --git a/taipy/core/config/data_node_config.py b/taipy/core/config/data_node_config.py
@@ -14,6 +14,9 @@
 from datetime import timedelta
 from typing import Any, Callable, Dict, List, Optional, Union
 
+import numpy
+import pandas
+
 from taipy.common.config import Config
 from taipy.common.config._config import _Config
 from taipy.common.config.common._config_blocker import _ConfigBlocker
@@ -71,11 +74,17 @@ class DataNodeConfig(Section):
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _EXPOSED_TYPE_NUMPY = "numpy"
+    _EXPOSED_TYPE_PANDAS_DATAFRAME = pandas.DataFrame
+    _EXPOSED_TYPE_NUMPY_NDARRAY = numpy.ndarray
+    _EXPOSED_TYPE_LIST = List
     _DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS
 
     _ALL_EXPOSED_TYPES = [
         _EXPOSED_TYPE_PANDAS,
         _EXPOSED_TYPE_NUMPY,
+        _EXPOSED_TYPE_PANDAS_DATAFRAME,
+        _EXPOSED_TYPE_NUMPY_NDARRAY,
+        _EXPOSED_TYPE_LIST,
     ]
 
     _OPTIONAL_ENCODING_PROPERTY = "encoding"

diff --git a/taipy/core/data/_tabular_datanode_mixin.py b/taipy/core/data/_tabular_datanode_mixin.py
@@ -26,6 +26,9 @@ class _TabularDataNodeMixin(object):
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY]
+    _EXPOSED_TYPE_PANDAS_DATAFRAME = pd.DataFrame  # exposed type given as the pandas class itself
+    _EXPOSED_TYPE_NUMPY_NDARRAY = np.ndarray  # exposed type given as the numpy class itself
+    _VALID_OTHER_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS_DATAFRAME, _EXPOSED_TYPE_NUMPY_NDARRAY]
 
     def __init__(self, **kwargs) -> None:
         self._decoder: Union[Callable, Any]
@@ -44,11 +47,14 @@ def __init__(self, **kwargs) -> None:
         if callable(custom_encoder):
             self._encoder = custom_encoder
 
-
     def _convert_data_to_dataframe(self, exposed_type: Any, data: Any) -> Union[pd.DataFrame, pd.Series]:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS and isinstance(data, (pd.DataFrame, pd.Series)):
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME] and isinstance(
+            data, (pd.DataFrame, pd.Series)
+        ):
             return data
-        elif exposed_type == self._EXPOSED_TYPE_NUMPY and isinstance(data, np.ndarray):
+        elif exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY] and isinstance(
+            data, np.ndarray
+        ):
             return pd.DataFrame(data)
         elif isinstance(data, list) and not isinstance(exposed_type, str):
             return pd.DataFrame.from_records([self._encoder(row) for row in data])

diff --git a/taipy/core/data/csv.py b/taipy/core/data/csv.py
@@ -134,9 +134,9 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
             path = self._path
 
         properties = self.properties
-        if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
+        if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path=path)
-        if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
+        if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path=path)
         return self._read_as(path=path)
 

diff --git a/taipy/core/data/excel.py b/taipy/core/data/excel.py
@@ -137,7 +137,7 @@ def write_with_column_names(self, data: Any, columns: List[str] = None, editor_i
 
     @staticmethod
     def _check_exposed_type(exposed_type):
-        if isinstance(exposed_type, str):
+        if isinstance(exposed_type, str) or exposed_type in _TabularDataNodeMixin._VALID_OTHER_EXPOSED_TYPES:
             _TabularDataNodeMixin._check_exposed_type(exposed_type)
         elif isinstance(exposed_type, list):
             for t in exposed_type:
@@ -154,18 +154,18 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
             path = self._path
 
         exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path=path)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path=path)
         return self._read_as(path=path)
 
     def _read_sheet_with_exposed_type(
         self, path: str, sheet_exposed_type: str, sheet_name: str
     ) -> Optional[Union[np.ndarray, pd.DataFrame]]:
-        if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if sheet_exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path, sheet_name)
-        elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS:
+        elif sheet_exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path, sheet_name)  # type: ignore
         return None
 
@@ -202,6 +202,10 @@ def _read_as(self, path: str):
                         sheet_exposed_type = exposed_type.get(sheet_name, self._EXPOSED_TYPE_PANDAS)
                     elif isinstance(exposed_type, List):
                         sheet_exposed_type = exposed_type[i]
+                    elif exposed_type == self._EXPOSED_TYPE_NUMPY_NDARRAY:
+                        sheet_exposed_type = self._EXPOSED_TYPE_NUMPY
+                    elif exposed_type == self._EXPOSED_TYPE_PANDAS_DATAFRAME:
+                        sheet_exposed_type = self._EXPOSED_TYPE_PANDAS
 
                     if isinstance(sheet_exposed_type, str):
                         sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name)

diff --git a/taipy/core/data/parquet.py b/taipy/core/data/parquet.py
@@ -227,9 +227,9 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
         return self._do_read_from_path(path, properties[self._EXPOSED_TYPE_PROPERTY], kwargs)
 
     def _do_read_from_path(self, path: str, exposed_type: str, kwargs: Dict) -> Any:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path, kwargs)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path, kwargs)
         return self._read_as(path, kwargs)
 
@@ -249,4 +249,3 @@ def _append(self, data: Any):
 
     def _write(self, data: Any):
         self._write_with_kwargs(data)
-
diff --git a/tests/core/data/test_csv_data_node.py b/tests/core/data/test_csv_data_node.py
@@ -157,6 +157,16 @@ def test_pandas_exposed_type(self):
         dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
         assert isinstance(dn.read(), pd.DataFrame)
 
+    def test_pandas_dataframe_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": pd.DataFrame})
+        assert isinstance(dn.read(), pd.DataFrame)
+
+    def test_numpy_ndarray_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": np.ndarray})
+        assert isinstance(dn.read(), np.ndarray)
+
     def test_raise_error_invalid_exposed_type(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
         with pytest.raises(InvalidExposedType):

diff --git a/tests/core/data/test_excel_data_node.py b/tests/core/data/test_excel_data_node.py
@@ -302,6 +302,26 @@ def test_pandas_exposed_type(self):
         data = dn.read()
         assert isinstance(data, pd.DataFrame)
 
+    def test_pandas_dataframe_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": pd.DataFrame, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == pd.DataFrame
+        data = dn.read()
+        assert isinstance(data, pd.DataFrame)
+
+    def test_numpy_ndarray_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": np.ndarray, "sheet_name": "Sheet1"}
+        )
+        assert dn.properties["exposed_type"] == np.ndarray
+        data = dn.read()
+        assert isinstance(data, np.ndarray)
+
     def test_complex_exposed_type_dict(self):
         # ["Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_4.xlsx")

diff --git a/tests/core/data/test_parquet_data_node.py b/tests/core/data/test_parquet_data_node.py
@@ -98,6 +98,20 @@ def test_create(self):
         assert isinstance(dn_1, ParquetDataNode)
         assert dn_1.properties["exposed_type"] == MyCustomObject
 
+        parquet_dn_config_2 = Config.configure_parquet_data_node(
+            id="bar", default_path=path, compression=compression, exposed_type=np.ndarray
+        )
+        dn_2 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_2, None, None)
+        assert isinstance(dn_2, ParquetDataNode)
+        assert dn_2.properties["exposed_type"] == np.ndarray
+
+        parquet_dn_config_3 = Config.configure_parquet_data_node(
+            id="bar", default_path=path, compression=compression, exposed_type=pd.DataFrame
+        )
+        dn_3 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_3, None, None)
+        assert isinstance(dn_3, ParquetDataNode)
+        assert dn_3.properties["exposed_type"] == pd.DataFrame
+
         with pytest.raises(InvalidConfigurationId):
             dn = ParquetDataNode("foo bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})