build(datasets): Release kedro-datasets 1.7.0 (#326)
ankatiyar authored Sep 1, 2023
1 parent 87a8923 commit e6d0f6c
Showing 5 changed files with 51 additions and 40 deletions.
10 changes: 10 additions & 0 deletions kedro-datasets/RELEASE.md
@@ -1,9 +1,19 @@
# Upcoming Release
## Major features and improvements
## Bug fixes and other changes
## Community contributions

# Release 1.7.0:
## Major features and improvements
* Added `polars.GenericDataSet`, a `GenericDataSet` backed by [polars](https://www.pola.rs/), a lightning-fast dataframe library written in Rust.
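The "best effort" method matching that the new dataset's docstring describes can be sketched in plain Python. This is a simplified stand-in for illustration, not the actual kedro-datasets implementation: `read_{file_format}` / `write_{file_format}` callables are looked up by name, and an error is raised only when neither exists.

```python
import types

# Stand-in namespace with a single reader, mimicking part of the polars module surface.
fake_polars = types.SimpleNamespace(read_csv=lambda path: f"read {path}")

def resolve_io(namespace, file_format):
    """Best-effort matching: find read_{fmt} and/or write_{fmt} by name."""
    reader = getattr(namespace, f"read_{file_format}", None)
    writer = getattr(namespace, f"write_{file_format}", None)
    if reader is None and writer is None:
        raise ValueError(f"no read_{file_format} or write_{file_format} found")
    return reader, writer

reader, writer = resolve_io(fake_polars, "csv")
assert reader("cars.csv") == "read cars.csv"
assert writer is None  # no write_csv on the stand-in, but one match suffices
```

A format with no matching method at all (the docstring's 'clipboard' example) would raise `ValueError` in this sketch, mirroring the `DataSetError` the real class raises.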

## Bug fixes and other changes
* Fixed broken links in docstrings.
* Reverted PySpark pin to <4.0.

## Community contributions
Many thanks to the following Kedroids for contributing PRs to this release:
* [Walber Moreira](https://github.com/wmoreiraa)

# Release 1.6.0:

1 change: 1 addition & 0 deletions kedro-datasets/docs/source/kedro_datasets.rst
@@ -41,6 +41,7 @@ kedro_datasets
kedro_datasets.plotly.JSONDataSet
kedro_datasets.plotly.PlotlyDataSet
kedro_datasets.polars.CSVDataSet
kedro_datasets.polars.GenericDataSet
kedro_datasets.redis.PickleDataSet
kedro_datasets.snowflake.SnowparkTableDataSet
kedro_datasets.spark.DeltaTableDataSet
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/__init__.py
@@ -1,3 +1,3 @@
"""``kedro_datasets`` is where you can find all of Kedro's data connectors."""

__version__ = "1.6.0"
__version__ = "1.7.0"
76 changes: 38 additions & 38 deletions kedro-datasets/kedro_datasets/polars/generic_dataset.py
@@ -20,26 +20,29 @@

# pylint: disable=too-many-instance-attributes
class GenericDataSet(AbstractVersionedDataSet[pl.DataFrame, pl.DataFrame]):
"""`polars.GenericDataSet` loads/saves data from/to a data file using an underlying
filesystem (e.g.: local, S3, GCS). It uses polars to dynamically select the
appropriate type of read/write target on a best effort basis.
Example adding a catalog entry with
`YAML API
<https://docs.kedro.org/en/stable/data/\
data_catalog_yaml_examples.html#data-catalog-yaml-examples>`_:
.. code-block:: yaml
cars:
type: polars.GenericDataSet
file_format: parquet
filepath: s3://data/01_raw/company/cars.parquet
load_args:
low_memory: True
save_args:
compression: "snappy"
Example using Python API:
::
>>> from kedro_datasets.polars import GenericDataSet
>>> import polars as pl
>>>
@@ -50,6 +53,7 @@ class GenericDataSet(AbstractVersionedDataSet[pl.DataFrame, pl.DataFrame]):
>>> data_set.save(data)
>>> reloaded = data_set.load()
>>> assert data.frame_equal(reloaded)
"""

DEFAULT_LOAD_ARGS = {} # type: Dict[str, Any]
@@ -67,25 +71,25 @@ def __init__(
fs_args: Dict[str, Any] = None,
):
"""Creates a new instance of ``GenericDataSet`` pointing to a concrete data file
on a specific filesystem. The appropriate polars load/save methods are dynamically
identified by string matching on a best effort basis.
Args:
filepath: Filepath in POSIX format to a file prefixed with a protocol like
`s3://`.
If prefix is not provided, `file` protocol (local filesystem)
will be used.
The prefix should be any protocol supported by ``fsspec``.
Key assumption: The first argument of either load/save method points to
a filepath/buffer/io type location. There are some read/write targets such
as 'clipboard' or 'records' that will fail since they do not take a filepath
like argument.
file_format: String which is used to match the appropriate load/save method on a
best effort basis. For example if 'csv' is passed, the `polars.read_csv` and
`polars.DataFrame.write_csv` methods will be identified. An error will
be raised unless there is at least one matching `read_<file_format>`
or `write_<file_format>` method.
load_args: Polars options for loading files.
Here you can find all available arguments:
https://pola-rs.github.io/polars/py-polars/html/reference/io.html
All defaults are preserved.
@@ -100,16 +104,12 @@ def __init__(
credentials: Credentials required to get access to the underlying filesystem.
E.g. for ``GCSFileSystem`` it should look like `{"token": None}`.
fs_args: Extra arguments to pass into underlying filesystem class constructor
(e.g. `{"project": "my-project"}` for ``GCSFileSystem``).
metadata: Any arbitrary metadata.
This is ignored by Kedro, but may be consumed by users or external plugins.
Raises:
DataSetError: Will be raised unless at least one appropriate read or write
method is identified.
"""

self._file_format = file_format.lower()
2 changes: 1 addition & 1 deletion kedro-datasets/setup.py
@@ -7,7 +7,7 @@
SPARK = "pyspark>=2.2, <4.0"
HDFS = "hdfs>=2.5.8, <3.0"
S3FS = "s3fs>=0.3.0, <0.5"
POLARS = "polars~=0.18.0"
POLARS = "polars>=0.18.0"
DELTA = "delta-spark~=1.2.1"


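The setup.py change loosens the polars version specifier: `~=0.18.0` is a compatible-release pin that excludes 0.19.x and later, while `>=0.18.0` admits any newer release. The difference can be checked with the `packaging` library (a quick illustration, assuming `packaging` is available):

```python
from packaging.specifiers import SpecifierSet

old_pin = SpecifierSet("~=0.18.0")  # compatible release: >=0.18.0, ==0.18.*
new_pin = SpecifierSet(">=0.18.0")  # any release from 0.18.0 upward

assert "0.18.4" in old_pin and "0.18.4" in new_pin
assert "0.19.0" not in old_pin  # the old pin blocked 0.19.x
assert "0.19.0" in new_pin      # the new pin allows it
```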
