From d1a758e68d1d4c3e74beb8e3109daa7476e80ab0 Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Wed, 8 Nov 2023 14:50:29 -0800
Subject: [PATCH 1/8] SNOW-870432: use_logical_type for inferring timezone in pandas dfs

---
 src/snowflake/snowpark/session.py | 11 +++++++--
 tests/integ/test_dataframe.py     | 37 +++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/snowflake/snowpark/session.py b/src/snowflake/snowpark/session.py
index 9c9eb714ad4..7a881e3ebf6 100644
--- a/src/snowflake/snowpark/session.py
+++ b/src/snowflake/snowpark/session.py
@@ -1789,6 +1789,7 @@ def write_pandas(
         create_temp_table: bool = False,
         overwrite: bool = False,
         table_type: Literal["", "temp", "temporary", "transient"] = "",
+        use_logical_type: Optional[bool] = None,
     ) -> Table:
         """Writes a pandas DataFrame to a table in Snowflake and returns a
         Snowpark :class:`DataFrame` object referring to the table where the
@@ -1822,8 +1823,12 @@ def write_pandas(
                 then it truncates the table. Note that in both cases (when overwrite is set to ``True``) it will replace
                 the existing contents of the table with that of the passed in Pandas DataFrame.
             table_type: The table type of table to be created. The supported values are: ``temp``, ``temporary``,
-                and ``transient``. An empty string means to create a permanent table. Learn more about table
-                types `here `_.
+                and ``transient``. An empty string means to create a permanent table. Learn more about table types
+                `here `_.
+            use_logical_type: Boolean that specifies whether to use Parquet logical types. With this file format option,
+                Snowflake can interpret Parquet logical types during data loading. To enable Parquet logical types,
+                set ``use_logical_type`` to ``True``. Set it to ``None`` to use Snowflake's default. For more information, see:
+                https://docs.snowflake.com/en/sql-reference/sql/create-file-format

         Example::

@@ -1902,6 +1907,7 @@ def write_pandas(
                 auto_create_table=auto_create_table,
                 overwrite=overwrite,
                 table_type=table_type,
+                use_logical_type=use_logical_type,
             )
         except ProgrammingError as pe:
             if pe.msg.endswith("does not exist"):
@@ -2008,6 +2014,7 @@ def create_dataframe(
                 quote_identifiers=True,
                 auto_create_table=True,
                 table_type="temporary",
+                use_logical_type=True,
             )
             set_api_call_source(t, "Session.create_dataframe[pandas]")
             return t
diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py
index d9c9dae725e..6f99661866d 100644
--- a/tests/integ/test_dataframe.py
+++ b/tests/integ/test_dataframe.py
@@ -68,6 +68,7 @@
     StringType,
     StructField,
     StructType,
+    TimestampTimeZone,
     TimestampType,
     TimeType,
     VariantType,
@@ -1478,6 +1479,41 @@ def test_create_dataframe_with_semi_structured_data_types(session):
     )


+@pytest.mark.skipif(not is_pandas_available, reason="pandas is required")
+def test_create_dataframe_with_pandas_df(session):
+    data = {
+        "pandas_datetime": ["2021-09-30 12:00:00", "2021-09-30 13:00:00"],
+        "date": [pd.to_datetime("2010-1-1"), pd.to_datetime("2011-1-1")],
+        "datetime.datetime": [
+            datetime.datetime(2010, 1, 1),
+            datetime.datetime(2010, 1, 1),
+        ],
+    }
+    pdf = pd.DataFrame(data)
+    pdf["pandas_datetime"] = pd.to_datetime(pdf["pandas_datetime"])
+    df = session.create_dataframe(pdf)
+
+    assert df.schema[0].name == '"pandas_datetime"'
+    assert df.schema[1].name == '"date"'
+    assert df.schema[2].name == '"datetime.datetime"'
+    assert df.schema[0].datatype == TimestampType(TimestampTimeZone.NTZ)
+    assert df.schema[1].datatype == TimestampType(TimestampTimeZone.NTZ)
+    assert df.schema[2].datatype == TimestampType(TimestampTimeZone.NTZ)
+
+    # test with timezone added to timestamp
+    pdf["pandas_datetime"] = pdf["pandas_datetime"].dt.tz_localize("US/Pacific")
+    pdf["date"] = pdf["date"].dt.tz_localize("US/Pacific")
+    pdf["datetime.datetime"] = pdf["datetime.datetime"].dt.tz_localize("US/Pacific")
+    df = session.create_dataframe(pdf)
+
+    assert df.schema[0].name == '"pandas_datetime"'
+    assert df.schema[1].name == '"date"'
+    assert df.schema[2].name == '"datetime.datetime"'
+    assert df.schema[0].datatype == TimestampType(TimestampTimeZone.LTZ)
+    assert df.schema[1].datatype == TimestampType(TimestampTimeZone.LTZ)
+    assert df.schema[2].datatype == TimestampType(TimestampTimeZone.LTZ)
+
+
 def test_create_dataframe_with_dict(session):
     data = {f"snow_{idx + 1}": idx**3 for idx in range(5)}
     expected_names = [name.upper() for name in data.keys()]
@@ -1958,6 +1994,7 @@ def test_case_insensitive_collect(session):
     assert row["p@$$w0rd"] == "test"
     assert row["P@$$W0RD"] == "test"

+
 def test_case_insensitive_local_iterator(session):
     df = session.create_dataframe(
         [["Gordon", 153]], schema=["firstname", "matches_won"]

From beb7bf07fc0538fc7b8b7a7a5495cf3516df272e Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Thu, 9 Nov 2023 15:30:28 -0800
Subject: [PATCH 2/8] changelog and dependency updates

---
 CHANGELOG.md | 14 ++++++++++++++
 setup.py     |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9231212f625..b98a084e164 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,20 @@

 ### New Features

+- Added parameter for `use_logical_type` in `Session.write_pandas` to allow correct inference of pandas timestamp types from parquet files.
+
+### Dependency Updates
+
+- Updated ``snowflake-connector-python`` to 3.4.0.
+
+### Bug Fixes
+
+- Fixed a bug in `Session.create_dataframe` where the snowpark dataframes created using pandas dataframes were not inferring the type for timestamp columns correctly.
+
+## 1.10.0 (2023-11-03)
+
+### New Features
+
 - Added support for managing case sensitivity in `DataFrame.to_local_iterator()`.
 - Added support for specifying vectorized UDTF's input column names by using the optional parameter `input_names` in `UDTFRegistration.register/register_file` and `functions.pandas_udtf`. By default, `RelationalGroupedDataFrame.applyInPandas` will infer the column names from current dataframe schema.
 - Add `sql_error_code` and `raw_message` attributes to `SnowflakeSQLException` when it is caused by a SQL exception.
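
[Editorial aside, not part of the patch series: the changelog entry above introduces the new `use_logical_type` option of `Session.write_pandas`. A minimal usage sketch follows. It assumes an already-created `Session` object and a hypothetical table name "MY_TABLE"; only the `use_logical_type=True` keyword itself comes from this patch.]

# Minimal usage sketch. Assumes `session` is an existing snowflake.snowpark.Session.
import pandas as pd

# from snowflake.snowpark import Session
# session = Session.builder.configs(connection_parameters).create()

pdf = pd.DataFrame(
    {
        "id": [1, 2],
        "event_time": pd.to_datetime(
            ["2021-09-30 12:00:00", "2021-09-30 13:00:00"]
        ).tz_localize("US/Pacific"),
    }
)

# With use_logical_type=True, Snowflake interprets Parquet logical types during
# the load, so the timezone on "event_time" is preserved.
table = session.write_pandas(
    pdf,
    table_name="MY_TABLE",  # hypothetical table name
    auto_create_table=True,
    use_logical_type=True,
)
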
diff --git a/setup.py b/setup.py
index 9dcead7dd22..54b75bb0783 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
 THIS_DIR = os.path.dirname(os.path.realpath(__file__))
 SRC_DIR = os.path.join(THIS_DIR, "src")
 SNOWPARK_SRC_DIR = os.path.join(SRC_DIR, "snowflake", "snowpark")
-CONNECTOR_DEPENDENCY_VERSION = ">=3.2.0, <4.0.0"
+CONNECTOR_DEPENDENCY_VERSION = ">=3.4.0, <4.0.0"
 INSTALL_REQ_LIST = [
     "setuptools>=40.6.0",
     "wheel",

From 182537f9f6d7b1f57d3948a12c2aa56ee8b0caff Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Thu, 9 Nov 2023 15:31:04 -0800
Subject: [PATCH 3/8] fix release number

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b98a084e164..efc91ad4706 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Release History

-## 1.10.0 (2023-11-03)
+## 1.11.0 (TBD)

 ### New Features


From 0ea1145a02c54019e8eb9c6eb7479beea155f2bf Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Fri, 10 Nov 2023 10:45:36 -0800
Subject: [PATCH 4/8] provide additional details about the correct behavior

---
 CHANGELOG.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index efc91ad4706..a1e92f1487e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,11 @@

 ### Bug Fixes

-- Fixed a bug in `Session.create_dataframe` where the snowpark dataframes created using pandas dataframes were not inferring the type for timestamp columns correctly.
+- Fixed a bug in `Session.create_dataframe` where the snowpark dataframes created using pandas dataframes were not inferring the type for timestamp columns correctly. The behavior is as follows:
+  - Earlier, timestamp columns without a timezone would be inferred as `LongType()` but will now be correctly inferred as `TimestampType(TimestampTimeZone.NTZ)`.
+  - Earlier, timestamp columns without a timezone would be converted to nanosecond epochs, but will now correctly be maintained as timestamp values.
+  - Earlier, timestamp columns with a timezone would be inferred as `TimestampType(TimestampTimeZone.NTZ)` but will now be correctly inferred as `TimestampType(TimestampTimeZone.LTZ)`.
+  - Earlier, timestamp columns with a timezone would lose timezone information and read incorrect times, but now the timezone information will be retained and the time will be stored correctly.

 ## 1.10.0 (2023-11-03)

From ee7e7568838976d0144eed5233cf135e830d463a Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Mon, 13 Nov 2023 15:04:11 -0800
Subject: [PATCH 5/8] use session param to control behavior

---
 src/snowflake/snowpark/session.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/snowflake/snowpark/session.py b/src/snowflake/snowpark/session.py
index 7a881e3ebf6..72545ca375d 100644
--- a/src/snowflake/snowpark/session.py
+++ b/src/snowflake/snowpark/session.py
@@ -167,6 +167,9 @@
     "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS"
 )
 _PYTHON_SNOWPARK_USE_SQL_SIMPLIFIER_STRING = "PYTHON_SNOWPARK_USE_SQL_SIMPLIFIER"
+_PYTHON_SNOWPARK_USE_LOGICAL_TYPE_FOR_CREATE_DATAFRAME_STRING = (
+    "PYTHON_SNOWPARK_USE_LOGICAL_TYPE_FOR_CREATE_DATAFRAME"
+)

 WRITE_PANDAS_CHUNK_SIZE: int = 100000 if is_in_stored_procedure() else None

@@ -397,6 +400,11 @@ def __init__(
                 _PYTHON_SNOWPARK_USE_SQL_SIMPLIFIER_STRING, True
             )
         )
+        self._use_logical_type_for_create_df: bool = (
+            self._conn._get_client_side_session_parameter(
+                _PYTHON_SNOWPARK_USE_LOGICAL_TYPE_FOR_CREATE_DATAFRAME_STRING, True
+            )
+        )
         self._custom_package_usage_config: Dict = {}
         self._conf = self.RuntimeConfig(self, options or {})
         self._tmpdir_handler: Optional[tempfile.TemporaryDirectory] = None
@@ -2014,7 +2022,7 @@ def create_dataframe(
                 quote_identifiers=True,
                 auto_create_table=True,
                 table_type="temporary",
-                use_logical_type=True,
+                use_logical_type=self._use_logical_type_for_create_df,
             )
             set_api_call_source(t, "Session.create_dataframe[pandas]")
             return t

From 99bd36f0c5d1828c17e3d65c162bd50b7fdb30b6 Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Mon, 13 Nov 2023 15:05:41 -0800
Subject: [PATCH 6/8] changelog updates

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a1e92f1487e..bec5ca22827 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
   - Earlier, timestamp columns without a timezone would be converted to nanosecond epochs, but will now correctly be maintained as timestamp values.
   - Earlier, timestamp columns with a timezone would be inferred as `TimestampType(TimestampTimeZone.NTZ)` but will now be correctly inferred as `TimestampType(TimestampTimeZone.LTZ)`.
   - Earlier, timestamp columns with a timezone would lose timezone information and read incorrect times, but now the timezone information will be retained and the time will be stored correctly.
+  - Set session parameter `PYTHON_SNOWPARK_USE_LOGICAL_TYPE_FOR_CREATE_DATAFRAME` to revert to the old behavior. It is recommended that you update your code soon to align with the correct behavior, as the parameter will be removed in the future.

 ## 1.10.0 (2023-11-03)

From ae9fc0f3a28045723808dd68ba579cf66ac5699a Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Fri, 29 Dec 2023 10:31:41 -0800
Subject: [PATCH 7/8] fix merge

---
 CHANGELOG.md                      | 7 +------
 src/snowflake/snowpark/session.py | 1 +
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d440a8c66ef..ad2ce36e67a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,7 +34,6 @@

 ### New Features

-- Added parameter for `use_logical_type` in `Session.write_pandas` to allow correct inference of pandas timestamp types from parquet files.
 - Add the `conn_error` attribute to `SnowflakeSQLException` that stores the whole underlying exception from `snowflake-connector-python`.
 - Added support for `RelationalGroupedDataframe.pivot()` to access `pivot` in the following pattern `Dataframe.group_by(...).pivot(...)`.
 - Added experimental feature: Local Testing Mode, which allows you to create and operate on Snowpark Python DataFrames locally without connecting to a Snowflake account. You can use the local testing framework to test your DataFrame operations locally, on your development machine or in a CI (continuous integration) pipeline, before deploying code changes to your account.
@@ -42,15 +41,11 @@
 - Added support for `arrays_to_object` new functions in `snowflake.snowpark.functions`.
 - Added support for the vector data type.

-## Dependency Updates
+### Dependency Updates

 - Bumped cloudpickle dependency to work with `cloudpickle==2.2.1`
 - Updated ``snowflake-connector-python`` to `3.4.0`.

-### Dependency Updates
-
-- Updated ``snowflake-connector-python`` to 3.4.0.
-
 ### Bug Fixes

 - DataFrame column names quoting check now supports newline characters.
diff --git a/src/snowflake/snowpark/session.py b/src/snowflake/snowpark/session.py
index 4f692ffbd2f..b3cd66b2d9a 100644
--- a/src/snowflake/snowpark/session.py
+++ b/src/snowflake/snowpark/session.py
@@ -1865,6 +1865,7 @@ def write_pandas(
         create_temp_table: bool = False,
         overwrite: bool = False,
         table_type: Literal["", "temp", "temporary", "transient"] = "",
+        **kwargs: Dict[str, Any],
     ) -> Table:
         """Writes a pandas DataFrame to a table in Snowflake and returns a
         Snowpark :class:`DataFrame` object referring to the table where the

From c816451a8484902e417f7076b49049e56aa7185d Mon Sep 17 00:00:00 2001
From: Afroz Alam
Date: Tue, 2 Jan 2024 15:39:05 -0800
Subject: [PATCH 8/8] simplify changelog

---
 CHANGELOG.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ad2ce36e67a..7212193c5e7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,10 +15,8 @@
 - Fixed a bug in `DataFrame.na.fill` that caused Boolean values to erroneously override integer values.
 - Fixed sql simplifier for filter with window function columns in select.
 - Fixed a bug in `Session.create_dataframe` where the snowpark dataframes created using pandas dataframes were not inferring the type for timestamp columns correctly. The behavior is as follows:
-  - Earlier, timestamp columns without a timezone would be inferred as `LongType()` but will now be correctly inferred as `TimestampType(TimestampTimeZone.NTZ)`.
-  - Earlier, timestamp columns without a timezone would be converted to nanosecond epochs, but will now correctly be maintained as timestamp values.
-  - Earlier, timestamp columns with a timezone would be inferred as `TimestampType(TimestampTimeZone.NTZ)` but will now be correctly inferred as `TimestampType(TimestampTimeZone.LTZ)`.
-  - Earlier, timestamp columns with a timezone would lose timezone information and read incorrect times, but now the timezone information will be retained and the time will be stored correctly.
+  - Earlier, timestamp columns without a timezone would be converted to nanosecond epochs and inferred as `LongType()`, but will now correctly be maintained as timestamp values and inferred as `TimestampType(TimestampTimeZone.NTZ)`.
+  - Earlier, timestamp columns with a timezone would be inferred as `TimestampType(TimestampTimeZone.NTZ)` and lose timezone information, but will now be correctly inferred as `TimestampType(TimestampTimeZone.LTZ)` and the timezone information is retained correctly.
   - Set session parameter `PYTHON_SNOWPARK_USE_LOGICAL_TYPE_FOR_CREATE_DATAFRAME` to revert to the old behavior. It is recommended that you update your code soon to align with the correct behavior, as the parameter will be removed in the future.

 ## 1.11.1 (2023-12-07)
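
[Editorial aside, not part of the patch series: to make the final changelog entry concrete, the sketch below mirrors the assertions of `test_create_dataframe_with_pandas_df` from patch 1. It assumes an already-created `Session` object; the expected types come directly from that test.]

import datetime

import pandas as pd

from snowflake.snowpark.types import TimestampTimeZone, TimestampType

# Assumes `session` is an existing snowflake.snowpark.Session.
pdf = pd.DataFrame(
    {"ts": [datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1)]}
)

# Timezone-naive pandas timestamps are kept as timestamp values (not nanosecond
# epochs) and are now inferred as TIMESTAMP_NTZ.
df = session.create_dataframe(pdf)
assert df.schema[0].datatype == TimestampType(TimestampTimeZone.NTZ)

# Timezone-aware pandas timestamps keep their timezone and are inferred as TIMESTAMP_LTZ.
pdf["ts"] = pdf["ts"].dt.tz_localize("US/Pacific")
df = session.create_dataframe(pdf)
assert df.schema[0].datatype == TimestampType(TimestampTimeZone.LTZ)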