Add two more blog articles to the README.md #23
64 errors, 461 passed in 5h 2m 23s
592 files, 592 suites, 5h 2m 23s ⏱️
525 tests: 461 ✅ passed, 0 💤 skipped, 0 ❌ failed, 64 🔥 errors
19 354 runs: 18 894 ✅ passed, 371 💤 skipped, 25 ❌ failed, 64 🔥 errors
Results for commit 15ee454.
Annotations
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_check_schema (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 30m 35s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
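All 64 errors in this run share one signature: the Spark Connect client cannot reach the server on port 15002 and fails with gRPC status UNAVAILABLE ("Timeout occurred: FD Shutdown"), so each affected test either errors in setUpClass or fails on its first RPC. A minimal readiness probe that a test harness could run before the session starts, sketched under the assumption that the localhost:15002 address from these logs applies; wait_for_spark_connect is a hypothetical helper, not part of this repository:

import socket
import time

def wait_for_spark_connect(host: str = "localhost", port: int = 15002,
                           timeout: float = 120.0) -> None:
    # Block until the Spark Connect port accepts TCP connections,
    # polling every two seconds up to the given timeout.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=5):
                return  # server reachable
        except OSError:
            time.sleep(2)  # not up yet; retry
    raise TimeoutError(f"Spark Connect server {host}:{port} not reachable")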
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_check_schema (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 5m 34s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:13:52.457191502+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_check_schema>
def test_check_schema(self):
@contextlib.contextmanager
def test_requirement(error_message: str):
with self.assertRaises(ValueError) as e:
yield
self.assertEqual((error_message, ), e.exception.args)
with self.subTest("duplicate columns"):
with test_requirement("The datasets have duplicate columns.\n"
"Left column names: id, id\nRight column names: id, id"):
> self.left_df.select("id", "id").diff(self.right_df.select("id", "id"), "id")
python/test/test_diff.py:200:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:689: in diff
return Differ().diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:13:52.457191502+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
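For context, this is the API surface the failing tests exercise, reconstructed from the tracebacks; a sketch only, with the import path gresearch.spark.diff assumed from the file paths in the logs:

from pyspark.sql import SparkSession
from gresearch.spark.diff import DiffOptions  # import path assumed

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame([(1, 1.0, 'one')], ['id', 'val', 'label'])
right = spark.createDataFrame([(1, 1.1, 'one')], ['id', 'val', 'label'])

# Plain diff keyed on 'id', as in test_dataframe_diff below.
left.diff(right, 'id').orderBy('id').show()

# Diff with an extra column listing changed columns,
# as in test_dataframe_diff_with_changes below.
options = DiffOptions().with_change_column('changes')
left.diff_with_options(right, options, 'id').orderBy('id').show()

Note that each of these calls reaches left.session().conf.get("spark.sql.caseSensitive") inside _do_diff, which is exactly the RPC that times out in the failures above.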
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 5m 52s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:19:45.156904787+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff>
def test_dataframe_diff(self):
> diff = self.left_df.diff(self.right_df, 'id').orderBy('id').collect()
python/test/test_diff.py:484:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:689: in diff
return Differ().diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7f89b0923f10>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:19:45.156904787+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
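The same readiness check can be expressed at the gRPC level, which is closer to the failure mode reported here (a channel stuck in UNAVAILABLE). A sketch assuming the grpcio package the Spark Connect client already depends on, with the endpoint taken from the logs:

import grpc

channel = grpc.insecure_channel("localhost:15002")
try:
    # Block until the channel reaches READY or the timeout elapses.
    grpc.channel_ready_future(channel).result(timeout=30)
    print("Spark Connect endpoint is accepting gRPC connections")
except grpc.FutureTimeoutError:
    print("gRPC channel never became ready; server is likely down")
finally:
    channel.close()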
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_changes (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_changes (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 4m 39s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:24:24.264889186+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_changes>
def test_dataframe_diff_with_changes(self):
options = DiffOptions().with_change_column('changes')
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:517:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7f89b0923f10>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:24:24.264889186+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
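Since only 1 out of 30 runs fails per test, these look like infrastructure flakes rather than product bugs. One common mitigation, sketched under the assumption that the project adopts the pytest-rerunfailures plugin (nothing in these logs shows it is installed), is to rerun connectivity-sensitive tests:

import pytest

@pytest.mark.flaky(reruns=2, reruns_delay=10)  # needs pytest-rerunfailures
def test_dataframe_diff_example():
    ...  # a connectivity-sensitive test body would go here

The same policy can be applied suite-wide with pytest --reruns 2 --reruns-delay 10, at the cost of masking genuine regressions that fail intermittently.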
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_default_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_default_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 5m 37s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:30:01.618737682+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_default_options>
def test_dataframe_diff_with_default_options(self):
> diff = self.left_df.diff_with_options(self.right_df, DiffOptions(), 'id').orderBy('id').collect()
python/test/test_diff.py:502:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7f89b0923f10>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:30:01.618737682+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:30:01.618737682+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_diff_mode_column_by_column (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_diff_mode_column_by_column (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 6m 21s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:36:23.659083438+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_column_by_column>
def test_dataframe_diff_with_diff_mode_column_by_column(self):
options = DiffOptions().with_diff_mode(DiffMode.ColumnByColumn)
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:522:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7f89b0923f10>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:36:23.659083438+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
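This annotation and the two that follow cover the three DiffMode values exercised by the suite. A sketch enumerating them, reusing the assumed import path from above; illustrative only, not the project's own test code:

from pyspark.sql import SparkSession
from gresearch.spark.diff import DiffOptions, DiffMode  # import path assumed

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame([(1, 1.0, 'one')], ['id', 'val', 'label'])
right = spark.createDataFrame([(1, 1.1, 'one')], ['id', 'val', 'label'])

# ColumnByColumn, LeftSide and RightSide appear in the tests above and below.
for mode in (DiffMode.ColumnByColumn, DiffMode.LeftSide, DiffMode.RightSide):
    options = DiffOptions().with_diff_mode(mode)
    left.diff_with_options(right, options, 'id').orderBy('id').show()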
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_diff_mode_left_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_diff_mode_left_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 6m 49s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:43:13.244659238+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_left_side>
def test_dataframe_diff_with_diff_mode_left_side(self):
options = DiffOptions().with_diff_mode(DiffMode.LeftSide)
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:532:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7f89b0923f10>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:43:13.244659238+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_diff_mode_right_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
super(DiffTest, cls).setUpClass()
value_row = Row('id', 'val', 'label')
> cls.left_df = cls.spark.createDataFrame([
value_row(1, 1.0, 'one'),
value_row(2, 2.0, 'two'),
value_row(3, 3.0, 'three'),
value_row(4, None, None),
value_row(5, 5.0, 'five'),
value_row(7, 7.0, 'seven'),
])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7fddf41ded00>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T08:38:53.605549007+00:00"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
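Every failure above is the same gRPC StatusCode.UNAVAILABLE: the Spark Connect server that the connect test runs expect on port 15002 (ipv6:%5B::1%5D or ipv4:127.0.0.1) never became reachable, so setUpClass aborts and every test in the class is reported as an error. A minimal sketch of a pre-flight probe for that endpoint follows; it is not part of the test suite, connect_or_fail is a hypothetical helper, and the address sc://localhost:15002 is assumed from the errors above.

import grpc
from pyspark.sql import SparkSession

def connect_or_fail(host: str = "localhost", port: int = 15002,
                    timeout: float = 10.0) -> SparkSession:
    # Probe the gRPC endpoint first so an absent server fails fast here
    # instead of timing out later inside setUpClass/createDataFrame.
    channel = grpc.insecure_channel(f"{host}:{port}")
    try:
        # Raises grpc.FutureTimeoutError when nothing answers in time,
        # the condition gRPC otherwise surfaces as StatusCode.UNAVAILABLE.
        grpc.channel_ready_future(channel).result(timeout=timeout)
    finally:
        channel.close()
    return SparkSession.builder.remote(f"sc://{host}:{port}").getOrCreate()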
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_diff_mode_right_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 6m 14s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:49:27.41142313+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_right_side>
def test_dataframe_diff_with_diff_mode_right_side(self):
options = DiffOptions().with_diff_mode(DiffMode.RightSide)
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:537:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7f89b0923f10>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
raise convert_exception(info, status.message) from None
raise SparkConnectGrpcException(status.message) from None
else:
> raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:49:27.41142313+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
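On Spark 3.4.4 the session was created, so the failure surfaces later: _do_diff reads spark.sql.caseSensitive through the session config, and that RPC is the first call to hit the dead connection. For reference, a minimal sketch of the call pattern this test exercises, run against a plain local session instead of Spark Connect; it assumes the spark-extension package and its jar are available and that importing gresearch.spark.diff attaches the diff methods to DataFrame, as the test code above implies.

from pyspark.sql import Row, SparkSession
from gresearch.spark.diff import DiffMode, DiffOptions

spark = SparkSession.builder.master("local[1]").getOrCreate()
left = spark.createDataFrame([Row(id=1, val=1.0), Row(id=2, val=2.0)])
right = spark.createDataFrame([Row(id=1, val=1.1), Row(id=2, val=2.0)])

# Mirrors test_dataframe_diff_with_diff_mode_right_side above:
# diff on the 'id' key column, emitting the right-hand side values.
options = DiffOptions().with_diff_mode(DiffMode.RightSide)
diff = left.diff_with_options(right, options, "id").orderBy("id").collect()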
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_diff_mode_side_by_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with the same pyspark.errors.exceptions.connect.SparkConnectGrpcException (StatusCode.UNAVAILABLE: "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown") [setUpClass traceback identical to test_dataframe_diff_with_diff_mode_right_side above; raised at pyspark/sql/connect/client/core.py:1542]
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_diff_mode_side_by_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 4m 27s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-12-11T08:53:54.68415159+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_side_by_side>
def test_dataframe_diff_with_diff_mode_side_by_side(self):
options = DiffOptions().with_diff_mode(DiffMode.SideBySide)
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:527:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
[remaining frames and the raised SparkConnectGrpcException (client.py:1095) identical to the failed run of test_dataframe_diff_with_diff_mode_right_side above, apart from the peer address and timestamp quoted at the top of this entry]
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with the same pyspark.errors.exceptions.connect.SparkConnectGrpcException (StatusCode.UNAVAILABLE: "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown") [setUpClass traceback identical to test_dataframe_diff_with_diff_mode_right_side above; raised at pyspark/sql/connect/client/core.py:1542]
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 6m 30s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T09:00:25.688879075+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_ignored>
def test_dataframe_diff_with_ignored(self):
> diff = self.left_df.diff(self.right_df, ['id'], ['label']).orderBy('id').collect()
python/test/test_diff.py:488:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:689: in diff
return Differ().diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
[remaining frames and the raised SparkConnectGrpcException (client.py:1095) identical to the failed run of test_dataframe_diff_with_diff_mode_right_side above, apart from the peer address and timestamp quoted at the top of this entry]
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with the same pyspark.errors.exceptions.connect.SparkConnectGrpcException (StatusCode.UNAVAILABLE: "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown") [setUpClass traceback identical to test_dataframe_diff_with_diff_mode_right_side above; raised at pyspark/sql/connect/client/core.py:1542]
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 4m 55s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T09:05:21.29728083+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_options>
def test_dataframe_diff_with_options(self):
options = DiffOptions('d', 'l', 'r', 'i', 'c', 'r', 'n', None)
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:507:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
[remaining frames and the raised SparkConnectGrpcException (client.py:1095) identical to the failed run of test_dataframe_diff_with_diff_mode_right_side above, apart from the peer address and timestamp quoted at the top of this entry]
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_options_and_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with the same pyspark.errors.exceptions.connect.SparkConnectGrpcException (StatusCode.UNAVAILABLE: "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown") [setUpClass traceback identical to test_dataframe_diff_with_diff_mode_right_side above; raised at pyspark/sql/connect/client/core.py:1542]
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_options_and_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 4m 51s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T09:10:12.653592029+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_options_and_ignored>
def test_dataframe_diff_with_options_and_ignored(self):
options = DiffOptions('d', 'l', 'r', 'i', 'c', 'r', 'n', None)
> diff = self.left_df.diff_with_options(self.right_df, options, ['id'], ['label']).orderBy('id').collect()
python/test/test_diff.py:512:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
[remaining frames and the raised SparkConnectGrpcException (client.py:1095) identical to the failed run of test_dataframe_diff_with_diff_mode_right_side above, apart from the peer address and timestamp quoted at the top of this entry]
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diff_with_sparse_mode (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with the same pyspark.errors.exceptions.connect.SparkConnectGrpcException (StatusCode.UNAVAILABLE: "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown") [setUpClass traceback identical to test_dataframe_diff_with_diff_mode_right_side above; raised at pyspark/sql/connect/client/core.py:1542]
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs failed: test_dataframe_diff_with_sparse_mode (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.8)/test-results-connect/pytest-1733904497.050980911-603.xml [took 4m 8s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-12-11T09:14:21.220822164+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_sparse_mode>
def test_dataframe_diff_with_sparse_mode(self):
options = DiffOptions().with_sparse_mode(True)
> diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:542:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/conf.py:61: in get
[remaining frames and the raised SparkConnectGrpcException (client.py:1095) identical to the failed run of test_dataframe_diff_with_diff_mode_right_side above, apart from the peer address and timestamp quoted at the top of this entry]
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 30 runs with error: test_dataframe_diffwith (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1733904497.773686954-12645.xml [took 0s]
Raw output
failed on setup with the same pyspark.errors.exceptions.connect.SparkConnectGrpcException (StatusCode.UNAVAILABLE: "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown") [setUpClass traceback identical to test_dataframe_diff_with_diff_mode_right_side above; raised at pyspark/sql/connect/client/core.py:1542]