-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix token refresh deadlock issue (#134)
We have gotten reports of infinite looping for the last ~4 minutes of token ttl when reading parquet from GCS with Dapla toolbelt. The issue should be resolved in this PR by overriding the refresh_handler; this is the intended way to provide credentials with custom logic for fetching tokens, and it does not result in deadlock issues. Previously, we directly overrode the refresh method. However, this approach led to deadlock issues in gcsfs/credentials.py's maybe_refresh method. Other changes: Since we can't force a refresh, the threshold is lowered to the old value of 20s to keep us from waiting ~4 minutes for a new token. Refresh window was modified in: googleapis/google-auth-library-python@c6af1d6 Issue recreation steps for https://jupyter.dapla.ssb.no/ and Dapla Lab VSCode: This code would freeze when ttl of token was less than 3m 45s: import time import dapla as dp import pandas as pd import inspect import logging logging.basicConfig() logging.getLogger().setLevel(logging.DEBUG) hvilkensomhelststi = "ssb-prod-dapla-felles-data-delt/GIS/testdata/noen_boligbygg_oslo.parquet" while True: print(dp.read_pandas(hvilkensomhelststi)) print(f"{pd.Timestamp.now().round('s')=}") time.sleep(0.1)
- Loading branch information
Showing
5 changed files
with
79 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "dapla-toolbelt" | ||
version = "2.0.11" | ||
version = "2.0.12" | ||
description = "Dapla Toolbelt" | ||
authors = ["Dapla Developers <[email protected]>"] | ||
license = "MIT" | ||
|
@@ -54,6 +54,7 @@ types-requests = ">=2.28.11" | |
pyarrow-stubs = ">=10.0.1.7" | ||
google-auth-stubs = ">=0.2.0" # Not maintained by Google, should change if Google releases their own stubs | ||
pandas-stubs = ">=2.0.0" | ||
pytest-timeout = "^2.3.1" | ||
|
||
[tool.pytest.ini_options] | ||
pythonpath = ["src"] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,47 @@ | ||
from datetime import timedelta | ||
from unittest.mock import Mock | ||
from unittest.mock import patch | ||
|
||
import pytest | ||
from gcsfs.retry import HttpError | ||
from google.auth._helpers import utcnow | ||
|
||
from dapla import pandas as dp | ||
from dapla.gcs import GCSFileSystem | ||
|
||
|
||
def test_instance() -> None: | ||
# Check that instantiation works with the current version of pyarrow | ||
client = GCSFileSystem() | ||
assert client is not None | ||
|
||
|
||
@pytest.mark.timeout( | ||
30 | ||
) # Times the test out after 30 seconds, which will happen if a deadlock occurs | ||
@patch("dapla.auth.AuthClient.is_ready") | ||
@patch("dapla.auth.AuthClient.fetch_google_token") | ||
def test_gcs_deadlock(mock_fetch_google_token: Mock, mock_is_ready: Mock) -> None: | ||
# When overriding the refresh method we experienced a deadlock, resulting in the credentials never being refreshed. | ||
# This test checks that the credentials object is updated on refresh | ||
# and that it proceeds to the next step when a valid token is provided. | ||
|
||
mock_is_ready.return_value = True # Mock the client as ready so that ADC is not used | ||
mock_fetch_google_token.side_effect = [ | ||
("FakeToken1", utcnow()), # type: ignore[no-untyped-call] | ||
("FakeToken2", utcnow()), # type: ignore[no-untyped-call] | ||
("FakeToken3", utcnow()), # type: ignore[no-untyped-call] | ||
("FakeToken4", utcnow()), # type: ignore[no-untyped-call] | ||
("FakeToken5Valid", utcnow() + timedelta(seconds=30)), # type: ignore[no-untyped-call] | ||
] | ||
|
||
gcs_path = "gs://ssb-dapla-pseudo-data-produkt-test/integration_tests_data/personer.parquet" | ||
with pytest.raises( | ||
HttpError | ||
) as exc_info: # Since we supply invalid credentials, an error should be raised | ||
dp.read_pandas(gcs_path) | ||
assert "Invalid Credentials" in str(exc_info.value) | ||
assert ( | ||
mock_fetch_google_token.call_count == 5 | ||
) # mock_fetch_google_token is called as part of refresh | ||
# until a token that has not expired is returned | ||