Skip to content

Commit

Permalink
Fix token refresh deadlock issue (#134)
Browse files Browse the repository at this point in the history
We have gotten reports of infinite lopping for the last ~4 minutes of token ttl when reading parquet form GCS with Dapla toolbelt. The issue should be resolved in this PR by override the refresh_handler, this is the intended way to provide credentials with custom logic for fetching tokens and it does not result in a deadlock issues.

Previously, we directly overrode the refresh method. However, this approach led to deadlock issues in gcsfs/credentials.py's maybe_refresh method.

Other changes:
Since we can't force a refresh, the threshold is lowered to the old value of 20s to keep us from waiting ~4 minutes for a new token.
Refresh window was modified in: googleapis/google-auth-library-python@c6af1d6

Issue recreation steps for https://jupyter.dapla.ssb.no/ and Dapla Lab VSCode:
This code would freeze when ttl of token was less than 3m 45s:

import time
import dapla as dp
import pandas as pd
import inspect
import logging

logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
hvilkensomhelststi = "ssb-prod-dapla-felles-data-delt/GIS/testdata/noen_boligbygg_oslo.parquet"

while True:
    print(dp.read_pandas(hvilkensomhelststi))
    print(f"{pd.Timestamp.now().round('s')=}")
    time.sleep(0.1)
  • Loading branch information
Andilun authored Mar 22, 2024
1 parent 41574e5 commit 15df97c
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 27 deletions.
6 changes: 5 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,11 @@ def typeguard(session: Session) -> None:
"""Runtime type checking using Typeguard."""
session.install(".")
session.install("pytest", "typeguard", "pygments", "responses")
session.run("pytest", f"--typeguard-packages={package}", *session.posargs)
session.run(
"pytest",
f"--typeguard-packages={package} --ignore=tests/test_auth.py",
*session.posargs,
)


@session(python=python_versions)
Expand Down
27 changes: 15 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dapla-toolbelt"
version = "2.0.11"
version = "2.0.12"
description = "Dapla Toolbelt"
authors = ["Dapla Developers <[email protected]>"]
license = "MIT"
Expand Down Expand Up @@ -54,6 +54,7 @@ types-requests = ">=2.28.11"
pyarrow-stubs = ">=10.0.1.7"
google-auth-stubs = ">=0.2.0" # Not maintained by Google, should change if Google releases their own stubs
pandas-stubs = ">=2.0.0"
pytest-timeout = "^2.3.1"

[tool.pytest.ini_options]
pythonpath = ["src"]
Expand Down
30 changes: 17 additions & 13 deletions src/dapla/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from functools import partial
from typing import Any
from typing import Optional

Expand All @@ -16,6 +15,13 @@
from IPython.display import display
from jupyterhub.services.auth import HubAuth

# Refresh window was modified in: https://github.com/googleapis/google-auth-library-python/commit/c6af1d692b43833baca978948376739547cf685a
# The change was directed towards high latency environments, and should not apply to us.
# Since we can't force a refresh, the threshold is lowered to keep us from waiting ~4 minutes for a new token.
# A permanent fix would be to supply credentials with a refresh endpoint
# that allways returns a token that is valid for more than 3m 45s.
google.auth._helpers.REFRESH_THRESHOLD = timedelta(seconds=20)


class AuthClient:
"""Client for retrieving authentication information."""
Expand Down Expand Up @@ -107,24 +113,22 @@ def fetch_google_credentials() -> Credentials:
"""
if AuthClient.is_ready():
try:

def _refresh_handler(
request: google.auth.transport.Request, scopes: Sequence[str]
) -> tuple[str, datetime]:
# We manually override the refresh_handler method with our custom logic for fetching tokens.
# Previously, we directly overrode the `refresh` method. However, this
# approach led to deadlock issues in gcsfs/credentials.py's maybe_refresh method.
return AuthClient.fetch_google_token()

token, expiry = AuthClient.fetch_google_token()
credentials = Credentials(
token=token,
expiry=expiry,
token_uri="https://oauth2.googleapis.com/token",
refresh_handler=_refresh_handler,
)

def _refresh(self: Credentials, request: Any) -> None:
token, expiry = AuthClient.fetch_google_token(request)
self.token = token
self.expiry = expiry

# We need to manually override the refresh method.
# This is because the "Credentials" class' built-in refresh method
# requires that the token be *at least valid for 3 minutes and 45 seconds*.
# We cannot make this guarantee in JupyterHub due to the implementation
# of our TokenExchange endpoint.
credentials.refresh = partial(_refresh, credentials) # type: ignore[method-assign]
except AuthError as err:
err._print_warning()

Expand Down
40 changes: 40 additions & 0 deletions tests/test_gcs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,47 @@
from datetime import timedelta
from unittest.mock import Mock
from unittest.mock import patch

import pytest
from gcsfs.retry import HttpError
from google.auth._helpers import utcnow

from dapla import pandas as dp
from dapla.gcs import GCSFileSystem


def test_instance() -> None:
# Chack that instantiation works with the current version of pyarrow
client = GCSFileSystem()
assert client is not None


@pytest.mark.timeout(
30
) # Times the test out after 30 sec, this is will happen if a deadlock happens
@patch("dapla.auth.AuthClient.is_ready")
@patch("dapla.auth.AuthClient.fetch_google_token")
def test_gcs_deadlock(mock_fetch_google_token: Mock, mock_is_ready: Mock) -> None:
# When overriding the refresh method we experienced a deadlock, resulting in the credentials never being refreshed
# This test checks that the credentials object is updated on refresh
# and that it proceeds to the next step when a valid token is provided.

mock_is_ready.return_value = True # Mock client ready to not use ADC
mock_fetch_google_token.side_effect = [
("FakeToken1", utcnow()), # type: ignore[no-untyped-call]
("FakeToken2", utcnow()), # type: ignore[no-untyped-call]
("FakeToken3", utcnow()), # type: ignore[no-untyped-call]
("FakeToken4", utcnow()), # type: ignore[no-untyped-call]
("FakeToken5Valid", utcnow() + timedelta(seconds=30)), # type: ignore[no-untyped-call]
]

gcs_path = "gs://ssb-dapla-pseudo-data-produkt-test/integration_tests_data/personer.parquet"
with pytest.raises(
HttpError
) as exc_info: # Since we supply invalid credentials an error should be raised
dp.read_pandas(gcs_path)
assert "Invalid Credentials" in str(exc_info.value)
assert (
mock_fetch_google_token.call_count == 5
) # mock_fetch_google_token is called as part of refresh
# until a token that has not expired is returned

0 comments on commit 15df97c

Please sign in to comment.