From 4277720789636666f27a477a1e350f262c910af3 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Mon, 20 Apr 2026 16:59:25 +0200 Subject: [PATCH] fix: use load sentinel and injected config in AliasResolver `AliasResolver._get_alias_map` used `not cls._alias_map` as a 'not loaded' sentinel, conflating an empty dict with 'not yet loaded' and re-hitting the default KVS on every lookup until the first alias was persisted. It also gated `is_at_home` on `Configuration.get_global_configuration()` while the KVS client was built from `self._configuration`, so an injected custom configuration could silently skip writes or target the wrong KVS. Introduce a `_alias_map_loaded` sentinel and use the injected configuration consistently for both gating and client construction. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../_apify/_alias_resolving.py | 11 ++++-- tests/unit/conftest.py | 1 + .../storage_clients/test_alias_resolver.py | 36 +++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/apify/storage_clients/_apify/_alias_resolving.py b/src/apify/storage_clients/_apify/_alias_resolving.py index c07edca3f..abca78fe3 100644 --- a/src/apify/storage_clients/_apify/_alias_resolving.py +++ b/src/apify/storage_clients/_apify/_alias_resolving.py @@ -9,7 +9,6 @@ from apify_client import ApifyClientAsync from ._utils import hash_api_base_url_and_token -from apify._configuration import Configuration if TYPE_CHECKING: from collections.abc import Callable @@ -24,6 +23,8 @@ RequestQueueCollectionClientAsync, ) + from apify._configuration import Configuration + logger = getLogger(__name__) @@ -128,6 +129,9 @@ class AliasResolver: _alias_map: ClassVar[dict[str, str]] = {} """Map containing pre-existing alias storages and their ids. Global for all instances.""" + _alias_map_loaded: ClassVar[bool] = False + """Tracks whether `_alias_map` was fetched from the default KVS — an empty map is a valid loaded state.""" + _alias_init_lock: Lock | None = None """Lock for creating alias storages. Only one alias storage can be created at the time. Global for all instances.""" @@ -181,11 +185,12 @@ async def _get_alias_map(cls, configuration: Configuration) -> dict[str, str]: Returns: Map of aliases and storage ids. """ - if not cls._alias_map and Configuration.get_global_configuration().is_at_home: + if not cls._alias_map_loaded and configuration.is_at_home: default_kvs_client = await cls._get_default_kvs_client(configuration) record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY) cls._alias_map = record.get('value', {}) if record else {} + cls._alias_map_loaded = True return cls._alias_map @@ -215,7 +220,7 @@ async def store_mapping(self, storage_id: str) -> None: alias_map = await self._get_alias_map(self._configuration) alias_map[self._storage_key] = storage_id - if not Configuration.get_global_configuration().is_at_home: + if not self._configuration.is_at_home: logging.getLogger(__name__).debug( '_AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.' ) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 8d8297c5d..3cd2e32cc 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -75,6 +75,7 @@ def _prepare_test_env() -> None: # Reset the AliasResolver class state. AliasResolver._alias_map = {} + AliasResolver._alias_map_loaded = False AliasResolver._alias_init_lock = None # Verify that the test environment was set up correctly. diff --git a/tests/unit/storage_clients/test_alias_resolver.py b/tests/unit/storage_clients/test_alias_resolver.py index 0cfe31035..707b6a564 100644 --- a/tests/unit/storage_clients/test_alias_resolver.py +++ b/tests/unit/storage_clients/test_alias_resolver.py @@ -1,5 +1,7 @@ from __future__ import annotations +from unittest.mock import AsyncMock, patch + from apify._configuration import Configuration from apify.storage_clients._apify._alias_resolving import AliasResolver @@ -78,6 +80,40 @@ async def test_get_alias_map_returns_in_memory_map() -> None: assert result == {} +async def test_get_alias_map_loads_from_kvs_only_once_when_empty() -> None: + """An empty KVS response must not trigger repeat fetches on subsequent calls.""" + config = Configuration(is_at_home=True, token='test-token', default_key_value_store_id='default-kvs-id') + + fake_kvs_client = AsyncMock() + fake_kvs_client.get_record = AsyncMock(return_value=None) + + with patch.object(AliasResolver, '_get_default_kvs_client', return_value=fake_kvs_client): + await AliasResolver._get_alias_map(config) + await AliasResolver._get_alias_map(config) + await AliasResolver._get_alias_map(config) + + assert fake_kvs_client.get_record.await_count == 1 + assert AliasResolver._alias_map == {} + + +async def test_store_mapping_uses_injected_configuration_is_at_home() -> None: + """`store_mapping` gates on the injected configuration's `is_at_home`, not the global one.""" + # Global `is_at_home` defaults to False; injected config says True — the KVS write must still happen. + config = Configuration(is_at_home=True, token='test-token', default_key_value_store_id='default-kvs-id') + resolver = AliasResolver(storage_type='Dataset', alias='test-alias', configuration=config) + + fake_kvs_client = AsyncMock() + fake_kvs_client.get_record = AsyncMock(return_value=None) + fake_kvs_client.set_record = AsyncMock(return_value=None) + fake_kvs_client.get = AsyncMock(return_value={'id': 'default-kvs-id'}) + + with patch.object(AliasResolver, '_get_default_kvs_client', return_value=fake_kvs_client): + await resolver.store_mapping(storage_id='new-id-789') + + fake_kvs_client.set_record.assert_awaited_once() + assert AliasResolver._alias_map[resolver._storage_key] == 'new-id-789' + + async def test_configuration_storages_alias_resolving() -> None: """Test that `AliasResolver` correctly resolves ids of storages registered in Configuration."""