fix(catalogs): broaden cache except tuples and reuse validator in fetch_catalog

Addresses Copilot review feedback on this PR (round 4):

1. ``ExtensionCatalog.fetch_catalog`` and ``PresetCatalog.fetch_catalog``
   — the legacy single-catalog methods — still only checked key
   presence. A payload like ``42`` (root non-object) crashed with
   ``TypeError: argument of type 'int' is not iterable`` during the
   ``"schema_version" in catalog_data`` check, and an entry mapping of
   the wrong type crashed downstream. Both now reuse
   ``_validate_catalog_payload`` so the network-side behaviour of the
   legacy methods stays consistent with the multi-catalog
   ``_fetch_single_catalog`` path. (Copilot #3335623482, #3335623556.)

2. The cache-read ``except`` tuples in ``_fetch_single_catalog`` and
   ``fetch_catalog`` were too narrow. ``read_text`` can raise
   ``OSError`` (permissions / disk / handle limit) or ``UnicodeError``
   (cache file written by an older client in a different encoding)
   in addition to ``json.JSONDecodeError``. Without those in the
   tuple, an unreadable cache crashed the caller instead of falling
   through to the network refetch the cache contract documents. Both
   sites now catch ``(json.JSONDecodeError, OSError, UnicodeError,
   <DomainError>)``. (Copilot #3335623588, #3335623608.)

3. While here, pinned ``encoding="utf-8"`` on every cache ``read_text``
   call so cache files written by an older Windows client (with a
   non-UTF-8 default locale) decode the same way on a newer client.

Regression tests:

  - ``test_fetch_catalog_rejects_malformed_payload`` — 7 parametrized
    payloads per file covering root-non-object + nested-bad-type
    variants asserting ``fetch_catalog`` raises the named domain error.
  - ``test_fetch_catalog_recovers_from_unreadable_cache`` — writes
    ``b"\xff\xfe\x00not-utf-8"`` to the cache file and asserts
    ``fetch_catalog`` silently falls through to the mocked network and
    returns the freshly-fetched payload.
This commit is contained in:
Quratulain-bilal
2026-06-01 21:43:49 +05:00
parent 1708e5c32b
commit 7f44b256b8
4 changed files with 215 additions and 23 deletions

View File

@@ -1874,10 +1874,15 @@ class ExtensionCatalog(CatalogStackBase):
# through to the network fetch path so the cache gets refreshed. # through to the network fetch path so the cache gets refreshed.
if is_valid: if is_valid:
try: try:
cached_data = json.loads(cache_file.read_text()) cached_data = json.loads(cache_file.read_text(encoding="utf-8"))
self._validate_catalog_payload(cached_data, entry.url) self._validate_catalog_payload(cached_data, entry.url)
return cached_data return cached_data
except (json.JSONDecodeError, ExtensionError): except (json.JSONDecodeError, OSError, UnicodeError, ExtensionError):
# Cache is best-effort: a JSON-decode failure, an OS-level
# read failure (permissions / disk / handle limit), or a
# text-encoding failure on a cache file written by an older
# client all fall through to the network fetch path. Only
# the network failure is surfaced to the caller.
pass pass
# Fetch from network # Fetch from network
@@ -1994,25 +1999,34 @@ class ExtensionCatalog(CatalogStackBase):
Raises: Raises:
ExtensionError: If catalog cannot be fetched ExtensionError: If catalog cannot be fetched
""" """
# Check cache first unless force refresh
if not force_refresh and self.is_cache_valid():
try:
return json.loads(self.cache_file.read_text())
except json.JSONDecodeError:
pass # Fall through to network fetch
# Fetch from network # Fetch from network
catalog_url = self.get_catalog_url() catalog_url = self.get_catalog_url()
# Check cache first unless force refresh. Match the
# ``_fetch_single_catalog`` cache contract: a poisoned or
# unreadable cache silently falls through to a network refetch
# rather than crashing the caller. ``_validate_catalog_payload``
# is reused here so a cache written by an older client
# (pre-validation) is rejected and refreshed instead of returning
# the stale malformed payload.
if not force_refresh and self.is_cache_valid():
try:
cached_data = json.loads(self.cache_file.read_text(encoding="utf-8"))
self._validate_catalog_payload(cached_data, catalog_url)
return cached_data
except (json.JSONDecodeError, OSError, UnicodeError, ExtensionError):
pass # Fall through to network fetch
try: try:
import urllib.error import urllib.error
with self._open_url(catalog_url, timeout=10) as response: with self._open_url(catalog_url, timeout=10) as response:
catalog_data = json.loads(response.read()) catalog_data = json.loads(response.read())
# Validate catalog structure # Validate catalog structure. Reuses the same helper as
if "schema_version" not in catalog_data or "extensions" not in catalog_data: # ``_fetch_single_catalog`` so all three branches (root type,
raise ExtensionError(f"Invalid catalog format from {catalog_url}") # missing keys, nested-mapping type) stay consistent.
self._validate_catalog_payload(catalog_data, catalog_url)
# Save to cache # Save to cache
self.cache_dir.mkdir(parents=True, exist_ok=True) self.cache_dir.mkdir(parents=True, exist_ok=True)

View File

@@ -2085,10 +2085,15 @@ class PresetCatalog:
# refreshed. # refreshed.
if not force_refresh and self._is_url_cache_valid(entry.url): if not force_refresh and self._is_url_cache_valid(entry.url):
try: try:
cached_data = json.loads(cache_file.read_text()) cached_data = json.loads(cache_file.read_text(encoding="utf-8"))
self._validate_catalog_payload(cached_data, entry.url) self._validate_catalog_payload(cached_data, entry.url)
return cached_data return cached_data
except (json.JSONDecodeError, PresetError): except (json.JSONDecodeError, OSError, UnicodeError, PresetError):
# Cache is best-effort: a JSON-decode failure, an OS-level
# read failure (permissions / disk / handle limit), or a
# text-encoding failure on a cache file written by an
# older client all fall through to the network fetch path.
# Only the network failure is surfaced to the caller.
pass pass
try: try:
@@ -2182,24 +2187,36 @@ class PresetCatalog:
""" """
catalog_url = self.get_catalog_url() catalog_url = self.get_catalog_url()
# Match the ``_fetch_single_catalog`` cache contract: a poisoned
# or unreadable cache silently falls through to a network refetch
# rather than crashing the caller. ``_validate_catalog_payload``
# is reused here so a cache written by an older client
# (pre-validation) is rejected and refreshed instead of returning
# the stale malformed payload.
if not force_refresh and self.is_cache_valid(): if not force_refresh and self.is_cache_valid():
try: try:
metadata = json.loads(self.cache_metadata_file.read_text()) metadata = json.loads(
self.cache_metadata_file.read_text(encoding="utf-8")
)
if metadata.get("catalog_url") == catalog_url: if metadata.get("catalog_url") == catalog_url:
return json.loads(self.cache_file.read_text()) cached_data = json.loads(
except (json.JSONDecodeError, OSError): self.cache_file.read_text(encoding="utf-8")
# Cache is corrupt or unreadable; fall through to network fetch )
self._validate_catalog_payload(cached_data, catalog_url)
return cached_data
except (json.JSONDecodeError, OSError, UnicodeError, PresetError):
# Cache is corrupt, unreadable, or fails the shape check;
# fall through to network fetch.
pass pass
try: try:
with self._open_url(catalog_url, timeout=10) as response: with self._open_url(catalog_url, timeout=10) as response:
catalog_data = json.loads(response.read()) catalog_data = json.loads(response.read())
if ( # Validate catalog structure. Reuses the same helper as
"schema_version" not in catalog_data # ``_fetch_single_catalog`` so all three branches (root type,
or "presets" not in catalog_data # missing keys, nested-mapping type) stay consistent.
): self._validate_catalog_payload(catalog_data, catalog_url)
raise PresetError(f"Invalid preset catalog format from {catalog_url}")
self.cache_dir.mkdir(parents=True, exist_ok=True) self.cache_dir.mkdir(parents=True, exist_ok=True)
self.cache_file.write_text(json.dumps(catalog_data, indent=2)) self.cache_file.write_text(json.dumps(catalog_data, indent=2))

View File

@@ -2692,6 +2692,86 @@ class TestExtensionCatalog:
# The poisoned cache was discarded and the network payload returned. # The poisoned cache was discarded and the network payload returned.
assert result == valid assert result == valid
@pytest.mark.parametrize(
"payload",
[
# Root is not a JSON object.
[],
"oops",
42,
None,
# Root is fine but ``extensions`` is the wrong type.
{"schema_version": "1.0", "extensions": []},
{"schema_version": "1.0", "extensions": "oops"},
{"schema_version": "1.0", "extensions": None},
],
)
def test_fetch_catalog_rejects_malformed_payload(self, temp_dir, payload):
"""Legacy ``fetch_catalog`` reuses the same shape-validation helper.
Before this change ``fetch_catalog`` only checked key presence — so
a payload like ``42`` would crash with
``TypeError: argument of type 'int' is not iterable`` during the
``"schema_version" in catalog_data`` check, and an entry mapping
of the wrong type would crash downstream. Reusing
``_validate_catalog_payload`` keeps the network-side behaviour of
the legacy single-catalog method consistent with the multi-catalog
``_fetch_single_catalog`` path.
"""
from unittest.mock import patch, MagicMock
catalog = self._make_catalog(temp_dir)
mock_response = MagicMock()
mock_response.read.return_value = json.dumps(payload).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)
with patch.object(catalog, "_open_url", return_value=mock_response):
with pytest.raises(ExtensionError, match="Invalid catalog format"):
catalog.fetch_catalog(force_refresh=True)
def test_fetch_catalog_recovers_from_unreadable_cache(self, temp_dir):
"""An unreadable / wrong-encoded cache file silently refetches.
The cache contract is best-effort: a JSON-decode failure, an OS
read failure (permissions / disk / handle limit), or an invalid
text encoding on a cache file written by an older client must
all fall through to the network fetch rather than crash the
caller. Covers Copilot's review point that the previous
``except (json.JSONDecodeError,)`` was too narrow.
"""
from unittest.mock import patch, MagicMock
catalog = self._make_catalog(temp_dir)
# Write invalid UTF-8 bytes to the cache file so ``read_text``
# raises ``UnicodeDecodeError`` (a subclass of ``UnicodeError``).
catalog.cache_dir.mkdir(parents=True, exist_ok=True)
catalog.cache_file.write_bytes(b"\xff\xfe\x00not-utf-8")
catalog.cache_metadata_file.write_text(
json.dumps(
{
"cached_at": datetime.now(timezone.utc).isoformat(),
"catalog_url": ExtensionCatalog.DEFAULT_CATALOG_URL,
}
),
encoding="utf-8",
)
valid = {
"schema_version": "1.0",
"extensions": {"foo": {"name": "Foo", "version": "1.0.0"}},
}
mock_response = MagicMock()
mock_response.read.return_value = json.dumps(valid).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)
with patch.object(catalog, "_open_url", return_value=mock_response):
result = catalog.fetch_catalog(force_refresh=False)
# Recovered via network rather than crashing on the unreadable cache.
assert result == valid
def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir):
"""Per-entry guard: one malformed entry shouldn't poison the merge. """Per-entry guard: one malformed entry shouldn't poison the merge.

View File

@@ -1630,6 +1630,87 @@ class TestPresetCatalog:
# The poisoned cache was discarded and the network payload returned. # The poisoned cache was discarded and the network payload returned.
assert result == valid assert result == valid
@pytest.mark.parametrize(
"payload",
[
# Root is not a JSON object.
[],
"oops",
42,
None,
# Root is fine but ``presets`` is the wrong type.
{"schema_version": "1.0", "presets": []},
{"schema_version": "1.0", "presets": "oops"},
{"schema_version": "1.0", "presets": None},
],
)
def test_fetch_catalog_rejects_malformed_payload(self, project_dir, payload):
"""Legacy ``fetch_catalog`` reuses the same shape-validation helper.
Before this change ``fetch_catalog`` only checked key presence —
so a payload like ``42`` would crash with
``TypeError: argument of type 'int' is not iterable`` during the
``"schema_version" in catalog_data`` check, and an entry mapping
of the wrong type would crash downstream. Reusing
``_validate_catalog_payload`` keeps the network-side behaviour of
the legacy single-catalog method consistent with the multi-catalog
``_fetch_single_catalog`` path.
"""
from unittest.mock import patch, MagicMock
catalog = PresetCatalog(project_dir)
mock_response = MagicMock()
mock_response.read.return_value = json.dumps(payload).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)
with patch.object(catalog, "_open_url", return_value=mock_response):
with pytest.raises(PresetError, match="Invalid preset catalog format"):
catalog.fetch_catalog(force_refresh=True)
def test_fetch_catalog_recovers_from_unreadable_cache(self, project_dir):
"""An unreadable / wrong-encoded cache file silently refetches.
The cache contract is best-effort: a JSON-decode failure, an OS
read failure (permissions / disk / handle limit), or an invalid
text encoding on a cache file written by an older client must
all fall through to the network fetch rather than crash the
caller. Covers Copilot's review point that the previous
``except (json.JSONDecodeError, OSError)`` was missing
``UnicodeError``.
"""
from unittest.mock import patch, MagicMock
catalog = PresetCatalog(project_dir)
catalog.cache_dir.mkdir(parents=True, exist_ok=True)
# Invalid UTF-8 bytes so ``read_text`` raises ``UnicodeDecodeError``
# (a subclass of ``UnicodeError``).
catalog.cache_file.write_bytes(b"\xff\xfe\x00not-utf-8")
catalog.cache_metadata_file.write_text(
json.dumps(
{
"cached_at": datetime.now(timezone.utc).isoformat(),
"catalog_url": catalog.get_catalog_url(),
}
),
encoding="utf-8",
)
valid = {
"schema_version": "1.0",
"presets": {"foo": {"name": "Foo", "version": "1.0.0"}},
}
mock_response = MagicMock()
mock_response.read.return_value = json.dumps(valid).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)
with patch.object(catalog, "_open_url", return_value=mock_response):
result = catalog.fetch_catalog(force_refresh=False)
# Recovered via network rather than crashing on the unreadable cache.
assert result == valid
def test_get_merged_packs_skips_non_mapping_entries(self, project_dir): def test_get_merged_packs_skips_non_mapping_entries(self, project_dir):
"""Per-entry guard: one malformed entry shouldn't poison the merge. """Per-entry guard: one malformed entry shouldn't poison the merge.