mirror of
https://github.com/github/spec-kit.git
synced 2026-07-03 20:36:23 +08:00
fix(catalogs): broaden cache except tuples and reuse validator in fetch_catalog
Addresses Copilot review feedback on this PR (round 4): 1. ``ExtensionCatalog.fetch_catalog`` and ``PresetCatalog.fetch_catalog`` — the legacy single-catalog methods — still only checked key presence. A payload like ``42`` (root non-object) crashed with ``TypeError: argument of type 'int' is not iterable`` during the ``"schema_version" in catalog_data`` check, and an entry mapping of the wrong type crashed downstream. Both now reuse ``_validate_catalog_payload`` so the network-side behaviour of the legacy methods stays consistent with the multi-catalog ``_fetch_single_catalog`` path. (Copilot #3335623482, #3335623556.) 2. The cache-read ``except`` tuples in ``_fetch_single_catalog`` and ``fetch_catalog`` were too narrow. ``read_text`` can raise ``OSError`` (permissions / disk / handle limit) or ``UnicodeError`` (cache file written by an older client in a different encoding) in addition to ``json.JSONDecodeError``. Without those in the tuple, an unreadable cache crashed the caller instead of falling through to the network refetch the cache contract documents. Both sites now catch ``(json.JSONDecodeError, OSError, UnicodeError, <DomainError>)``. (Copilot #3335623588, #3335623608.) 3. While here, pinned ``encoding="utf-8"`` on every cache ``read_text`` call so cache files written by an older Windows client (with a non-UTF-8 default locale) decode the same way on a newer client. Regression tests: - ``test_fetch_catalog_rejects_malformed_payload`` — 7 parametrized payloads per file covering root-non-object + nested-bad-type variants asserting ``fetch_catalog`` raises the named domain error. - ``test_fetch_catalog_recovers_from_unreadable_cache`` — writes ``b"\xff\xfe\x00not-utf-8"`` to the cache file and asserts ``fetch_catalog`` silently falls through to the mocked network and returns the freshly-fetched payload.
This commit is contained in:
@@ -1874,10 +1874,15 @@ class ExtensionCatalog(CatalogStackBase):
|
|||||||
# through to the network fetch path so the cache gets refreshed.
|
# through to the network fetch path so the cache gets refreshed.
|
||||||
if is_valid:
|
if is_valid:
|
||||||
try:
|
try:
|
||||||
cached_data = json.loads(cache_file.read_text())
|
cached_data = json.loads(cache_file.read_text(encoding="utf-8"))
|
||||||
self._validate_catalog_payload(cached_data, entry.url)
|
self._validate_catalog_payload(cached_data, entry.url)
|
||||||
return cached_data
|
return cached_data
|
||||||
except (json.JSONDecodeError, ExtensionError):
|
except (json.JSONDecodeError, OSError, UnicodeError, ExtensionError):
|
||||||
|
# Cache is best-effort: a JSON-decode failure, an OS-level
|
||||||
|
# read failure (permissions / disk / handle limit), or a
|
||||||
|
# text-encoding failure on a cache file written by an older
|
||||||
|
# client all fall through to the network fetch path. Only
|
||||||
|
# the network failure is surfaced to the caller.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Fetch from network
|
# Fetch from network
|
||||||
@@ -1994,25 +1999,34 @@ class ExtensionCatalog(CatalogStackBase):
|
|||||||
Raises:
|
Raises:
|
||||||
ExtensionError: If catalog cannot be fetched
|
ExtensionError: If catalog cannot be fetched
|
||||||
"""
|
"""
|
||||||
# Check cache first unless force refresh
|
|
||||||
if not force_refresh and self.is_cache_valid():
|
|
||||||
try:
|
|
||||||
return json.loads(self.cache_file.read_text())
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass # Fall through to network fetch
|
|
||||||
|
|
||||||
# Fetch from network
|
# Fetch from network
|
||||||
catalog_url = self.get_catalog_url()
|
catalog_url = self.get_catalog_url()
|
||||||
|
|
||||||
|
# Check cache first unless force refresh. Match the
|
||||||
|
# ``_fetch_single_catalog`` cache contract: a poisoned or
|
||||||
|
# unreadable cache silently falls through to a network refetch
|
||||||
|
# rather than crashing the caller. ``_validate_catalog_payload``
|
||||||
|
# is reused here so a cache written by an older client
|
||||||
|
# (pre-validation) is rejected and refreshed instead of returning
|
||||||
|
# the stale malformed payload.
|
||||||
|
if not force_refresh and self.is_cache_valid():
|
||||||
|
try:
|
||||||
|
cached_data = json.loads(self.cache_file.read_text(encoding="utf-8"))
|
||||||
|
self._validate_catalog_payload(cached_data, catalog_url)
|
||||||
|
return cached_data
|
||||||
|
except (json.JSONDecodeError, OSError, UnicodeError, ExtensionError):
|
||||||
|
pass # Fall through to network fetch
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
|
||||||
with self._open_url(catalog_url, timeout=10) as response:
|
with self._open_url(catalog_url, timeout=10) as response:
|
||||||
catalog_data = json.loads(response.read())
|
catalog_data = json.loads(response.read())
|
||||||
|
|
||||||
# Validate catalog structure
|
# Validate catalog structure. Reuses the same helper as
|
||||||
if "schema_version" not in catalog_data or "extensions" not in catalog_data:
|
# ``_fetch_single_catalog`` so all three branches (root type,
|
||||||
raise ExtensionError(f"Invalid catalog format from {catalog_url}")
|
# missing keys, nested-mapping type) stay consistent.
|
||||||
|
self._validate_catalog_payload(catalog_data, catalog_url)
|
||||||
|
|
||||||
# Save to cache
|
# Save to cache
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|||||||
@@ -2085,10 +2085,15 @@ class PresetCatalog:
|
|||||||
# refreshed.
|
# refreshed.
|
||||||
if not force_refresh and self._is_url_cache_valid(entry.url):
|
if not force_refresh and self._is_url_cache_valid(entry.url):
|
||||||
try:
|
try:
|
||||||
cached_data = json.loads(cache_file.read_text())
|
cached_data = json.loads(cache_file.read_text(encoding="utf-8"))
|
||||||
self._validate_catalog_payload(cached_data, entry.url)
|
self._validate_catalog_payload(cached_data, entry.url)
|
||||||
return cached_data
|
return cached_data
|
||||||
except (json.JSONDecodeError, PresetError):
|
except (json.JSONDecodeError, OSError, UnicodeError, PresetError):
|
||||||
|
# Cache is best-effort: a JSON-decode failure, an OS-level
|
||||||
|
# read failure (permissions / disk / handle limit), or a
|
||||||
|
# text-encoding failure on a cache file written by an
|
||||||
|
# older client all fall through to the network fetch path.
|
||||||
|
# Only the network failure is surfaced to the caller.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -2182,24 +2187,36 @@ class PresetCatalog:
|
|||||||
"""
|
"""
|
||||||
catalog_url = self.get_catalog_url()
|
catalog_url = self.get_catalog_url()
|
||||||
|
|
||||||
|
# Match the ``_fetch_single_catalog`` cache contract: a poisoned
|
||||||
|
# or unreadable cache silently falls through to a network refetch
|
||||||
|
# rather than crashing the caller. ``_validate_catalog_payload``
|
||||||
|
# is reused here so a cache written by an older client
|
||||||
|
# (pre-validation) is rejected and refreshed instead of returning
|
||||||
|
# the stale malformed payload.
|
||||||
if not force_refresh and self.is_cache_valid():
|
if not force_refresh and self.is_cache_valid():
|
||||||
try:
|
try:
|
||||||
metadata = json.loads(self.cache_metadata_file.read_text())
|
metadata = json.loads(
|
||||||
|
self.cache_metadata_file.read_text(encoding="utf-8")
|
||||||
|
)
|
||||||
if metadata.get("catalog_url") == catalog_url:
|
if metadata.get("catalog_url") == catalog_url:
|
||||||
return json.loads(self.cache_file.read_text())
|
cached_data = json.loads(
|
||||||
except (json.JSONDecodeError, OSError):
|
self.cache_file.read_text(encoding="utf-8")
|
||||||
# Cache is corrupt or unreadable; fall through to network fetch
|
)
|
||||||
|
self._validate_catalog_payload(cached_data, catalog_url)
|
||||||
|
return cached_data
|
||||||
|
except (json.JSONDecodeError, OSError, UnicodeError, PresetError):
|
||||||
|
# Cache is corrupt, unreadable, or fails the shape check;
|
||||||
|
# fall through to network fetch.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self._open_url(catalog_url, timeout=10) as response:
|
with self._open_url(catalog_url, timeout=10) as response:
|
||||||
catalog_data = json.loads(response.read())
|
catalog_data = json.loads(response.read())
|
||||||
|
|
||||||
if (
|
# Validate catalog structure. Reuses the same helper as
|
||||||
"schema_version" not in catalog_data
|
# ``_fetch_single_catalog`` so all three branches (root type,
|
||||||
or "presets" not in catalog_data
|
# missing keys, nested-mapping type) stay consistent.
|
||||||
):
|
self._validate_catalog_payload(catalog_data, catalog_url)
|
||||||
raise PresetError(f"Invalid preset catalog format from {catalog_url}")
|
|
||||||
|
|
||||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
self.cache_file.write_text(json.dumps(catalog_data, indent=2))
|
self.cache_file.write_text(json.dumps(catalog_data, indent=2))
|
||||||
|
|||||||
@@ -2692,6 +2692,86 @@ class TestExtensionCatalog:
|
|||||||
# The poisoned cache was discarded and the network payload returned.
|
# The poisoned cache was discarded and the network payload returned.
|
||||||
assert result == valid
|
assert result == valid
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"payload",
|
||||||
|
[
|
||||||
|
# Root is not a JSON object.
|
||||||
|
[],
|
||||||
|
"oops",
|
||||||
|
42,
|
||||||
|
None,
|
||||||
|
# Root is fine but ``extensions`` is the wrong type.
|
||||||
|
{"schema_version": "1.0", "extensions": []},
|
||||||
|
{"schema_version": "1.0", "extensions": "oops"},
|
||||||
|
{"schema_version": "1.0", "extensions": None},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_fetch_catalog_rejects_malformed_payload(self, temp_dir, payload):
|
||||||
|
"""Legacy ``fetch_catalog`` reuses the same shape-validation helper.
|
||||||
|
|
||||||
|
Before this change ``fetch_catalog`` only checked key presence — so
|
||||||
|
a payload like ``42`` would crash with
|
||||||
|
``TypeError: argument of type 'int' is not iterable`` during the
|
||||||
|
``"schema_version" in catalog_data`` check, and an entry mapping
|
||||||
|
of the wrong type would crash downstream. Reusing
|
||||||
|
``_validate_catalog_payload`` keeps the network-side behaviour of
|
||||||
|
the legacy single-catalog method consistent with the multi-catalog
|
||||||
|
``_fetch_single_catalog`` path.
|
||||||
|
"""
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
catalog = self._make_catalog(temp_dir)
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.read.return_value = json.dumps(payload).encode()
|
||||||
|
mock_response.__enter__ = lambda s: s
|
||||||
|
mock_response.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
|
with patch.object(catalog, "_open_url", return_value=mock_response):
|
||||||
|
with pytest.raises(ExtensionError, match="Invalid catalog format"):
|
||||||
|
catalog.fetch_catalog(force_refresh=True)
|
||||||
|
|
||||||
|
def test_fetch_catalog_recovers_from_unreadable_cache(self, temp_dir):
|
||||||
|
"""An unreadable / wrong-encoded cache file silently refetches.
|
||||||
|
|
||||||
|
The cache contract is best-effort: a JSON-decode failure, an OS
|
||||||
|
read failure (permissions / disk / handle limit), or an invalid
|
||||||
|
text encoding on a cache file written by an older client must
|
||||||
|
all fall through to the network fetch rather than crash the
|
||||||
|
caller. Covers Copilot's review point that the previous
|
||||||
|
``except (json.JSONDecodeError,)`` was too narrow.
|
||||||
|
"""
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
catalog = self._make_catalog(temp_dir)
|
||||||
|
# Write invalid UTF-8 bytes to the cache file so ``read_text``
|
||||||
|
# raises ``UnicodeDecodeError`` (a subclass of ``UnicodeError``).
|
||||||
|
catalog.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
catalog.cache_file.write_bytes(b"\xff\xfe\x00not-utf-8")
|
||||||
|
catalog.cache_metadata_file.write_text(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"cached_at": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"catalog_url": ExtensionCatalog.DEFAULT_CATALOG_URL,
|
||||||
|
}
|
||||||
|
),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
valid = {
|
||||||
|
"schema_version": "1.0",
|
||||||
|
"extensions": {"foo": {"name": "Foo", "version": "1.0.0"}},
|
||||||
|
}
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.read.return_value = json.dumps(valid).encode()
|
||||||
|
mock_response.__enter__ = lambda s: s
|
||||||
|
mock_response.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
|
with patch.object(catalog, "_open_url", return_value=mock_response):
|
||||||
|
result = catalog.fetch_catalog(force_refresh=False)
|
||||||
|
|
||||||
|
# Recovered via network rather than crashing on the unreadable cache.
|
||||||
|
assert result == valid
|
||||||
|
|
||||||
def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir):
|
def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir):
|
||||||
"""Per-entry guard: one malformed entry shouldn't poison the merge.
|
"""Per-entry guard: one malformed entry shouldn't poison the merge.
|
||||||
|
|
||||||
|
|||||||
@@ -1630,6 +1630,87 @@ class TestPresetCatalog:
|
|||||||
# The poisoned cache was discarded and the network payload returned.
|
# The poisoned cache was discarded and the network payload returned.
|
||||||
assert result == valid
|
assert result == valid
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"payload",
|
||||||
|
[
|
||||||
|
# Root is not a JSON object.
|
||||||
|
[],
|
||||||
|
"oops",
|
||||||
|
42,
|
||||||
|
None,
|
||||||
|
# Root is fine but ``presets`` is the wrong type.
|
||||||
|
{"schema_version": "1.0", "presets": []},
|
||||||
|
{"schema_version": "1.0", "presets": "oops"},
|
||||||
|
{"schema_version": "1.0", "presets": None},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_fetch_catalog_rejects_malformed_payload(self, project_dir, payload):
|
||||||
|
"""Legacy ``fetch_catalog`` reuses the same shape-validation helper.
|
||||||
|
|
||||||
|
Before this change ``fetch_catalog`` only checked key presence —
|
||||||
|
so a payload like ``42`` would crash with
|
||||||
|
``TypeError: argument of type 'int' is not iterable`` during the
|
||||||
|
``"schema_version" in catalog_data`` check, and an entry mapping
|
||||||
|
of the wrong type would crash downstream. Reusing
|
||||||
|
``_validate_catalog_payload`` keeps the network-side behaviour of
|
||||||
|
the legacy single-catalog method consistent with the multi-catalog
|
||||||
|
``_fetch_single_catalog`` path.
|
||||||
|
"""
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
catalog = PresetCatalog(project_dir)
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.read.return_value = json.dumps(payload).encode()
|
||||||
|
mock_response.__enter__ = lambda s: s
|
||||||
|
mock_response.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
|
with patch.object(catalog, "_open_url", return_value=mock_response):
|
||||||
|
with pytest.raises(PresetError, match="Invalid preset catalog format"):
|
||||||
|
catalog.fetch_catalog(force_refresh=True)
|
||||||
|
|
||||||
|
def test_fetch_catalog_recovers_from_unreadable_cache(self, project_dir):
|
||||||
|
"""An unreadable / wrong-encoded cache file silently refetches.
|
||||||
|
|
||||||
|
The cache contract is best-effort: a JSON-decode failure, an OS
|
||||||
|
read failure (permissions / disk / handle limit), or an invalid
|
||||||
|
text encoding on a cache file written by an older client must
|
||||||
|
all fall through to the network fetch rather than crash the
|
||||||
|
caller. Covers Copilot's review point that the previous
|
||||||
|
``except (json.JSONDecodeError, OSError)`` was missing
|
||||||
|
``UnicodeError``.
|
||||||
|
"""
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
catalog = PresetCatalog(project_dir)
|
||||||
|
catalog.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
# Invalid UTF-8 bytes so ``read_text`` raises ``UnicodeDecodeError``
|
||||||
|
# (a subclass of ``UnicodeError``).
|
||||||
|
catalog.cache_file.write_bytes(b"\xff\xfe\x00not-utf-8")
|
||||||
|
catalog.cache_metadata_file.write_text(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"cached_at": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"catalog_url": catalog.get_catalog_url(),
|
||||||
|
}
|
||||||
|
),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
valid = {
|
||||||
|
"schema_version": "1.0",
|
||||||
|
"presets": {"foo": {"name": "Foo", "version": "1.0.0"}},
|
||||||
|
}
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.read.return_value = json.dumps(valid).encode()
|
||||||
|
mock_response.__enter__ = lambda s: s
|
||||||
|
mock_response.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
|
with patch.object(catalog, "_open_url", return_value=mock_response):
|
||||||
|
result = catalog.fetch_catalog(force_refresh=False)
|
||||||
|
|
||||||
|
# Recovered via network rather than crashing on the unreadable cache.
|
||||||
|
assert result == valid
|
||||||
|
|
||||||
def test_get_merged_packs_skips_non_mapping_entries(self, project_dir):
|
def test_get_merged_packs_skips_non_mapping_entries(self, project_dir):
|
||||||
"""Per-entry guard: one malformed entry shouldn't poison the merge.
|
"""Per-entry guard: one malformed entry shouldn't poison the merge.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user