From 7f44b256b8394734746e88814f2e378431675381 Mon Sep 17 00:00:00 2001 From: Quratulain-bilal Date: Mon, 1 Jun 2026 21:43:49 +0500 Subject: [PATCH] fix(catalogs): broaden cache except tuples and reuse validator in fetch_catalog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Copilot review feedback on this PR (round 4): 1. ``ExtensionCatalog.fetch_catalog`` and ``PresetCatalog.fetch_catalog`` — the legacy single-catalog methods — still only checked key presence. A payload like ``42`` (root non-object) crashed with ``TypeError: argument of type 'int' is not iterable`` during the ``"schema_version" in catalog_data`` check, and an entry mapping of the wrong type crashed downstream. Both now reuse ``_validate_catalog_payload`` so the network-side behaviour of the legacy methods stays consistent with the multi-catalog ``_fetch_single_catalog`` path. (Copilot #3335623482, #3335623556.) 2. The cache-read ``except`` tuples in ``_fetch_single_catalog`` and ``fetch_catalog`` were too narrow. ``read_text`` can raise ``OSError`` (permissions / disk / handle limit) or ``UnicodeError`` (cache file written by an older client in a different encoding) in addition to ``json.JSONDecodeError``. Without those in the tuple, an unreadable cache crashed the caller instead of falling through to the network refetch the cache contract documents. Both sites now catch ``(json.JSONDecodeError, OSError, UnicodeError, )``. (Copilot #3335623588, #3335623608.) 3. While here, pinned ``encoding="utf-8"`` on every cache ``read_text`` call so cache files written by an older Windows client (with a non-UTF-8 default locale) decode the same way on a newer client. Regression tests: - ``test_fetch_catalog_rejects_malformed_payload`` — 7 parametrized payloads per file covering root-non-object + nested-bad-type variants asserting ``fetch_catalog`` raises the named domain error. - ``test_fetch_catalog_recovers_from_unreadable_cache`` — writes ``b"\xff\xfe\x00not-utf-8"`` to the cache file and asserts ``fetch_catalog`` silently falls through to the mocked network and returns the freshly-fetched payload. --- src/specify_cli/extensions.py | 38 ++++++++++------ src/specify_cli/presets.py | 39 ++++++++++++----- tests/test_extensions.py | 80 ++++++++++++++++++++++++++++++++++ tests/test_presets.py | 81 +++++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 23 deletions(-) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 9ee09297d..1d92b3034 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1874,10 +1874,15 @@ class ExtensionCatalog(CatalogStackBase): # through to the network fetch path so the cache gets refreshed. if is_valid: try: - cached_data = json.loads(cache_file.read_text()) + cached_data = json.loads(cache_file.read_text(encoding="utf-8")) self._validate_catalog_payload(cached_data, entry.url) return cached_data - except (json.JSONDecodeError, ExtensionError): + except (json.JSONDecodeError, OSError, UnicodeError, ExtensionError): + # Cache is best-effort: a JSON-decode failure, an OS-level + # read failure (permissions / disk / handle limit), or a + # text-encoding failure on a cache file written by an older + # client all fall through to the network fetch path. Only + # the network failure is surfaced to the caller. pass # Fetch from network @@ -1994,25 +1999,34 @@ class ExtensionCatalog(CatalogStackBase): Raises: ExtensionError: If catalog cannot be fetched """ - # Check cache first unless force refresh - if not force_refresh and self.is_cache_valid(): - try: - return json.loads(self.cache_file.read_text()) - except json.JSONDecodeError: - pass # Fall through to network fetch - # Fetch from network catalog_url = self.get_catalog_url() + # Check cache first unless force refresh. Match the + # ``_fetch_single_catalog`` cache contract: a poisoned or + # unreadable cache silently falls through to a network refetch + # rather than crashing the caller. ``_validate_catalog_payload`` + # is reused here so a cache written by an older client + # (pre-validation) is rejected and refreshed instead of returning + # the stale malformed payload. + if not force_refresh and self.is_cache_valid(): + try: + cached_data = json.loads(self.cache_file.read_text(encoding="utf-8")) + self._validate_catalog_payload(cached_data, catalog_url) + return cached_data + except (json.JSONDecodeError, OSError, UnicodeError, ExtensionError): + pass # Fall through to network fetch + try: import urllib.error with self._open_url(catalog_url, timeout=10) as response: catalog_data = json.loads(response.read()) - # Validate catalog structure - if "schema_version" not in catalog_data or "extensions" not in catalog_data: - raise ExtensionError(f"Invalid catalog format from {catalog_url}") + # Validate catalog structure. Reuses the same helper as + # ``_fetch_single_catalog`` so all three branches (root type, + # missing keys, nested-mapping type) stay consistent. + self._validate_catalog_payload(catalog_data, catalog_url) # Save to cache self.cache_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 08aaa045f..ec3f1cf5b 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -2085,10 +2085,15 @@ class PresetCatalog: # refreshed. if not force_refresh and self._is_url_cache_valid(entry.url): try: - cached_data = json.loads(cache_file.read_text()) + cached_data = json.loads(cache_file.read_text(encoding="utf-8")) self._validate_catalog_payload(cached_data, entry.url) return cached_data - except (json.JSONDecodeError, PresetError): + except (json.JSONDecodeError, OSError, UnicodeError, PresetError): + # Cache is best-effort: a JSON-decode failure, an OS-level + # read failure (permissions / disk / handle limit), or a + # text-encoding failure on a cache file written by an + # older client all fall through to the network fetch path. + # Only the network failure is surfaced to the caller. pass try: @@ -2182,24 +2187,36 @@ class PresetCatalog: """ catalog_url = self.get_catalog_url() + # Match the ``_fetch_single_catalog`` cache contract: a poisoned + # or unreadable cache silently falls through to a network refetch + # rather than crashing the caller. ``_validate_catalog_payload`` + # is reused here so a cache written by an older client + # (pre-validation) is rejected and refreshed instead of returning + # the stale malformed payload. if not force_refresh and self.is_cache_valid(): try: - metadata = json.loads(self.cache_metadata_file.read_text()) + metadata = json.loads( + self.cache_metadata_file.read_text(encoding="utf-8") + ) if metadata.get("catalog_url") == catalog_url: - return json.loads(self.cache_file.read_text()) - except (json.JSONDecodeError, OSError): - # Cache is corrupt or unreadable; fall through to network fetch + cached_data = json.loads( + self.cache_file.read_text(encoding="utf-8") + ) + self._validate_catalog_payload(cached_data, catalog_url) + return cached_data + except (json.JSONDecodeError, OSError, UnicodeError, PresetError): + # Cache is corrupt, unreadable, or fails the shape check; + # fall through to network fetch. pass try: with self._open_url(catalog_url, timeout=10) as response: catalog_data = json.loads(response.read()) - if ( - "schema_version" not in catalog_data - or "presets" not in catalog_data - ): - raise PresetError(f"Invalid preset catalog format from {catalog_url}") + # Validate catalog structure. Reuses the same helper as + # ``_fetch_single_catalog`` so all three branches (root type, + # missing keys, nested-mapping type) stay consistent. + self._validate_catalog_payload(catalog_data, catalog_url) self.cache_dir.mkdir(parents=True, exist_ok=True) self.cache_file.write_text(json.dumps(catalog_data, indent=2)) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 4e89663a3..be72f3263 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2692,6 +2692,86 @@ class TestExtensionCatalog: # The poisoned cache was discarded and the network payload returned. assert result == valid + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``extensions`` is the wrong type. + {"schema_version": "1.0", "extensions": []}, + {"schema_version": "1.0", "extensions": "oops"}, + {"schema_version": "1.0", "extensions": None}, + ], + ) + def test_fetch_catalog_rejects_malformed_payload(self, temp_dir, payload): + """Legacy ``fetch_catalog`` reuses the same shape-validation helper. + + Before this change ``fetch_catalog`` only checked key presence — so + a payload like ``42`` would crash with + ``TypeError: argument of type 'int' is not iterable`` during the + ``"schema_version" in catalog_data`` check, and an entry mapping + of the wrong type would crash downstream. Reusing + ``_validate_catalog_payload`` keeps the network-side behaviour of + the legacy single-catalog method consistent with the multi-catalog + ``_fetch_single_catalog`` path. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(ExtensionError, match="Invalid catalog format"): + catalog.fetch_catalog(force_refresh=True) + + def test_fetch_catalog_recovers_from_unreadable_cache(self, temp_dir): + """An unreadable / wrong-encoded cache file silently refetches. + + The cache contract is best-effort: a JSON-decode failure, an OS + read failure (permissions / disk / handle limit), or an invalid + text encoding on a cache file written by an older client must + all fall through to the network fetch rather than crash the + caller. Covers Copilot's review point that the previous + ``except (json.JSONDecodeError,)`` was too narrow. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + # Write invalid UTF-8 bytes to the cache file so ``read_text`` + # raises ``UnicodeDecodeError`` (a subclass of ``UnicodeError``). + catalog.cache_dir.mkdir(parents=True, exist_ok=True) + catalog.cache_file.write_bytes(b"\xff\xfe\x00not-utf-8") + catalog.cache_metadata_file.write_text( + json.dumps( + { + "cached_at": datetime.now(timezone.utc).isoformat(), + "catalog_url": ExtensionCatalog.DEFAULT_CATALOG_URL, + } + ), + encoding="utf-8", + ) + + valid = { + "schema_version": "1.0", + "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(valid).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog.fetch_catalog(force_refresh=False) + + # Recovered via network rather than crashing on the unreadable cache. + assert result == valid + def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): """Per-entry guard: one malformed entry shouldn't poison the merge. diff --git a/tests/test_presets.py b/tests/test_presets.py index 5322d1454..85275e6bd 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1630,6 +1630,87 @@ class TestPresetCatalog: # The poisoned cache was discarded and the network payload returned. assert result == valid + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``presets`` is the wrong type. + {"schema_version": "1.0", "presets": []}, + {"schema_version": "1.0", "presets": "oops"}, + {"schema_version": "1.0", "presets": None}, + ], + ) + def test_fetch_catalog_rejects_malformed_payload(self, project_dir, payload): + """Legacy ``fetch_catalog`` reuses the same shape-validation helper. + + Before this change ``fetch_catalog`` only checked key presence — + so a payload like ``42`` would crash with + ``TypeError: argument of type 'int' is not iterable`` during the + ``"schema_version" in catalog_data`` check, and an entry mapping + of the wrong type would crash downstream. Reusing + ``_validate_catalog_payload`` keeps the network-side behaviour of + the legacy single-catalog method consistent with the multi-catalog + ``_fetch_single_catalog`` path. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(PresetError, match="Invalid preset catalog format"): + catalog.fetch_catalog(force_refresh=True) + + def test_fetch_catalog_recovers_from_unreadable_cache(self, project_dir): + """An unreadable / wrong-encoded cache file silently refetches. + + The cache contract is best-effort: a JSON-decode failure, an OS + read failure (permissions / disk / handle limit), or an invalid + text encoding on a cache file written by an older client must + all fall through to the network fetch rather than crash the + caller. Covers Copilot's review point that the previous + ``except (json.JSONDecodeError, OSError)`` was missing + ``UnicodeError``. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + catalog.cache_dir.mkdir(parents=True, exist_ok=True) + # Invalid UTF-8 bytes so ``read_text`` raises ``UnicodeDecodeError`` + # (a subclass of ``UnicodeError``). + catalog.cache_file.write_bytes(b"\xff\xfe\x00not-utf-8") + catalog.cache_metadata_file.write_text( + json.dumps( + { + "cached_at": datetime.now(timezone.utc).isoformat(), + "catalog_url": catalog.get_catalog_url(), + } + ), + encoding="utf-8", + ) + + valid = { + "schema_version": "1.0", + "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(valid).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog.fetch_catalog(force_refresh=False) + + # Recovered via network rather than crashing on the unreadable cache. + assert result == valid + def test_get_merged_packs_skips_non_mapping_entries(self, project_dir): """Per-entry guard: one malformed entry shouldn't poison the merge.