From 1dbf0c222952e0dbf9971aaabe371f10a060eec7 Mon Sep 17 00:00:00 2001 From: eldar702 Date: Wed, 27 May 2026 19:31:26 +0300 Subject: [PATCH] fix(manifest): clear recovered marker on managed re-record + reject '..' in is_recovered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot Round-7 review comments on PR #2483: 1. record_existing(recovered=False) and record_file now BOTH discard the path from _recovered_files. The marker is meant to flag "we observed this file but cannot vouch it's a managed baseline" — once the same path is re-recorded as managed (either explicitly or by writing fresh bytes), the marker is stale and must clear so refresh_managed and future is_recovered queries return the truthful answer. 2. is_recovered now applies the same canonical-key guard as record_existing (rejects absolute paths and '..' segments lexically before delegating to _validate_rel_path). Such paths can never be stored keys, so the query correctly returns False without depending on _validate_rel_path semantics that diverged from record_existing's stricter contract. record_file docstring updated to mention the side-effect on recovered markers. Tests: 3 new methods in TestManifestRecoveredFiles covering record_existing(false) clearing, record_file clearing, and is_recovered dotdot rejection. --- src/specify_cli/integrations/manifest.py | 28 +++++++++++++++------ tests/integrations/test_manifest.py | 31 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/specify_cli/integrations/manifest.py b/src/specify_cli/integrations/manifest.py index e6be895e8..7a5307188 100644 --- a/src/specify_cli/integrations/manifest.py +++ b/src/specify_cli/integrations/manifest.py @@ -132,6 +132,9 @@ class IntegrationManifest: Creates parent directories as needed. Returns the absolute path of the written file. + If the path was previously marked as recovered via + ``record_existing(recovered=True)``, the recovered marker is + cleared because the bytes are now produced, not merely observed. Raises ``ValueError`` if *rel_path* resolves outside the project root. """ @@ -145,6 +148,9 @@ class IntegrationManifest: normalized = abs_path.relative_to(self.project_root).as_posix() self._files[normalized] = hashlib.sha256(content).hexdigest() + # ``record_file`` writes *produced* content, so any prior + # recovered marker for this path is no longer accurate. + self._recovered_files.discard(normalized) return abs_path def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> None: @@ -201,6 +207,12 @@ class IntegrationManifest: self._files[normalized] = _sha256(abs_path) if recovered: self._recovered_files.add(normalized) + else: + # ``recovered=False`` means the caller is asserting this path is + # managed-baseline now, not merely observed; drop any stale + # recovered marker so future is_recovered() queries reflect the + # transition. ``discard`` is a no-op when the key is absent. + self._recovered_files.discard(normalized) # -- Querying --------------------------------------------------------- @@ -224,15 +236,17 @@ class IntegrationManifest: def is_recovered(self, rel_path: str | Path) -> bool: """Return True if *rel_path* was recorded via ``record_existing(recovered=True)``. - Input is normalized through the same ``_validate_rel_path`` pipeline that - ``record_existing`` uses for its stored keys, so the two methods agree - on key format. Absolute paths and paths that escape the project root - return ``False`` (they cannot match the relative POSIX keys we store) — - consistent with Python's membership-predicate convention of not raising - on a not-in-set query. + Input is normalized through the same pipeline as ``record_existing``: + absolute paths, paths escaping the project root, AND paths containing + ``'..'`` segments are rejected (returned as ``False``). This mirrors + ``record_existing``'s canonicalization guard — such paths can never + appear as stored keys, so the answer is always ``False``. """ + rel = Path(rel_path) + if rel.is_absolute() or ".." in rel.parts: + return False try: - abs_path = _validate_rel_path(Path(rel_path), self.project_root) + abs_path = _validate_rel_path(rel, self.project_root) normalized = abs_path.relative_to(self.project_root).as_posix() except ValueError: return False diff --git a/tests/integrations/test_manifest.py b/tests/integrations/test_manifest.py index a469a2fa5..80d04fe91 100644 --- a/tests/integrations/test_manifest.py +++ b/tests/integrations/test_manifest.py @@ -375,6 +375,37 @@ class TestManifestRecoveredFiles: # Don't record anything — the path is impossible to record anyway. assert m.is_recovered("../escape.txt") is False + def test_record_existing_clears_recovered_when_false(self, tmp_path): + # Finding A: re-recording the same path with recovered=False must + # drop the prior recovered marker (transition to managed baseline). + f = tmp_path / "x.txt" + f.write_text("v1", encoding="utf-8") + m = IntegrationManifest("test", tmp_path) + m.record_existing("x.txt", recovered=True) + assert m.is_recovered("x.txt") is True + m.record_existing("x.txt", recovered=False) + assert m.is_recovered("x.txt") is False + + def test_record_file_clears_recovered(self, tmp_path): + # Finding A: record_file writes produced content; the path can no + # longer be considered "merely observed" once we wrote bytes. + (tmp_path / "y.txt").write_text("observed", encoding="utf-8") + m = IntegrationManifest("test", tmp_path) + m.record_existing("y.txt", recovered=True) + assert m.is_recovered("y.txt") is True + m.record_file("y.txt", "produced") + assert m.is_recovered("y.txt") is False + + def test_is_recovered_rejects_dotdot_segment(self, tmp_path): + # Finding B: record_existing rejects ``..`` segments via the lexical + # pre-check; is_recovered must match that behavior and return False + # without raising, mirroring the canonicalization guard. + (tmp_path / "z.txt").write_text("v1", encoding="utf-8") + m = IntegrationManifest("test", tmp_path) + m.record_existing("z.txt", recovered=True) + # Same file via dotdot-normalizing path — must be False, not raise. + assert m.is_recovered("subdir/../z.txt") is False + class TestRecordExistingNewGuards: """Coverage for the two new guards added by Copilot's 2026-05-18 review."""