fix(manifest): clear recovered marker on managed re-record + reject '..' in is_recovered

Address Copilot Round-7 review comments on PR #2483:

1. record_existing(recovered=False) and record_file now BOTH discard the
   path from _recovered_files. The marker is meant to flag "we observed
   this file but cannot vouch it's a managed baseline" — once the same
   path is re-recorded as managed (either explicitly or by writing fresh
   bytes), the marker is stale and must clear so refresh_managed and
   future is_recovered queries return the truthful answer.

2. is_recovered now applies the same canonical-key guard as record_existing
   (rejects absolute paths and '..' segments lexically before delegating
   to _validate_rel_path). Such paths can never be stored keys, so the
   query correctly returns False without depending on _validate_rel_path
   semantics that diverged from record_existing's stricter contract.

record_file docstring updated to mention the side-effect on recovered
markers.

Tests: 3 new methods in TestManifestRecoveredFiles covering
record_existing(false) clearing, record_file clearing, and is_recovered
dotdot rejection.
This commit is contained in:
eldar702
2026-05-27 19:31:26 +03:00
parent 25051ff38f
commit 1dbf0c2229
2 changed files with 52 additions and 7 deletions

View File

@@ -132,6 +132,9 @@ class IntegrationManifest:
Creates parent directories as needed. Returns the absolute path Creates parent directories as needed. Returns the absolute path
of the written file. of the written file.
If the path was previously marked as recovered via
``record_existing(recovered=True)``, the recovered marker is
cleared because the bytes are now produced, not merely observed.
Raises ``ValueError`` if *rel_path* resolves outside the project root. Raises ``ValueError`` if *rel_path* resolves outside the project root.
""" """
@@ -145,6 +148,9 @@ class IntegrationManifest:
normalized = abs_path.relative_to(self.project_root).as_posix() normalized = abs_path.relative_to(self.project_root).as_posix()
self._files[normalized] = hashlib.sha256(content).hexdigest() self._files[normalized] = hashlib.sha256(content).hexdigest()
# ``record_file`` writes *produced* content, so any prior
# recovered marker for this path is no longer accurate.
self._recovered_files.discard(normalized)
return abs_path return abs_path
def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> None: def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> None:
@@ -201,6 +207,12 @@ class IntegrationManifest:
self._files[normalized] = _sha256(abs_path) self._files[normalized] = _sha256(abs_path)
if recovered: if recovered:
self._recovered_files.add(normalized) self._recovered_files.add(normalized)
else:
# ``recovered=False`` means the caller is asserting this path is
# managed-baseline now, not merely observed; drop any stale
# recovered marker so future is_recovered() queries reflect the
# transition. ``discard`` is a no-op when the key is absent.
self._recovered_files.discard(normalized)
# -- Querying --------------------------------------------------------- # -- Querying ---------------------------------------------------------
@@ -224,15 +236,17 @@ class IntegrationManifest:
def is_recovered(self, rel_path: str | Path) -> bool: def is_recovered(self, rel_path: str | Path) -> bool:
"""Return True if *rel_path* was recorded via ``record_existing(recovered=True)``. """Return True if *rel_path* was recorded via ``record_existing(recovered=True)``.
Input is normalized through the same ``_validate_rel_path`` pipeline that Input is normalized through the same pipeline as ``record_existing``:
``record_existing`` uses for its stored keys, so the two methods agree absolute paths, paths escaping the project root, AND paths containing
on key format. Absolute paths and paths that escape the project root ``'..'`` segments are rejected (returned as ``False``). This mirrors
return ``False`` (they cannot match the relative POSIX keys we store) — ``record_existing``'s canonicalization guard — such paths can never
consistent with Python's membership-predicate convention of not raising appear as stored keys, so the answer is always ``False``.
on a not-in-set query.
""" """
rel = Path(rel_path)
if rel.is_absolute() or ".." in rel.parts:
return False
try: try:
abs_path = _validate_rel_path(Path(rel_path), self.project_root) abs_path = _validate_rel_path(rel, self.project_root)
normalized = abs_path.relative_to(self.project_root).as_posix() normalized = abs_path.relative_to(self.project_root).as_posix()
except ValueError: except ValueError:
return False return False

View File

@@ -375,6 +375,37 @@ class TestManifestRecoveredFiles:
# Don't record anything — the path is impossible to record anyway. # Don't record anything — the path is impossible to record anyway.
assert m.is_recovered("../escape.txt") is False assert m.is_recovered("../escape.txt") is False
def test_record_existing_clears_recovered_when_false(self, tmp_path):
# Finding A: re-recording the same path with recovered=False must
# drop the prior recovered marker (transition to managed baseline).
f = tmp_path / "x.txt"
f.write_text("v1", encoding="utf-8")
m = IntegrationManifest("test", tmp_path)
m.record_existing("x.txt", recovered=True)
assert m.is_recovered("x.txt") is True
m.record_existing("x.txt", recovered=False)
assert m.is_recovered("x.txt") is False
def test_record_file_clears_recovered(self, tmp_path):
# Finding A: record_file writes produced content; the path can no
# longer be considered "merely observed" once we wrote bytes.
(tmp_path / "y.txt").write_text("observed", encoding="utf-8")
m = IntegrationManifest("test", tmp_path)
m.record_existing("y.txt", recovered=True)
assert m.is_recovered("y.txt") is True
m.record_file("y.txt", "produced")
assert m.is_recovered("y.txt") is False
def test_is_recovered_rejects_dotdot_segment(self, tmp_path):
# Finding B: record_existing rejects ``..`` segments via the lexical
# pre-check; is_recovered must match that behavior and return False
# without raising, mirroring the canonicalization guard.
(tmp_path / "z.txt").write_text("v1", encoding="utf-8")
m = IntegrationManifest("test", tmp_path)
m.record_existing("z.txt", recovered=True)
# Same file via dotdot-normalizing path — must be False, not raise.
assert m.is_recovered("subdir/../z.txt") is False
class TestRecordExistingNewGuards: class TestRecordExistingNewGuards:
"""Coverage for the two new guards added by Copilot's 2026-05-18 review.""" """Coverage for the two new guards added by Copilot's 2026-05-18 review."""