fix(manifest): clear recovered marker on managed re-record + reject '..' in is_recovered

Address Copilot Round-7 review comments on PR #2483:

1. record_existing(recovered=False) and record_file now BOTH discard the
   path from _recovered_files. The marker is meant to flag "we observed
   this file but cannot vouch it's a managed baseline" — once the same
   path is re-recorded as managed (either explicitly or by writing fresh
   bytes), the marker is stale and must clear so refresh_managed and
   future is_recovered queries return the truthful answer.

2. is_recovered now applies the same canonical-key guard as record_existing
   (rejects absolute paths and '..' segments lexically before delegating
   to _validate_rel_path). Such paths can never be stored keys, so the
   query correctly returns False without depending on _validate_rel_path
   semantics that diverged from record_existing's stricter contract.

record_file docstring updated to mention the side-effect on recovered
markers.

Tests: 3 new methods in TestManifestRecoveredFiles covering
record_existing(false) clearing, record_file clearing, and is_recovered
dotdot rejection.
This commit is contained in:
eldar702
2026-05-27 19:31:26 +03:00
parent 25051ff38f
commit 1dbf0c2229
2 changed files with 52 additions and 7 deletions

View File

@@ -132,6 +132,9 @@ class IntegrationManifest:
Creates parent directories as needed. Returns the absolute path
of the written file.
If the path was previously marked as recovered via
``record_existing(recovered=True)``, the recovered marker is
cleared because the bytes are now produced, not merely observed.
Raises ``ValueError`` if *rel_path* resolves outside the project root.
"""
@@ -145,6 +148,9 @@ class IntegrationManifest:
normalized = abs_path.relative_to(self.project_root).as_posix()
self._files[normalized] = hashlib.sha256(content).hexdigest()
# ``record_file`` writes *produced* content, so any prior
# recovered marker for this path is no longer accurate.
self._recovered_files.discard(normalized)
return abs_path
def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> None:
@@ -201,6 +207,12 @@ class IntegrationManifest:
self._files[normalized] = _sha256(abs_path)
if recovered:
self._recovered_files.add(normalized)
else:
# ``recovered=False`` means the caller is asserting this path is
# managed-baseline now, not merely observed; drop any stale
# recovered marker so future is_recovered() queries reflect the
# transition. ``discard`` is a no-op when the key is absent.
self._recovered_files.discard(normalized)
# -- Querying ---------------------------------------------------------
@@ -224,15 +236,17 @@ class IntegrationManifest:
def is_recovered(self, rel_path: str | Path) -> bool:
"""Return True if *rel_path* was recorded via ``record_existing(recovered=True)``.
Input is normalized through the same ``_validate_rel_path`` pipeline that
``record_existing`` uses for its stored keys, so the two methods agree
on key format. Absolute paths and paths that escape the project root
return ``False`` (they cannot match the relative POSIX keys we store) —
consistent with Python's membership-predicate convention of not raising
on a not-in-set query.
Input is normalized through the same pipeline as ``record_existing``:
absolute paths, paths escaping the project root, AND paths containing
``'..'`` segments are rejected (returned as ``False``). This mirrors
``record_existing``'s canonicalization guard — such paths can never
appear as stored keys, so the answer is always ``False``.
"""
rel = Path(rel_path)
if rel.is_absolute() or ".." in rel.parts:
return False
try:
abs_path = _validate_rel_path(Path(rel_path), self.project_root)
abs_path = _validate_rel_path(rel, self.project_root)
normalized = abs_path.relative_to(self.project_root).as_posix()
except ValueError:
return False

View File

@@ -375,6 +375,37 @@ class TestManifestRecoveredFiles:
# Don't record anything — the path is impossible to record anyway.
assert m.is_recovered("../escape.txt") is False
def test_record_existing_clears_recovered_when_false(self, tmp_path):
# Finding A: re-recording the same path with recovered=False must
# drop the prior recovered marker (transition to managed baseline).
f = tmp_path / "x.txt"
f.write_text("v1", encoding="utf-8")
m = IntegrationManifest("test", tmp_path)
m.record_existing("x.txt", recovered=True)
assert m.is_recovered("x.txt") is True
m.record_existing("x.txt", recovered=False)
assert m.is_recovered("x.txt") is False
def test_record_file_clears_recovered(self, tmp_path):
# Finding A: record_file writes produced content; the path can no
# longer be considered "merely observed" once we wrote bytes.
(tmp_path / "y.txt").write_text("observed", encoding="utf-8")
m = IntegrationManifest("test", tmp_path)
m.record_existing("y.txt", recovered=True)
assert m.is_recovered("y.txt") is True
m.record_file("y.txt", "produced")
assert m.is_recovered("y.txt") is False
def test_is_recovered_rejects_dotdot_segment(self, tmp_path):
# Finding B: record_existing rejects ``..`` segments via the lexical
# pre-check; is_recovered must match that behavior and return False
# without raising, mirroring the canonicalization guard.
(tmp_path / "z.txt").write_text("v1", encoding="utf-8")
m = IntegrationManifest("test", tmp_path)
m.record_existing("z.txt", recovered=True)
# Same file via dotdot-normalizing path — must be False, not raise.
assert m.is_recovered("subdir/../z.txt") is False
class TestRecordExistingNewGuards:
"""Coverage for the two new guards added by Copilot's 2026-05-18 review."""