mirror of
https://github.com/github/spec-kit.git
synced 2026-07-04 13:12:23 +08:00
fix(manifest): clear recovered marker on managed re-record + reject '..' in is_recovered
Address Copilot Round-7 review comments on PR #2483: 1. record_existing(recovered=False) and record_file now BOTH discard the path from _recovered_files. The marker is meant to flag "we observed this file but cannot vouch it's a managed baseline" — once the same path is re-recorded as managed (either explicitly or by writing fresh bytes), the marker is stale and must clear so refresh_managed and future is_recovered queries return the truthful answer. 2. is_recovered now applies the same canonical-key guard as record_existing (rejects absolute paths and '..' segments lexically before delegating to _validate_rel_path). Such paths can never be stored keys, so the query correctly returns False without depending on _validate_rel_path semantics that diverged from record_existing's stricter contract. record_file docstring updated to mention the side-effect on recovered markers. Tests: 3 new methods in TestManifestRecoveredFiles covering record_existing(false) clearing, record_file clearing, and is_recovered dotdot rejection.
This commit is contained in:
@@ -132,6 +132,9 @@ class IntegrationManifest:
|
|||||||
|
|
||||||
Creates parent directories as needed. Returns the absolute path
|
Creates parent directories as needed. Returns the absolute path
|
||||||
of the written file.
|
of the written file.
|
||||||
|
If the path was previously marked as recovered via
|
||||||
|
``record_existing(recovered=True)``, the recovered marker is
|
||||||
|
cleared because the bytes are now produced, not merely observed.
|
||||||
|
|
||||||
Raises ``ValueError`` if *rel_path* resolves outside the project root.
|
Raises ``ValueError`` if *rel_path* resolves outside the project root.
|
||||||
"""
|
"""
|
||||||
@@ -145,6 +148,9 @@ class IntegrationManifest:
|
|||||||
|
|
||||||
normalized = abs_path.relative_to(self.project_root).as_posix()
|
normalized = abs_path.relative_to(self.project_root).as_posix()
|
||||||
self._files[normalized] = hashlib.sha256(content).hexdigest()
|
self._files[normalized] = hashlib.sha256(content).hexdigest()
|
||||||
|
# ``record_file`` writes *produced* content, so any prior
|
||||||
|
# recovered marker for this path is no longer accurate.
|
||||||
|
self._recovered_files.discard(normalized)
|
||||||
return abs_path
|
return abs_path
|
||||||
|
|
||||||
def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> None:
|
def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> None:
|
||||||
@@ -201,6 +207,12 @@ class IntegrationManifest:
|
|||||||
self._files[normalized] = _sha256(abs_path)
|
self._files[normalized] = _sha256(abs_path)
|
||||||
if recovered:
|
if recovered:
|
||||||
self._recovered_files.add(normalized)
|
self._recovered_files.add(normalized)
|
||||||
|
else:
|
||||||
|
# ``recovered=False`` means the caller is asserting this path is
|
||||||
|
# managed-baseline now, not merely observed; drop any stale
|
||||||
|
# recovered marker so future is_recovered() queries reflect the
|
||||||
|
# transition. ``discard`` is a no-op when the key is absent.
|
||||||
|
self._recovered_files.discard(normalized)
|
||||||
|
|
||||||
# -- Querying ---------------------------------------------------------
|
# -- Querying ---------------------------------------------------------
|
||||||
|
|
||||||
@@ -224,15 +236,17 @@ class IntegrationManifest:
|
|||||||
def is_recovered(self, rel_path: str | Path) -> bool:
|
def is_recovered(self, rel_path: str | Path) -> bool:
|
||||||
"""Return True if *rel_path* was recorded via ``record_existing(recovered=True)``.
|
"""Return True if *rel_path* was recorded via ``record_existing(recovered=True)``.
|
||||||
|
|
||||||
Input is normalized through the same ``_validate_rel_path`` pipeline that
|
Input is normalized through the same pipeline as ``record_existing``:
|
||||||
``record_existing`` uses for its stored keys, so the two methods agree
|
absolute paths, paths escaping the project root, AND paths containing
|
||||||
on key format. Absolute paths and paths that escape the project root
|
``'..'`` segments are rejected (returned as ``False``). This mirrors
|
||||||
return ``False`` (they cannot match the relative POSIX keys we store) —
|
``record_existing``'s canonicalization guard — such paths can never
|
||||||
consistent with Python's membership-predicate convention of not raising
|
appear as stored keys, so the answer is always ``False``.
|
||||||
on a not-in-set query.
|
|
||||||
"""
|
"""
|
||||||
|
rel = Path(rel_path)
|
||||||
|
if rel.is_absolute() or ".." in rel.parts:
|
||||||
|
return False
|
||||||
try:
|
try:
|
||||||
abs_path = _validate_rel_path(Path(rel_path), self.project_root)
|
abs_path = _validate_rel_path(rel, self.project_root)
|
||||||
normalized = abs_path.relative_to(self.project_root).as_posix()
|
normalized = abs_path.relative_to(self.project_root).as_posix()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -375,6 +375,37 @@ class TestManifestRecoveredFiles:
|
|||||||
# Don't record anything — the path is impossible to record anyway.
|
# Don't record anything — the path is impossible to record anyway.
|
||||||
assert m.is_recovered("../escape.txt") is False
|
assert m.is_recovered("../escape.txt") is False
|
||||||
|
|
||||||
|
def test_record_existing_clears_recovered_when_false(self, tmp_path):
|
||||||
|
# Finding A: re-recording the same path with recovered=False must
|
||||||
|
# drop the prior recovered marker (transition to managed baseline).
|
||||||
|
f = tmp_path / "x.txt"
|
||||||
|
f.write_text("v1", encoding="utf-8")
|
||||||
|
m = IntegrationManifest("test", tmp_path)
|
||||||
|
m.record_existing("x.txt", recovered=True)
|
||||||
|
assert m.is_recovered("x.txt") is True
|
||||||
|
m.record_existing("x.txt", recovered=False)
|
||||||
|
assert m.is_recovered("x.txt") is False
|
||||||
|
|
||||||
|
def test_record_file_clears_recovered(self, tmp_path):
|
||||||
|
# Finding A: record_file writes produced content; the path can no
|
||||||
|
# longer be considered "merely observed" once we wrote bytes.
|
||||||
|
(tmp_path / "y.txt").write_text("observed", encoding="utf-8")
|
||||||
|
m = IntegrationManifest("test", tmp_path)
|
||||||
|
m.record_existing("y.txt", recovered=True)
|
||||||
|
assert m.is_recovered("y.txt") is True
|
||||||
|
m.record_file("y.txt", "produced")
|
||||||
|
assert m.is_recovered("y.txt") is False
|
||||||
|
|
||||||
|
def test_is_recovered_rejects_dotdot_segment(self, tmp_path):
|
||||||
|
# Finding B: record_existing rejects ``..`` segments via the lexical
|
||||||
|
# pre-check; is_recovered must match that behavior and return False
|
||||||
|
# without raising, mirroring the canonicalization guard.
|
||||||
|
(tmp_path / "z.txt").write_text("v1", encoding="utf-8")
|
||||||
|
m = IntegrationManifest("test", tmp_path)
|
||||||
|
m.record_existing("z.txt", recovered=True)
|
||||||
|
# Same file via dotdot-normalizing path — must be False, not raise.
|
||||||
|
assert m.is_recovered("subdir/../z.txt") is False
|
||||||
|
|
||||||
|
|
||||||
class TestRecordExistingNewGuards:
|
class TestRecordExistingNewGuards:
|
||||||
"""Coverage for the two new guards added by Copilot's 2026-05-18 review."""
|
"""Coverage for the two new guards added by Copilot's 2026-05-18 review."""
|
||||||
|
|||||||
Reference in New Issue
Block a user