fix: address codex round 2 — revert harvest break + allow lookback 0

- harvest.py: revert `break` back to `continue` — mtime ordering can
  diverge from the embedded ended_at timestamps (copy/touch), so we must
  check all files rather than exiting early on the first old one
- cycle.py: use `lookback_hours is not None and lookback_hours > 0`
  so that lookback_hours=0 explicitly means "scan full history"
  (opt-out of the cutoff)
- __main__.py: propagate --lookback-hours 0 to config as explicit 0

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
carpedkm
2026-06-20 14:21:18 +00:00
parent 6cc1cd2e95
commit 01075c90d3
3 changed files with 8 additions and 5 deletions

View File

@@ -111,8 +111,10 @@ def _cfg_from_args(args, task_meta: Dict[str, Any] | None = None) -> Any:
overrides["codex_home"] = os.path.abspath(args.codex_home)
if getattr(args, "source", ""):
overrides["transcript_source"] = args.source
if getattr(args, "lookback_hours", 0):
if getattr(args, "lookback_hours", None) is not None and args.lookback_hours != 0:
overrides["lookback_hours"] = args.lookback_hours
elif getattr(args, "lookback_hours", None) == 0:
overrides["lookback_hours"] = 0 # explicit opt-out: scan full history
if getattr(args, "edit_budget", 0):
overrides["edit_budget"] = args.edit_budget
if getattr(args, "max_sessions", 0):

View File

@@ -148,7 +148,7 @@ def run_sleep_cycle(
# scan the entire transcript history and trigger massive LLM mining.
if since is None:
lookback_hours = cfg.get("lookback_hours", 72)
if lookback_hours and lookback_hours > 0:
if lookback_hours is not None and lookback_hours > 0:
import time
ref_time = clock if clock is not None else time.time()
cutoff = ref_time - lookback_hours * 3600

View File

@@ -294,9 +294,10 @@ def harvest(
if not _project_matches(d.project or "", scope, invoked_project):
continue
if since_iso and d.ended_at and d.ended_at < since_iso:
# Files are sorted newest-first by mtime; once we see one that
# is older than the cutoff, all remaining files are older too.
break
# Note: files are sorted by mtime but we compare the embedded
# ended_at timestamp — mtime can diverge (copy/touch), so we
# cannot break here; we must continue to check all files.
continue
digests.append(d)
if limit and len(digests) >= limit:
break