mirror of
https://github.com/github/spec-kit.git
synced 2026-07-03 12:28:06 +08:00
fix: while/do-while loop condition reads stale iteration-0 step output (#2662)
* fix: while/do-while loop condition reads stale iteration-0 step output
After executing namespaced loop body steps, copy each iteration's
results back to the original unprefixed step key so that
evaluate_condition() sees the latest values instead of stale
iteration-0 data.
Fixes #2592
* address review: cross-platform tests, preserve iteration-0 history
- Rewrite shell scripts in tests to use Python via script files
instead of POSIX syntax, so they pass on Windows CI.
- Snapshot iteration-0 nested-step results under a namespaced key
(parent:child:0) before the first copy-back overwrite, preserving
complete per-iteration history for debugging.
* address review: skip copy-back on paused/failed iterations
Move the status check before the copy-back so that partial results
from paused or failed nested steps (e.g., a gate awaiting input)
do not overwrite the unprefixed key. This preserves correct resume
behavior.
* address review: quote paths in test shell commands
Quote both the Python executable and script file paths in the
run: commands to handle spaces in paths on Windows.
* address review: execute loop body with original IDs
Instead of namespacing step IDs for execution and copying results
back, execute the loop body with original (unprefixed) step IDs so
results naturally land at the right keys. Snapshot previous
iteration results to namespaced keys (parent:child:N) for history
only.
This fixes multi-step loop bodies where step B references step A's
output within the same iteration — previously step B would see
stale data until the copy-back ran after the entire iteration.
* address review: namespaced execution with per-step copy-back
Revert to namespaced step IDs for execution (preserving unique
log entries and state keys per iteration) but copy each step's
result back to the unprefixed key immediately after it completes.
This preserves backward compatibility (same namespaced key format,
same log IDs) while fixing both the condition evaluation bug and
inter-step references within multi-step loop bodies.
* address review: alias after status check, add multi-step body test
- Move per-step aliasing below the PAUSED/FAILED/ABORTED status
check so partial results from incomplete steps are not aliased
back to the unprefixed key.
- Add test_while_loop_multi_step_body_inter_step_refs to exercise
a multi-step loop body where step B reads step A's output within
the same iteration, verifying per-step aliasing works correctly.
Addresses feedback from @doquanghuy (items 2 & 4) and Copilot
review on commit 9d0a222.
* address review: stable fallback IDs, expression-based inter-step test
- Use enumerate() for stable fallback IDs when loop body steps lack
an explicit id (step-0, step-1, etc. instead of always step-0).
- Rewrite multi-step body test so step B uses expression
substitution ({{ steps.step-a.output.stdout }}) instead of
reading the counter file directly, making it a true regression
test for per-step aliasing.
This commit is contained in:
@@ -1889,6 +1889,268 @@ steps:
|
||||
errors = validate_workflow(definition)
|
||||
assert any("invalid default" in e for e in errors), errors
|
||||
|
||||
def test_while_loop_condition_reads_latest_iteration(self, project_dir):
|
||||
"""Regression: while-loop condition must see updated step output
|
||||
from the most recent iteration, not stale iteration-0 data.
|
||||
|
||||
See https://github.com/github/spec-kit/issues/2592
|
||||
"""
|
||||
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
|
||||
from specify_cli.workflows.base import RunStatus
|
||||
|
||||
# Shell step echoes a counter via a file.
|
||||
# Condition: exit_code != 0 means "keep looping" — but a non-zero
|
||||
# exit code would mark the step FAILED and abort the run, so we
|
||||
# use stdout-based comparison instead.
|
||||
#
|
||||
# Iteration 0: counter=1, echoes "1" → not "done" → loop continues
|
||||
# Iteration 1: counter=2, echoes "done" → condition false → stop
|
||||
# Without the fix, condition always reads iteration-0 stdout,
|
||||
# so the loop runs all max_iterations.
|
||||
import sys
|
||||
|
||||
counter_file = project_dir / ".counter"
|
||||
counter_file.write_text("0", encoding="utf-8")
|
||||
py = sys.executable
|
||||
script_file = project_dir / "_tick.py"
|
||||
script_file.write_text(
|
||||
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
|
||||
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
|
||||
"print('done' if n >= 2 else str(n), end='')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
yaml_str = f"""
|
||||
schema_version: "1.0"
|
||||
workflow:
|
||||
id: "while-condition-update"
|
||||
name: "While Condition Update"
|
||||
version: "1.0.0"
|
||||
steps:
|
||||
- id: retry-loop
|
||||
type: while
|
||||
condition: "{{{{ 'done' not in steps.attempt.output.stdout }}}}"
|
||||
max_iterations: 5
|
||||
steps:
|
||||
- id: attempt
|
||||
type: shell
|
||||
run: '"{py}" "{script_file}"'
|
||||
"""
|
||||
definition = WorkflowDefinition.from_string(yaml_str)
|
||||
engine = WorkflowEngine(project_dir)
|
||||
state = engine.execute(definition)
|
||||
|
||||
assert state.status == RunStatus.COMPLETED
|
||||
# The unprefixed key should reflect the latest iteration's result.
|
||||
assert state.step_results["attempt"]["output"]["stdout"] == "done"
|
||||
# Namespaced iteration-1 result should also exist.
|
||||
assert "retry-loop:attempt:1" in state.step_results
|
||||
# Counter should be 2 (iteration 0 + iteration 1), not 5.
|
||||
assert counter_file.read_text(encoding="utf-8").strip() == "2"
|
||||
|
||||
def test_do_while_loop_condition_reads_latest_iteration(self, project_dir):
|
||||
"""Regression: do-while loop condition must also see updated output.
|
||||
|
||||
See https://github.com/github/spec-kit/issues/2592
|
||||
"""
|
||||
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
|
||||
from specify_cli.workflows.base import RunStatus
|
||||
|
||||
import sys
|
||||
|
||||
counter_file = project_dir / ".counter"
|
||||
counter_file.write_text("0", encoding="utf-8")
|
||||
py = sys.executable
|
||||
script_file = project_dir / "_tick.py"
|
||||
script_file.write_text(
|
||||
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
|
||||
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
|
||||
"print('done' if n >= 2 else str(n), end='')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
yaml_str = f"""
|
||||
schema_version: "1.0"
|
||||
workflow:
|
||||
id: "do-while-condition-update"
|
||||
name: "Do While Condition Update"
|
||||
version: "1.0.0"
|
||||
steps:
|
||||
- id: retry-loop
|
||||
type: do-while
|
||||
condition: "{{{{ 'done' not in steps.attempt.output.stdout }}}}"
|
||||
max_iterations: 5
|
||||
steps:
|
||||
- id: attempt
|
||||
type: shell
|
||||
run: '"{py}" "{script_file}"'
|
||||
"""
|
||||
definition = WorkflowDefinition.from_string(yaml_str)
|
||||
engine = WorkflowEngine(project_dir)
|
||||
state = engine.execute(definition)
|
||||
|
||||
assert state.status == RunStatus.COMPLETED
|
||||
assert state.step_results["attempt"]["output"]["stdout"] == "done"
|
||||
assert counter_file.read_text(encoding="utf-8").strip() == "2"
|
||||
|
||||
def test_while_loop_runs_to_max_when_condition_stays_true(self, project_dir):
|
||||
"""While loop must still run to max_iterations when the condition
|
||||
never becomes false — copy-back must not break this path.
|
||||
|
||||
See https://github.com/github/spec-kit/issues/2592
|
||||
"""
|
||||
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
|
||||
from specify_cli.workflows.base import RunStatus
|
||||
|
||||
import sys
|
||||
|
||||
counter_file = project_dir / ".counter"
|
||||
counter_file.write_text("0", encoding="utf-8")
|
||||
py = sys.executable
|
||||
script_file = project_dir / "_tick.py"
|
||||
script_file.write_text(
|
||||
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
|
||||
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
|
||||
"print('pending', end='')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
yaml_str = f"""
|
||||
schema_version: "1.0"
|
||||
workflow:
|
||||
id: "while-max-iterations"
|
||||
name: "While Max Iterations"
|
||||
version: "1.0.0"
|
||||
steps:
|
||||
- id: retry-loop
|
||||
type: while
|
||||
condition: "{{{{ 'done' not in steps.tick.output.stdout }}}}"
|
||||
max_iterations: 3
|
||||
steps:
|
||||
- id: tick
|
||||
type: shell
|
||||
run: '"{py}" "{script_file}"'
|
||||
"""
|
||||
definition = WorkflowDefinition.from_string(yaml_str)
|
||||
engine = WorkflowEngine(project_dir)
|
||||
state = engine.execute(definition)
|
||||
|
||||
assert state.status == RunStatus.COMPLETED
|
||||
# All 3 iterations ran (iteration 0 + 2 loop iterations).
|
||||
assert counter_file.read_text(encoding="utf-8").strip() == "3"
|
||||
# Unprefixed key holds the last iteration's result.
|
||||
assert state.step_results["tick"]["output"]["stdout"] == "pending"
|
||||
# Namespaced keys for loop iterations exist.
|
||||
assert "retry-loop:tick:1" in state.step_results
|
||||
assert "retry-loop:tick:2" in state.step_results
|
||||
|
||||
def test_do_while_loop_runs_to_max_when_condition_stays_true(self, project_dir):
|
||||
"""Do-while loop must still run to max_iterations when the condition
|
||||
never becomes false.
|
||||
|
||||
See https://github.com/github/spec-kit/issues/2592
|
||||
"""
|
||||
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
|
||||
from specify_cli.workflows.base import RunStatus
|
||||
|
||||
import sys
|
||||
|
||||
counter_file = project_dir / ".counter"
|
||||
counter_file.write_text("0", encoding="utf-8")
|
||||
py = sys.executable
|
||||
script_file = project_dir / "_tick.py"
|
||||
script_file.write_text(
|
||||
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
|
||||
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
|
||||
"print('pending', end='')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
yaml_str = f"""
|
||||
schema_version: "1.0"
|
||||
workflow:
|
||||
id: "do-while-max-iterations"
|
||||
name: "Do While Max Iterations"
|
||||
version: "1.0.0"
|
||||
steps:
|
||||
- id: retry-loop
|
||||
type: do-while
|
||||
condition: "{{{{ 'done' not in steps.tick.output.stdout }}}}"
|
||||
max_iterations: 3
|
||||
steps:
|
||||
- id: tick
|
||||
type: shell
|
||||
run: '"{py}" "{script_file}"'
|
||||
"""
|
||||
definition = WorkflowDefinition.from_string(yaml_str)
|
||||
engine = WorkflowEngine(project_dir)
|
||||
state = engine.execute(definition)
|
||||
|
||||
assert state.status == RunStatus.COMPLETED
|
||||
assert counter_file.read_text(encoding="utf-8").strip() == "3"
|
||||
assert state.step_results["tick"]["output"]["stdout"] == "pending"
|
||||
|
||||
def test_while_loop_multi_step_body_inter_step_refs(self, project_dir):
|
||||
"""Multi-step loop body: step B must see step A's output from the
|
||||
current iteration, not a stale previous one.
|
||||
|
||||
See https://github.com/github/spec-kit/issues/2592
|
||||
"""
|
||||
from specify_cli.workflows.engine import WorkflowEngine, WorkflowDefinition
|
||||
from specify_cli.workflows.base import RunStatus
|
||||
|
||||
import sys
|
||||
|
||||
counter_file = project_dir / ".counter"
|
||||
counter_file.write_text("0", encoding="utf-8")
|
||||
py = sys.executable
|
||||
|
||||
# Step A: increments counter file, echoes the value.
|
||||
step_a_file = project_dir / "_step_a.py"
|
||||
step_a_file.write_text(
|
||||
f"import pathlib; p = pathlib.Path(r'{counter_file}')\n"
|
||||
"n = int(p.read_text()) + 1; p.write_text(str(n))\n"
|
||||
"print(str(n), end='')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Step B uses {{ steps.step-a.output.stdout }} expression
|
||||
# substitution in its run command so the engine resolves the
|
||||
# aliased unprefixed key — this is the real inter-step test.
|
||||
yaml_str = f"""
|
||||
schema_version: "1.0"
|
||||
workflow:
|
||||
id: "while-multi-step"
|
||||
name: "While Multi Step"
|
||||
version: "1.0.0"
|
||||
steps:
|
||||
- id: retry-loop
|
||||
type: while
|
||||
condition: "{{{{ 'done' not in steps.step-a.output.stdout }}}}"
|
||||
max_iterations: 3
|
||||
steps:
|
||||
- id: step-a
|
||||
type: shell
|
||||
run: '"{py}" "{step_a_file}"'
|
||||
- id: step-b
|
||||
type: shell
|
||||
run: "echo b-saw-{{{{ steps.step-a.output.stdout }}}}"
|
||||
"""
|
||||
definition = WorkflowDefinition.from_string(yaml_str)
|
||||
engine = WorkflowEngine(project_dir)
|
||||
state = engine.execute(definition)
|
||||
|
||||
assert state.status == RunStatus.COMPLETED
|
||||
# Both unprefixed keys reflect the latest iteration's results.
|
||||
assert state.step_results["step-a"]["output"]["stdout"] == "3"
|
||||
# Step B saw step A's output via expression substitution.
|
||||
assert "b-saw-3" in state.step_results["step-b"]["output"]["stdout"]
|
||||
# Namespaced keys exist for loop iterations.
|
||||
assert "retry-loop:step-a:1" in state.step_results
|
||||
assert "retry-loop:step-b:1" in state.step_results
|
||||
assert "retry-loop:step-a:2" in state.step_results
|
||||
assert "retry-loop:step-b:2" in state.step_results
|
||||
|
||||
|
||||
# ===== State Persistence Tests =====
|
||||
|
||||
|
||||
Reference in New Issue
Block a user