From 3036fe6954cc4d8e6669ab0a643e0c4b4272cab1 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 30 Jun 2026 02:52:08 +0700 Subject: [PATCH] fix(workflows): reject a fan-in wait_for that names an unknown step at validation (#3225) * fix(workflows): reject a fan-in wait_for that names an unknown step at validation * fix(workflows): reject fan-in wait_for self-reference and non-string entries Address review feedback on the fan-in wait_for validator: - A fan-in's own id is added to seen_ids before the wait_for check, so `wait_for: []` passed validation while producing a silent empty join at runtime. Reject self-references explicitly. - Non-string entries (e.g. YAML `wait_for: [123]`) were skipped by the isinstance(str) guard and validated even though they can never match a real step id. Flag them as wiring errors. Add coverage for both cases. --- src/specify_cli/workflows/engine.py | 34 ++++++++ tests/test_workflows.py | 122 ++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py index 0e11a6b7d..23b5b0c5c 100644 --- a/src/specify_cli/workflows/engine.py +++ b/src/specify_cli/workflows/engine.py @@ -296,6 +296,40 @@ def _validate_steps( f"boolean, got {type(coe).__name__}." ) + # Fan-in: every wait_for id must reference a step declared at or before + # this point. An id not yet seen is either a typo (unknown step) or a + # forward reference (the target runs after this fan-in, so its results + # cannot exist yet) — both are wiring errors that previously surfaced as + # a silent empty result + COMPLETED. A step that is declared but only + # conditionally executed (e.g. inside an if/switch branch) is still + # "seen" here, so a legitimately-empty result at runtime stays valid. + if step_type == "fan-in": + wait_for = step_config.get("wait_for") + if isinstance(wait_for, list): + for wid in wait_for: + if not isinstance(wid, str): + # A non-string entry (e.g. YAML `wait_for: [123]`) can + # never match a real step id, so the join is silently + # empty at runtime — surface it as a wiring error. + errors.append( + f"Fan-in step {step_id!r}: 'wait_for' entries must " + f"be step-id strings, got {type(wid).__name__} " + f"({wid!r})." + ) + elif wid == step_id: + # The fan-in's own id is already in seen_ids by now, so + # a self-reference would pass the membership check below + # while still producing an empty join at runtime. + errors.append( + f"Fan-in step {step_id!r}: 'wait_for' references " + f"itself; a fan-in cannot wait for its own results." + ) + elif wid not in seen_ids: + errors.append( + f"Fan-in step {step_id!r}: 'wait_for' references " + f"unknown or not-yet-declared step id {wid!r}." + ) + # Recursively validate nested steps for nested_key in ("then", "else", "steps"): nested = step_config.get(nested_key) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index cee02c46b..6ad4fe63b 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -2007,6 +2007,128 @@ class TestFanInStep: assert any("non-empty list" in e for e in errors) +class TestFanInWaitForValidation: + """fan-in wait_for must reference a declared step (no silent empty join).""" + + @staticmethod + def _errors(yaml_text): + from specify_cli.workflows.engine import ( + WorkflowDefinition, + validate_workflow, + ) + + return validate_workflow(WorkflowDefinition.from_string(yaml_text)) + + def test_unknown_wait_for_id_is_rejected(self): + errors = self._errors(""" +workflow: + id: wf + name: wf + version: "1.0.0" +steps: + - id: collect + type: fan-in + wait_for: [ghost] +""") + assert any( + "unknown or not-yet-declared step id 'ghost'" in e for e in errors + ) + + def test_wait_for_declared_earlier_step_passes(self): + errors = self._errors(""" +workflow: + id: wf + name: wf + version: "1.0.0" +steps: + - id: produce + type: command + command: speckit.implement + - id: collect + type: fan-in + wait_for: [produce] +""") + assert not any("wait_for" in e for e in errors) + + def test_wait_for_conditionally_declared_step_passes(self): + # A step declared inside an if-branch may be skipped at runtime, but it is + # still "declared", so referencing it must validate — a legitimately-empty + # runtime join stays valid. + errors = self._errors(""" +workflow: + id: wf + name: wf + version: "1.0.0" +steps: + - id: maybe + type: if + condition: "{{ inputs.flag }}" + then: + - id: branch_task + type: command + command: speckit.implement + - id: collect + type: fan-in + wait_for: [branch_task] +""") + assert not any("wait_for" in e for e in errors) + + def test_forward_reference_is_rejected(self): + # wait_for points at a step declared AFTER the fan-in; its results cannot + # exist when the fan-in runs, so it is flagged. + errors = self._errors(""" +workflow: + id: wf + name: wf + version: "1.0.0" +steps: + - id: collect + type: fan-in + wait_for: [later] + - id: later + type: command + command: speckit.implement +""") + assert any( + "unknown or not-yet-declared step id 'later'" in e for e in errors + ) + + def test_self_reference_is_rejected(self): + # A fan-in's own id is in scope by the time it is validated, so a + # self-reference slips past the membership check while still producing + # an empty join at runtime. + errors = self._errors(""" +workflow: + id: wf + name: wf + version: "1.0.0" +steps: + - id: collect + type: fan-in + wait_for: [collect] +""") + assert any( + "references itself" in e and "collect" in e for e in errors + ) + + def test_non_string_wait_for_entry_is_rejected(self): + # A non-string entry (e.g. YAML `wait_for: [123]`) can never match a + # real step id, so it must be flagged rather than silently ignored. + errors = self._errors(""" +workflow: + id: wf + name: wf + version: "1.0.0" +steps: + - id: collect + type: fan-in + wait_for: [123] +""") + assert any( + "must be step-id strings" in e and "int" in e for e in errors + ) + + # ===== Workflow Definition Tests ===== class TestWorkflowDefinition: