mirror of
https://github.com/github/spec-kit.git
synced 2026-07-03 12:28:06 +08:00
feat(workflows): add from_json expression filter (#2961)
* feat(workflows): add from_json expression filter

  Step outputs captured as strings could never become typed values in templates - the filter set was default/join/map/contains only, so e.g. a fan-out items: could never consume a step's JSON stdout. Add an arg-less from_json pipe filter with parse-or-raise semantics: invalid JSON or non-string input raises a clear ValueError rather than passing through silently.

  Fixes #2960

* fix(expressions): make from_json strict — reject any arguments

  Address review (#2961): from_json('x') and from_json() previously fell through to a silent passthrough of the unparsed value. Reject any parenthesized form with a clear error so mis-wired templates fail loudly. Rename test to ...parses_object (JSON under test is an object) and add coverage for the strict no-arguments behavior.

  Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* docs(workflows): document the from_json expression filter

  Address Copilot review: the user-facing filter references omitted the newly added `from_json` filter. Add it to the ARCHITECTURE.md filter table (with the `{{ steps.emit.output.stdout | from_json }}` example) and to the filter enumerations in workflows/README.md and docs/reference/workflows.md so the docs match the evaluator's capabilities.

  Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(workflows): make from_json strictness reject trailing tokens; fix docstring

  Address Copilot review:
  - Strictness only rejected parenthesized forms, so typos like `| from_json)` or `| from_json extra` still fell through to the unknown-filter path and silently returned the unparsed value. Match on the leading filter token and require the whole filter to be exactly `from_json`, so every mis-wired form raises. Extend the rejection test to cover the trailing-token cases.
  - The module docstring claimed "no imports", which is misleading now that the module imports `json`. Reword to state the actual sandbox guarantee: templates cannot do file I/O, import modules, or run arbitrary code.

  Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -280,7 +280,7 @@ Steps can reference inputs and previous step outputs using `{{ expression }}` sy
|
|||||||
| `steps.specify.output.file` | Output from a previous step |
|
| `steps.specify.output.file` | Output from a previous step |
|
||||||
| `item` | Current item in a fan-out iteration |
|
| `item` | Current item in a fan-out iteration |
|
||||||
|
|
||||||
Available filters: `default`, `join`, `contains`, `map`.
|
Available filters: `default`, `join`, `contains`, `map`, `from_json`.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
"""Sandboxed expression evaluator for workflow templates.
|
"""Sandboxed expression evaluator for workflow templates.
|
||||||
|
|
||||||
Provides a safe Jinja2 subset for evaluating expressions in workflow YAML.
|
Provides a safe Jinja2 subset for evaluating expressions in workflow YAML.
|
||||||
No file I/O, no imports, no arbitrary code execution.
|
Templates cannot perform file I/O, import modules, or run arbitrary code —
|
||||||
|
the evaluator only walks the namespace and applies a fixed set of filters.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -57,6 +59,23 @@ def _filter_contains(value: Any, substring: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_from_json(value: Any) -> Any:
    """Parse a JSON string into a typed value (list/dict/scalar).

    A failure here means the pipeline wiring is wrong, so every failure
    mode is reported as ``ValueError`` instead of passing the unparsed
    value through silently.

    Args:
        value: The piped-in value; must be a ``str`` holding a JSON document.

    Returns:
        The Python object produced by ``json.loads(value)``.

    Raises:
        ValueError: If ``value`` is not a string, if it is not valid JSON,
            or if it is nested too deeply for the decoder to parse.
    """
    if not isinstance(value, str):
        raise ValueError(
            f"from_json: expected a JSON string, got {type(value).__name__}"
        )
    try:
        return json.loads(value)
    except json.JSONDecodeError as exc:
        raise ValueError(f"from_json: invalid JSON: {exc}") from exc
    except RecursionError as exc:
        # Extremely deep nesting exhausts the decoder's recursion budget and
        # surfaces as RecursionError, which is not a ValueError; convert it
        # so callers only ever have to handle the one documented exception.
        raise ValueError(
            "from_json: JSON is nested too deeply to parse"
        ) from exc
|
||||||
|
|
||||||
|
|
||||||
# -- Expression resolution ------------------------------------------------
|
# -- Expression resolution ------------------------------------------------
|
||||||
|
|
||||||
_EXPR_PATTERN = re.compile(r"\{\{(.+?)\}\}")
|
_EXPR_PATTERN = re.compile(r"\{\{(.+?)\}\}")
|
||||||
@@ -122,7 +141,7 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
|
|||||||
- Comparisons: ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=``
|
- Comparisons: ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=``
|
||||||
- Boolean operators: ``and``, ``or``, ``not``
|
- Boolean operators: ``and``, ``or``, ``not``
|
||||||
- ``in``, ``not in``
|
- ``in``, ``not in``
|
||||||
- Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| map('...')``
|
- Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| from_json``, ``| map('...')``
|
||||||
- String and numeric literals
|
- String and numeric literals
|
||||||
"""
|
"""
|
||||||
expr = expr.strip()
|
expr = expr.strip()
|
||||||
@@ -140,6 +159,22 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
|
|||||||
value = _evaluate_simple_expression(parts[0].strip(), namespace)
|
value = _evaluate_simple_expression(parts[0].strip(), namespace)
|
||||||
filter_expr = parts[1].strip()
|
filter_expr = parts[1].strip()
|
||||||
|
|
||||||
|
# `from_json` is strict: it takes no arguments and tolerates no
|
||||||
|
# trailing tokens. Match on the leading filter name and require the
|
||||||
|
# whole filter to be exactly `from_json`, so every mis-wired form
|
||||||
|
# (`from_json()`, `from_json('x')`, `from_json)`, `from_json extra`)
|
||||||
|
# fails loudly instead of silently falling through to the
|
||||||
|
# unknown-filter path and returning the unparsed value. (filter_expr
|
||||||
|
# is already stripped above.)
|
||||||
|
leading = re.match(r"\w+", filter_expr)
|
||||||
|
if leading and leading.group(0) == "from_json":
|
||||||
|
if filter_expr != "from_json":
|
||||||
|
raise ValueError(
|
||||||
|
"from_json: expected '| from_json' with no arguments or "
|
||||||
|
f"trailing tokens, got '| {filter_expr}'"
|
||||||
|
)
|
||||||
|
return _filter_from_json(value)
|
||||||
|
|
||||||
# Parse filter name and argument
|
# Parse filter name and argument
|
||||||
filter_match = re.match(r"(\w+)\((.+)\)", filter_expr)
|
filter_match = re.match(r"(\w+)\((.+)\)", filter_expr)
|
||||||
if filter_match:
|
if filter_match:
|
||||||
|
|||||||
@@ -289,6 +289,59 @@ class TestExpressions:
|
|||||||
ctx = StepContext(inputs={"text": "hello world"})
|
ctx = StepContext(inputs={"text": "hello world"})
|
||||||
assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True
|
assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True
|
||||||
|
|
||||||
|
def test_filter_from_json_parses_object(self):
    """A JSON object held in a step's stdout evaluates to the equal dict."""
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    # The step output is a plain string; only the filter turns it into data.
    emitted = {"output": {"stdout": '{"items": [1, 2, 3]}'}}
    context = StepContext(steps={"emit": emitted})

    parsed = evaluate_expression(
        "{{ steps.emit.output.stdout | from_json }}", context
    )

    assert parsed == {"items": [1, 2, 3]}
|
||||||
|
|
||||||
|
def test_filter_from_json_invalid_json_raises(self):
    """Stdout that is not JSON must raise ValueError from `from_json`."""
    import pytest
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    template = "{{ steps.emit.output.stdout | from_json }}"
    context = StepContext(steps={"emit": {"output": {"stdout": "not json"}}})

    # The message prefix is part of the assertion, not just the type.
    with pytest.raises(ValueError, match="from_json: invalid JSON"):
        evaluate_expression(template, context)
|
||||||
|
|
||||||
|
def test_filter_from_json_non_string_raises(self):
    """Piping a non-string value (an int here) into `from_json` raises."""
    import pytest
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    template = "{{ steps.emit.output.exit_code | from_json }}"
    context = StepContext(steps={"emit": {"output": {"exit_code": 0}}})

    with pytest.raises(ValueError, match="expected a JSON string"):
        evaluate_expression(template, context)
|
||||||
|
|
||||||
|
def test_filter_from_json_rejects_malformed_forms(self):
    """Every spelling other than a bare `from_json` must raise ValueError.

    The filter takes no arguments and tolerates no trailing tokens, so
    parenthesized forms, accidental arguments and trailing garbage are all
    expected to fail rather than return the unparsed value.
    """
    import pytest
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    context = StepContext(steps={"emit": {"output": {"stdout": '{"a": 1}'}}})

    # The malformed filter text is spliced between these two fragments.
    head = "{{ steps.emit.output.stdout | "
    tail = " }}"

    malformed_filters = (
        "from_json()",
        "from_json('x')",
        "from_json ()",
        "from_json ('x')",
        "from_json)",
        "from_json extra",
        "from_json 'x'",
    )
    for filter_text in malformed_filters:
        template = head + filter_text + tail
        with pytest.raises(ValueError, match="from_json: expected"):
            evaluate_expression(template, context)
|
||||||
|
|
||||||
def test_condition_evaluation(self):
|
def test_condition_evaluation(self):
|
||||||
from specify_cli.workflows.expressions import evaluate_condition
|
from specify_cli.workflows.expressions import evaluate_condition
|
||||||
from specify_cli.workflows.base import StepContext
|
from specify_cli.workflows.base import StepContext
|
||||||
|
|||||||
@@ -119,6 +119,7 @@ Workflow definitions use Jinja2-like `{{ expression }}` syntax for dynamic value
|
|||||||
| Filter: `join` | `{{ list \| join(', ') }}` | Join list elements |
|
| Filter: `join` | `{{ list \| join(', ') }}` | Join list elements |
|
||||||
| Filter: `contains` | `{{ text \| contains('sub') }}` | Substring/membership check |
|
| Filter: `contains` | `{{ text \| contains('sub') }}` | Substring/membership check |
|
||||||
| Filter: `map` | `{{ list \| map('attr') }}` | Extract attribute from each item |
|
| Filter: `map` | `{{ list \| map('attr') }}` | Extract attribute from each item |
|
||||||
|
| Filter: `from_json` | `{{ steps.emit.output.stdout \| from_json }}` | Parse a JSON string into a typed value (raises on invalid JSON) |
|
||||||
|
|
||||||
**Single expressions** (`{{ expr }}` only) return typed values. **Mixed templates** (`"text {{ expr }} more"`) return interpolated strings.
|
**Single expressions** (`{{ expr }}` only) return typed values. **Mixed templates** (`"text {{ expr }} more"`) return interpolated strings.
|
||||||
|
|
||||||
|
|||||||
@@ -332,7 +332,7 @@ condition: "{{ steps.run-tests.output.exit_code != 0 }}"
|
|||||||
message: "{{ status | default('pending') }}"
|
message: "{{ status | default('pending') }}"
|
||||||
```
|
```
|
||||||
|
|
||||||
Supported filters: `default`, `join`, `contains`, `map`.
|
Supported filters: `default`, `join`, `contains`, `map`, `from_json`.
|
||||||
|
|
||||||
### Runtime Context
|
### Runtime Context
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user