fix(workflows): make expression operator/literal parsing quote-aware (#3197)

_evaluate_simple_expression split on operator keywords using naive str.find/split, so a keyword INSIDE a quoted operand was treated as an operator: `inputs.mode == 'read and write'` split on the inner ' and ' and evaluated as `(inputs.mode == 'read) and (write')`. The literal short-circuit was also too greedy -- `'a' == 'b'` matched startswith("'")/endswith("'") and was stripped to the garbage truthy string `a' == 'b`, so `'done' == 'failed'` evaluated truthy and gated the wrong branch.

Add a quote/bracket-aware _find_top_level helper (mirroring the existing _split_top_level_commas) and use it for the and/or/comparison/in/not-in splits; tighten the literal short-circuit to fire only when the opening quote's match is the final char. The docstring already lists comparisons + and/or/not + in/not-in + string literals as supported, so this restores the documented contract.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ali jawwad
2026-06-29 19:53:35 +05:00
committed by GitHub
parent fd185c1fd8
commit 2a9db1d350
2 changed files with 89 additions and 19 deletions

View File

@@ -180,6 +180,35 @@ def _split_top_level_commas(text: str) -> list[str]:
return parts
def _find_top_level(text: str, token: str) -> int:
"""Return the index of the first occurrence of *token* in *text* that lies
outside any quoted string or nested bracket, or ``-1`` if there is none.
Used so operator/keyword splitting (``and``/``or``/``in``/comparisons) does
not match a separator that appears *inside* a quoted operand -- e.g. the
``and`` in ``mode == 'read and write'`` or the ``or`` in ``'approve or reject'``.
"""
quote: str | None = None
depth = 0
i = 0
n = len(text)
while i < n:
ch = text[i]
if quote is not None:
if ch == quote:
quote = None
elif ch in ("'", '"'):
quote = ch
elif ch in "([{":
depth += 1
elif ch in ")]}":
depth = max(0, depth - 1)
elif depth == 0 and text.startswith(token, i):
return i
i += 1
return -1
def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
"""Evaluate a simple expression against the namespace.
@@ -193,11 +222,12 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
"""
expr = expr.strip()
# String literal — check before pipes and operators so quoted strings
# containing | or operator keywords are not mis-parsed.
if (expr.startswith("'") and expr.endswith("'")) or (
expr.startswith('"') and expr.endswith('"')
):
# String literal — only when the WHOLE expression is one quoted string,
# i.e. the opening quote's matching close is the final character. Checking
# startswith/endswith alone would also grab `'a' == 'b'` and strip it to the
# garbage `a' == 'b`; a genuine single literal short-circuits here so quoted
# strings containing `|` or operator keywords are not mis-parsed downstream.
if expr[:1] in ("'", '"') and expr.find(expr[0], 1) == len(expr) - 1:
return expr[1:-1]
# Handle pipe filters
@@ -262,29 +292,33 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
)
# Boolean operators — parse 'or' first (lower precedence) so that
# 'a or b and c' is evaluated as 'a or (b and c)'.
if " or " in expr:
parts = expr.split(" or ", 1)
left = _evaluate_simple_expression(parts[0].strip(), namespace)
right = _evaluate_simple_expression(parts[1].strip(), namespace)
# 'a or b and c' is evaluated as 'a or (b and c)'. Splits are quote/bracket
# aware so a keyword inside a quoted operand (e.g. the 'and' in
# 'read and write') is not mistaken for an operator.
or_idx = _find_top_level(expr, " or ")
if or_idx != -1:
left = _evaluate_simple_expression(expr[:or_idx].strip(), namespace)
right = _evaluate_simple_expression(expr[or_idx + 4:].strip(), namespace)
return bool(left) or bool(right)
if " and " in expr:
parts = expr.split(" and ", 1)
left = _evaluate_simple_expression(parts[0].strip(), namespace)
right = _evaluate_simple_expression(parts[1].strip(), namespace)
and_idx = _find_top_level(expr, " and ")
if and_idx != -1:
left = _evaluate_simple_expression(expr[:and_idx].strip(), namespace)
right = _evaluate_simple_expression(expr[and_idx + 5:].strip(), namespace)
return bool(left) and bool(right)
if expr.startswith("not "):
inner = _evaluate_simple_expression(expr[4:].strip(), namespace)
return not bool(inner)
# Comparison operators (order matters — check multi-char ops first)
# Comparison operators (order matters — check multi-char ops first). Split at
# the first top-level occurrence so an operator inside a quoted operand is
# ignored.
for op in ("!=", "==", ">=", "<=", ">", "<", " not in ", " in "):
if op in expr:
parts = expr.split(op, 1)
left = _evaluate_simple_expression(parts[0].strip(), namespace)
right = _evaluate_simple_expression(parts[1].strip(), namespace)
op_idx = _find_top_level(expr, op)
if op_idx != -1:
left = _evaluate_simple_expression(expr[:op_idx].strip(), namespace)
right = _evaluate_simple_expression(expr[op_idx + len(op):].strip(), namespace)
if op == "==":
return left == right
if op == "!=":

View File

@@ -286,6 +286,42 @@ class TestExpressions:
assert evaluate_expression('{{ [["a", "b"], "c"] }}', ctx) == [["a", "b"], "c"]
assert evaluate_expression("{{ [[1, 2], [3, 4]] }}", ctx) == [[1, 2], [3, 4]]
def test_operator_splitting_is_quote_aware(self):
from specify_cli.workflows.expressions import (
evaluate_condition,
evaluate_expression,
)
from specify_cli.workflows.base import StepContext
# An 'and'/'or'/'in' keyword INSIDE a quoted operand must not be treated
# as a boolean/membership operator: the comparison applies to the whole
# string literal.
ctx = StepContext(inputs={"mode": "read and write"})
assert evaluate_expression("{{ inputs.mode == 'read and write' }}", ctx) is True
assert evaluate_expression("{{ inputs.mode == 'read or write' }}", ctx) is False
# ...also when the quoted literal is on the left of the operator.
left_ctx = StepContext(inputs={"x": "approve or reject"})
assert evaluate_expression("{{ 'approve or reject' == inputs.x }}", left_ctx) is True
# membership against a literal that contains a keyword
assert evaluate_expression("{{ 'cat' in 'cat and dog' }}", StepContext()) is True
# Literal-vs-literal equality no longer mis-strips to a garbage string
# (previously `'done' == 'failed'` short-circuited to the truthy string
# "done' == 'failed").
assert evaluate_condition("{{ 'done' == 'failed' }}", StepContext()) is False
assert evaluate_condition("{{ 'done' == 'done' }}", StepContext()) is True
# A single quoted literal that itself contains operator text is preserved.
assert evaluate_expression("{{ 'a == b' }}", StepContext()) == "a == b"
assert evaluate_expression("{{ 'x and y' }}", StepContext()) == "x and y"
# Regression: ordinary (unquoted-keyword) parsing still works.
plain = StepContext(inputs={"a": 1, "b": 2, "mode": "read"})
assert evaluate_expression("{{ inputs.mode == 'read' }}", plain) is True
assert evaluate_expression("{{ inputs.a == 1 and inputs.b == 2 }}", plain) is True
assert evaluate_expression("{{ inputs.a == 9 or inputs.b == 2 }}", plain) is True
assert evaluate_expression("{{ inputs.missing | default('a and b') }}", plain) == "a and b"
def test_filter_default(self):
from specify_cli.workflows.expressions import evaluate_expression
from specify_cli.workflows.base import StepContext