From 2a9db1d350b3c8a362d9009fb9fc9f1ac355511b Mon Sep 17 00:00:00 2001 From: Ali jawwad <33836051+jawwad-ali@users.noreply.github.com> Date: Mon, 29 Jun 2026 19:53:35 +0500 Subject: [PATCH] fix(workflows): make expression operator/literal parsing quote-aware (#3197) _evaluate_simple_expression split on operator keywords using naive str.find/split, so a keyword INSIDE a quoted operand was treated as an operator: `inputs.mode == 'read and write'` split on the inner ' and ' and evaluated as `(inputs.mode == 'read) and (write')`. The literal short-circuit was also too greedy -- `'a' == 'b'` matched startswith("'")/endswith("'") and was stripped to the garbage truthy string `a' == 'b`, so `'done' == 'failed'` evaluated truthy and gated the wrong branch. Add a quote/bracket-aware _find_top_level helper (mirroring the existing _split_top_level_commas) and use it for the and/or/comparison/in/not-in splits; tighten the literal short-circuit to fire only when the opening quote's match is the final char. The docstring already lists comparisons + and/or/not + in/not-in + string literals as supported, so this restores the documented contract. Co-authored-by: Claude Opus 4.8 (1M context) --- src/specify_cli/workflows/expressions.py | 72 +++++++++++++++++------- tests/test_workflows.py | 36 ++++++++++++ 2 files changed, 89 insertions(+), 19 deletions(-) diff --git a/src/specify_cli/workflows/expressions.py b/src/specify_cli/workflows/expressions.py index b7ed17e80..6ea3a5f49 100644 --- a/src/specify_cli/workflows/expressions.py +++ b/src/specify_cli/workflows/expressions.py @@ -180,6 +180,35 @@ def _split_top_level_commas(text: str) -> list[str]: return parts +def _find_top_level(text: str, token: str) -> int: + """Return the index of the first occurrence of *token* in *text* that lies + outside any quoted string or nested bracket, or ``-1`` if there is none. + + Used so operator/keyword splitting (``and``/``or``/``in``/comparisons) does + not match a separator that appears *inside* a quoted operand -- e.g. the + ``and`` in ``mode == 'read and write'`` or the ``or`` in ``'approve or reject'``. + """ + quote: str | None = None + depth = 0 + i = 0 + n = len(text) + while i < n: + ch = text[i] + if quote is not None: + if ch == quote: + quote = None + elif ch in ("'", '"'): + quote = ch + elif ch in "([{": + depth += 1 + elif ch in ")]}": + depth = max(0, depth - 1) + elif depth == 0 and text.startswith(token, i): + return i + i += 1 + return -1 + + def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: """Evaluate a simple expression against the namespace. @@ -193,11 +222,12 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: """ expr = expr.strip() - # String literal — check before pipes and operators so quoted strings - # containing | or operator keywords are not mis-parsed. - if (expr.startswith("'") and expr.endswith("'")) or ( - expr.startswith('"') and expr.endswith('"') - ): + # String literal — only when the WHOLE expression is one quoted string, + # i.e. the opening quote's matching close is the final character. Checking + # startswith/endswith alone would also grab `'a' == 'b'` and strip it to the + # garbage `a' == 'b`; a genuine single literal short-circuits here so quoted + # strings containing `|` or operator keywords are not mis-parsed downstream. + if expr[:1] in ("'", '"') and expr.find(expr[0], 1) == len(expr) - 1: return expr[1:-1] # Handle pipe filters @@ -262,29 +292,33 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any: ) # Boolean operators — parse 'or' first (lower precedence) so that - # 'a or b and c' is evaluated as 'a or (b and c)'. - if " or " in expr: - parts = expr.split(" or ", 1) - left = _evaluate_simple_expression(parts[0].strip(), namespace) - right = _evaluate_simple_expression(parts[1].strip(), namespace) + # 'a or b and c' is evaluated as 'a or (b and c)'. Splits are quote/bracket + # aware so a keyword inside a quoted operand (e.g. the 'and' in + # 'read and write') is not mistaken for an operator. + or_idx = _find_top_level(expr, " or ") + if or_idx != -1: + left = _evaluate_simple_expression(expr[:or_idx].strip(), namespace) + right = _evaluate_simple_expression(expr[or_idx + 4:].strip(), namespace) return bool(left) or bool(right) - if " and " in expr: - parts = expr.split(" and ", 1) - left = _evaluate_simple_expression(parts[0].strip(), namespace) - right = _evaluate_simple_expression(parts[1].strip(), namespace) + and_idx = _find_top_level(expr, " and ") + if and_idx != -1: + left = _evaluate_simple_expression(expr[:and_idx].strip(), namespace) + right = _evaluate_simple_expression(expr[and_idx + 5:].strip(), namespace) return bool(left) and bool(right) if expr.startswith("not "): inner = _evaluate_simple_expression(expr[4:].strip(), namespace) return not bool(inner) - # Comparison operators (order matters — check multi-char ops first) + # Comparison operators (order matters — check multi-char ops first). Split at + # the first top-level occurrence so an operator inside a quoted operand is + # ignored. for op in ("!=", "==", ">=", "<=", ">", "<", " not in ", " in "): - if op in expr: - parts = expr.split(op, 1) - left = _evaluate_simple_expression(parts[0].strip(), namespace) - right = _evaluate_simple_expression(parts[1].strip(), namespace) + op_idx = _find_top_level(expr, op) + if op_idx != -1: + left = _evaluate_simple_expression(expr[:op_idx].strip(), namespace) + right = _evaluate_simple_expression(expr[op_idx + len(op):].strip(), namespace) if op == "==": return left == right if op == "!=": diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 988730d78..b239cb9a4 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -286,6 +286,42 @@ class TestExpressions: assert evaluate_expression('{{ [["a", "b"], "c"] }}', ctx) == [["a", "b"], "c"] assert evaluate_expression("{{ [[1, 2], [3, 4]] }}", ctx) == [[1, 2], [3, 4]] + def test_operator_splitting_is_quote_aware(self): + from specify_cli.workflows.expressions import ( + evaluate_condition, + evaluate_expression, + ) + from specify_cli.workflows.base import StepContext + + # An 'and'/'or'/'in' keyword INSIDE a quoted operand must not be treated + # as a boolean/membership operator: the comparison applies to the whole + # string literal. + ctx = StepContext(inputs={"mode": "read and write"}) + assert evaluate_expression("{{ inputs.mode == 'read and write' }}", ctx) is True + assert evaluate_expression("{{ inputs.mode == 'read or write' }}", ctx) is False + # ...also when the quoted literal is on the left of the operator. + left_ctx = StepContext(inputs={"x": "approve or reject"}) + assert evaluate_expression("{{ 'approve or reject' == inputs.x }}", left_ctx) is True + # membership against a literal that contains a keyword + assert evaluate_expression("{{ 'cat' in 'cat and dog' }}", StepContext()) is True + + # Literal-vs-literal equality no longer mis-strips to a garbage string + # (previously `'done' == 'failed'` short-circuited to the truthy string + # "done' == 'failed"). + assert evaluate_condition("{{ 'done' == 'failed' }}", StepContext()) is False + assert evaluate_condition("{{ 'done' == 'done' }}", StepContext()) is True + + # A single quoted literal that itself contains operator text is preserved. + assert evaluate_expression("{{ 'a == b' }}", StepContext()) == "a == b" + assert evaluate_expression("{{ 'x and y' }}", StepContext()) == "x and y" + + # Regression: ordinary (unquoted-keyword) parsing still works. + plain = StepContext(inputs={"a": 1, "b": 2, "mode": "read"}) + assert evaluate_expression("{{ inputs.mode == 'read' }}", plain) is True + assert evaluate_expression("{{ inputs.a == 1 and inputs.b == 2 }}", plain) is True + assert evaluate_expression("{{ inputs.a == 9 or inputs.b == 2 }}", plain) is True + assert evaluate_expression("{{ inputs.missing | default('a and b') }}", plain) == "a and b" + def test_filter_default(self): from specify_cli.workflows.expressions import evaluate_expression from specify_cli.workflows.base import StepContext