diff --git a/agent/moa_loop.py b/agent/moa_loop.py
index baca77548bed..3241a6e93c7e 100644
--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -773,13 +773,33 @@ class MoAChatCompletions:
         reference_outputs: list[tuple[str, str, Any]] = []
         ref_messages = _reference_messages(messages)
 
+        # Fan-out cadence. "per_iteration" (default): advisors re-run whenever
+        # the advisory view changes — i.e. every tool iteration, since the
+        # view grows with each tool result. "user_turn": advisors run ONCE per
+        # user turn; subsequent tool iterations reuse that turn's advice and
+        # the aggregator acts alone (the original MoA shape: synthesize at the
+        # start, then let the acting model work). Implemented by hashing only
+        # the prefix up to the LAST USER message so mid-turn growth doesn't
+        # change the signature — iteration 2+ becomes a cache HIT.
+        fanout_mode = str(preset.get("fanout") or "per_iteration").strip().lower()
+        sig_messages = ref_messages
+        if fanout_mode == "user_turn":
+            last_user_idx = None
+            for _i in range(len(ref_messages) - 1, -1, -1):
+                if ref_messages[_i].get("role") == "user":
+                    last_user_idx = _i
+                    break
+            if last_user_idx is not None:
+                sig_messages = ref_messages[: last_user_idx + 1]
+
         # Turn-scoped cache: only run + display references when the advisory
         # view changed (i.e. a new user turn). Within one turn the agent loop
-        # calls create() once per tool iteration with the same advisory view;
-        # reuse the cached outputs and skip both the re-run and the re-emit.
+        # calls create() once per tool iteration; in user_turn mode the
+        # signature is stable across those iterations (prefix hash above), so
+        # the fan-out runs once per user turn and iterations reuse the advice.
         _sig = hashlib.sha256(
             "\u0000".join(
-                f"{m.get('role')}:{m.get('content')}" for m in ref_messages
+                f"{m.get('role')}:{m.get('content')}" for m in sig_messages
             ).encode("utf-8", "replace")
         ).hexdigest()
         _cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
diff --git a/hermes_cli/moa_config.py b/hermes_cli/moa_config.py
index 7f88f8ac474c..af1241418bed 100644
--- a/hermes_cli/moa_config.py
+++ b/hermes_cli/moa_config.py
@@ -67,6 +67,12 @@ def _coerce_int_or_none(value: Any) -> int | None:
     return n if n > 0 else None
 
 
+def _coerce_fanout(value: Any) -> str:
+    """Normalize the fan-out cadence; unknown values fall back to default."""
+    mode = str(value or "").strip().lower()
+    return mode if mode in {"per_iteration", "user_turn"} else "per_iteration"
+
+
 def _clean_slot(slot: Any) -> dict[str, str] | None:
     if not isinstance(slot, dict):
         return None
@@ -94,6 +100,7 @@ def _default_preset() -> dict[str, Any]:
         "aggregator_temperature": None,
         "max_tokens": 4096,
         "reference_max_tokens": None,
+        "fanout": "per_iteration",
         "enabled": True,
     }
 
@@ -131,6 +138,13 @@ def _normalize_preset(raw: Any) -> dict[str, Any]:
         # judgement, so capping roughly halves per-turn wall time. Does NOT cap
         # the acting aggregator (its output is the user-visible answer).
         "reference_max_tokens": _coerce_int_or_none(raw.get("reference_max_tokens")),
+        # When the reference fan-out runs. "per_iteration" (default) re-runs
+        # the advisors whenever the advisory view changes — i.e. every tool
+        # iteration, so advice tracks live task state. "user_turn" runs the
+        # advisors ONCE per user turn (the original MoA shape): the
+        # aggregator gets their upfront plan-level advice, then acts alone
+        # for the rest of the tool loop.
+        "fanout": _coerce_fanout(raw.get("fanout")),
     }
 
 
@@ -177,6 +191,7 @@ def normalize_moa_config(raw: Any) -> dict[str, Any]:
         "aggregator_temperature": active["aggregator_temperature"],
         "max_tokens": active["max_tokens"],
         "reference_max_tokens": active.get("reference_max_tokens"),
+        "fanout": active.get("fanout", "per_iteration"),
         "enabled": active["enabled"],
     }