From 10dfd32a3e27d0b7a71babb8a33af320354d324f Mon Sep 17 00:00:00 2001 From: PerishFire <39043006+PerishCode@users.noreply.github.com> Date: Mon, 8 Jun 2026 14:24:44 +0800 Subject: [PATCH] Revert "feat: add screenshot-based visual validation to critique loop (#3660)" (#3865) This reverts commit 931780c914d88d864c83646cc0d8770c953b7411. --- apps/daemon/package.json | 4 - apps/daemon/src/plugins/atoms.ts | 1 - apps/daemon/src/plugins/atoms/built-ins.ts | 51 +- apps/daemon/src/plugins/atoms/registry.ts | 3 - .../src/plugins/atoms/visual-validation.ts | 672 ------------------ apps/daemon/src/plugins/index.ts | 2 - apps/daemon/src/plugins/pipeline-schedule.ts | 106 --- apps/daemon/src/server.ts | 138 +--- .../tests/plugins-atom-registry.test.ts | 6 +- apps/daemon/tests/plugins-atoms-info.test.ts | 10 +- .../plugins-bundled-atoms-roster.test.ts | 1 - .../plugins-bundled-scenarios-roster.test.ts | 4 - .../daemon/tests/plugins-headless-run.test.ts | 328 --------- .../tests/plugins-pipeline-runner.test.ts | 6 - .../tests/plugins-pipeline-schedule.test.ts | 163 ----- .../tests/plugins-scenario-fallback.test.ts | 3 - .../tests/plugins-visual-validation.test.ts | 616 ---------------- nix/pnpm-deps.nix | 4 +- .../atoms/visual-validation/SKILL.md | 23 - .../atoms/visual-validation/open-design.json | 39 - .../3d-creator-portfolio/example.html | 2 +- .../velar-luxury-real-estate/example.html | 2 +- .../scenarios/od-new-generation/SKILL.md | 7 +- .../scenarios/od-tune-collab/SKILL.md | 6 +- pnpm-lock.yaml | 12 - tools/pack/src/linux.ts | 6 +- tools/pack/src/mac/app.ts | 6 +- tools/pack/src/resources.ts | 104 +-- tools/pack/src/win/resources.ts | 20 +- tools/pack/tests/mac.test.ts | 3 - tools/pack/tests/playwright-fixture.ts | 85 --- tools/pack/tests/resources.test.ts | 239 +------ tools/pack/tests/win-resources.test.ts | 62 +- 33 files changed, 66 insertions(+), 2668 deletions(-) delete mode 100644 apps/daemon/src/plugins/atoms/visual-validation.ts delete mode 100644 apps/daemon/src/plugins/pipeline-schedule.ts delete mode 100644 apps/daemon/tests/plugins-pipeline-schedule.test.ts delete mode 100644 apps/daemon/tests/plugins-visual-validation.test.ts delete mode 100644 plugins/_official/atoms/visual-validation/SKILL.md delete mode 100644 plugins/_official/atoms/visual-validation/open-design.json delete mode 100644 tools/pack/tests/playwright-fixture.ts diff --git a/apps/daemon/package.json b/apps/daemon/package.json index bfc456042..c49148614 100644 --- a/apps/daemon/package.json +++ b/apps/daemon/package.json @@ -51,11 +51,8 @@ "jszip": "3.10.1", "multer": "2.1.1", "node-pty": "1.1.0", - "pixelmatch": "7.2.0", - "playwright": "1.60.0", "posthog-node": "5.34.6", "prom-client": "15.1.3", - "pngjs": "7.0.0", "tar": "7.5.15", "undici": "7.25.0" }, @@ -64,7 +61,6 @@ "@types/express": "5.0.6", "@types/multer": "2.1.0", "@types/node": "20.19.39", - "@types/pngjs": "6.0.5", "typescript": "5.9.3", "vitest": "4.1.6" }, diff --git a/apps/daemon/src/plugins/atoms.ts b/apps/daemon/src/plugins/atoms.ts index 23bd8ac37..d85c03abd 100644 --- a/apps/daemon/src/plugins/atoms.ts +++ b/apps/daemon/src/plugins/atoms.ts @@ -27,7 +27,6 @@ export const FIRST_PARTY_ATOMS: ReadonlyArray = [ { id: 'live-artifact', label: 'Live artifact', description: 'Create/refresh live artifacts.', status: 'implemented', taskKinds: ['new-generation', 'tune-collab'] }, { id: 'connector', label: 'Connector', description: 'Composio connector tool calls.', status: 'implemented', taskKinds: ['new-generation', 'tune-collab'] }, { id: 'critique-theater', label: 'Critique theater', description: '5-dim panel critique; devloop signal.', status: 'implemented', taskKinds: ['new-generation', 'code-migration', 'figma-migration', 'tune-collab'] }, - { id: 'visual-validation', label: 'Visual validation', description: 'Render the current artifact, compare it to reference screenshots, and feed the result into critique scoring.', status: 'implemented', taskKinds: ['new-generation', 'tune-collab'] }, // Phase 6/7/8 atoms — promoted from 'planned' to 'implemented' // by the §3.N1-N4 / §3.O2-O5 / §3.P1-P2 / §3.Q2 / §3.S1 slices. { id: 'code-import', label: 'Code import', description: 'Walk an existing repo into /code/index.json.', status: 'implemented', taskKinds: ['code-migration'] }, diff --git a/apps/daemon/src/plugins/atoms/built-ins.ts b/apps/daemon/src/plugins/atoms/built-ins.ts index cb2e74378..4dbfbdb56 100644 --- a/apps/daemon/src/plugins/atoms/built-ins.ts +++ b/apps/daemon/src/plugins/atoms/built-ins.ts @@ -1,16 +1,13 @@ // Plan §3.D — built-in atom workers. // -// Registered on first use into the worker registry. Every implemented -// atom gets at least a permissive worker so the registry-driven -// pipeline runner stays at parity with the v1 stub for atoms whose -// real work happens entirely inside the agent CLI (file-write, -// todo-write, media-image, …) — the daemon has no independent ground -// truth to observe there and shipping a real watcher would force the -// agent into a fixed protocol we explicitly kept out of scope. -// -// Planned atoms are not registered at all. Plugin doctor already warns -// that those atoms are not runnable yet, and skipping registration keeps -// explicit pipeline stages from masquerading as successful no-op runs. +// Registered on first use into the worker registry. Every atom in +// FIRST_PARTY_ATOMS gets at least a permissive worker so the +// registry-driven pipeline runner stays at parity with the v1 stub +// for atoms whose real work happens entirely inside the agent CLI +// (file-write, todo-write, media-image, …) — the daemon has no +// independent ground truth to observe there and shipping a real +// watcher would force the agent into a fixed protocol we explicitly +// kept out of scope. // // One atom does have a daemon-observable signal today: // `critique-theater`. The worker walks the run's devloop audit log @@ -26,14 +23,12 @@ import { type AtomOutcome, type AtomWorkerContext, } from './registry.js'; -import { runVisualValidation } from './visual-validation.js'; let installed = false; export function registerBuiltInAtomWorkers(): void { if (installed) return; for (const atom of FIRST_PARTY_ATOMS) { - if (atom.status !== 'implemented') continue; if (atom.id === 'critique-theater') { registerAtomWorker({ id: atom.id, @@ -42,14 +37,6 @@ export function registerBuiltInAtomWorkers(): void { }); continue; } - if (atom.id === 'visual-validation') { - registerAtomWorker({ - id: atom.id, - describe: 'renders the current artifact and compares it against reference screenshots', - run: visualValidationWorker, - }); - continue; - } registerAtomWorker({ id: atom.id, describe: 'permissive default (daemon has no independent ground truth for this atom)', @@ -81,28 +68,6 @@ function critiqueTheaterWorker(ctx: AtomWorkerContext): AtomOutcome { return { signals: {} }; } -async function visualValidationWorker(ctx: AtomWorkerContext): Promise { - if (!ctx.cwd) { - return { - signals: { - 'preview.ok': false, - 'critique.score': 1, - }, - note: 'visual validation failed: run has no project working directory', - }; - } - const result = await runVisualValidation({ - cwd: ctx.cwd, - projectId: ctx.projectId, - daemonUrl: ctx.daemonUrl, - entryFile: ctx.entryFile, - }); - return { - signals: result.signals, - note: result.report.message, - }; -} - // Matches `score=4`, `score: 4.5`, `Critique score 4/5`, etc. function parseCritiqueScore(summary: string | null): number | null { if (!summary) return null; diff --git a/apps/daemon/src/plugins/atoms/registry.ts b/apps/daemon/src/plugins/atoms/registry.ts index 5953feb1a..ea5acd92a 100644 --- a/apps/daemon/src/plugins/atoms/registry.ts +++ b/apps/daemon/src/plugins/atoms/registry.ts @@ -35,9 +35,6 @@ export interface AtomWorkerContext { runId: string; projectId: string; conversationId: string | null; - daemonUrl: string | null; - cwd: string | null; - entryFile: string | null; stage: PipelineStage; iteration: number; snapshot: AppliedPluginSnapshot; diff --git a/apps/daemon/src/plugins/atoms/visual-validation.ts b/apps/daemon/src/plugins/atoms/visual-validation.ts deleted file mode 100644 index e9ac7d40a..000000000 --- a/apps/daemon/src/plugins/atoms/visual-validation.ts +++ /dev/null @@ -1,672 +0,0 @@ -import path from 'node:path'; -import { promises as fsp } from 'node:fs'; -import pixelmatch from 'pixelmatch'; -import { PNG } from 'pngjs'; -import { type Page, type ViewportSize } from 'playwright'; -import { detectEntryFile } from '../../projects.js'; -import type { UntilSignals } from '../until.js'; - -const DEFAULT_PIXELMATCH_THRESHOLD = 0.1; -const DEFAULT_DIFF_BOX_PADDING = 12; -const DEFAULT_DIFF_BOX_MERGE_DISTANCE = 24; -const DEFAULT_DIFF_BOX_STROKE_WIDTH = 2; -const DEFAULT_MAX_DIFF_BOX_REGIONS = 12; -const DEFAULT_MAX_CANVAS_PIXELS = 16_000_000; -const DIFF_COLOR = [255, 76, 76] as const; -const IGNORED_REFERENCE_SCAN_DIRS = new Set(['critique', 'dist', 'node_modules', '.next']); -const AUTO_DISCOVERED_REFERENCE_IMAGE_RE = /\.png$/i; -const PACKAGED_PLAYWRIGHT_BROWSERS_DIR = 'ms-playwright'; -const VISUAL_VALIDATION_CHROMIUM_CHANNEL = 'chromium'; - -export interface VisualValidationCaptureInput { - entryFile: string; - entryUrl: string; - outputPath: string; - viewport: ViewportSize; -} - -export interface VisualValidationRegion { - minX: number; - minY: number; - maxX: number; - maxY: number; -} - -export interface VisualValidationComparison { - referencePath: string; - actualPath: string; - diffPath: string; - referenceWidth: number; - referenceHeight: number; - actualWidth: number; - actualHeight: number; - comparedWidth: number; - comparedHeight: number; - diffPixels: number; - diffRatio: number; - similarity: number; - regions: VisualValidationRegion[]; - suggestions: string[]; -} - -export interface VisualValidationReport { - status: 'ok' | 'skipped' | 'failed'; - entryFile: string | null; - message: string; - comparedAt: string; - comparison: VisualValidationComparison | null; -} - -export interface RunVisualValidationOptions { - cwd: string; - projectId?: string | null; - daemonUrl?: string | null; - referenceImages?: ReadonlyArray; - entryFile?: string | null; - entryUrl?: string | null; - pixelmatchThreshold?: number; - captureScreenshot?: (input: VisualValidationCaptureInput) => Promise; -} - -export async function runVisualValidation( - input: RunVisualValidationOptions, -): Promise<{ report: VisualValidationReport; signals: UntilSignals }> { - const cwd = path.resolve(input.cwd); - const entryFile = input.entryFile ?? await detectEntryFile(cwd); - let outputDir: string | null = null; - - try { - const referenceImages = await resolveReferenceImages(cwd, input.referenceImages); - if (referenceImages.length === 0) { - return { - report: { - status: 'skipped', - entryFile, - message: 'skipped: no reference screenshot found for visual validation', - comparedAt: new Date().toISOString(), - comparison: null, - }, - signals: {}, - }; - } - - outputDir = path.join(cwd, 'critique', 'visual-validation'); - await fsp.mkdir(outputDir, { recursive: true }); - if (!entryFile) { - const failure = buildFailedVisualValidationResult( - null, - 'visual validation failed: no HTML entry file found for visual validation', - ); - await writeVisualValidationArtifacts(outputDir, failure.report); - return failure; - } - - let best: VisualValidationComparison | null = null; - for (const [index, referencePath] of referenceImages.entries()) { - const reference = PNG.sync.read(await fsp.readFile(referencePath)); - assertPngSize(reference, referencePath); - const viewport = viewportForReference(reference, referencePath); - const stem = buildReferenceArtifactStem(cwd, referencePath, index); - const actualPath = path.join(outputDir, `${stem}.actual.png`); - const diffPath = path.join(outputDir, `${stem}.diff.png`); - const capture = input.captureScreenshot ?? captureWithPlaywright; - const entryUrl = await resolveVisualValidationEntryUrl({ - entryFile, - ...(input.projectId == null ? {} : { projectId: input.projectId }), - ...(input.daemonUrl == null ? {} : { daemonUrl: input.daemonUrl }), - ...(input.entryUrl == null ? {} : { entryUrl: input.entryUrl }), - }); - await capture({ - entryFile, - entryUrl, - outputPath: actualPath, - viewport, - }); - const actual = PNG.sync.read(await fsp.readFile(actualPath)); - assertPngSize(actual, actualPath); - const comparison = await comparePngs({ - cwd, - reference, - referencePath, - actual, - actualPath, - diffPath, - pixelmatchThreshold: input.pixelmatchThreshold ?? DEFAULT_PIXELMATCH_THRESHOLD, - }); - if (!best || comparison.similarity < best.similarity) best = comparison; - } - - if (!best) { - const failure = buildFailedVisualValidationResult( - entryFile, - 'visual validation failed before any comparisons completed', - ); - await writeVisualValidationArtifacts(outputDir, failure.report); - return failure; - } - - const similarity = best.similarity; - const critiqueBand = similarityToCritiqueScore(similarity); - const report: VisualValidationReport = { - status: 'ok', - entryFile, - message: summarizeComparison(best), - comparedAt: new Date().toISOString(), - comparison: best, - }; - await writeVisualValidationArtifacts(outputDir, report); - return { - report, - signals: { - 'preview.ok': true, - 'critique.score': critiqueBand, - }, - }; - } catch (error) { - const failure = buildFailedVisualValidationResult( - entryFile, - `visual validation failed: ${formatVisualValidationError(error)}`, - ); - if (outputDir) { - await writeVisualValidationArtifacts(outputDir, failure.report).catch(() => {}); - } - return failure; - } -} - -export function similarityToCritiqueScore(similarity: number): number { - if (similarity >= 98) return 5; - if (similarity >= 95) return 4; - if (similarity >= 88) return 3; - if (similarity >= 78) return 2; - return 1; -} - -async function comparePngs(input: { - cwd: string; - reference: PNG; - referencePath: string; - actual: PNG; - actualPath: string; - diffPath: string; - pixelmatchThreshold: number; -}): Promise { - const width = Math.max(input.reference.width, input.actual.width); - const height = Math.max(input.reference.height, input.actual.height); - assertPngPixels(width, height, `${input.referencePath} vs ${input.actualPath}`); - const normalizedReference = normalizePng(input.reference, width, height); - const normalizedActual = normalizePng(input.actual, width, height); - const diffMask = new PNG({ width, height }); - const diffPixels = pixelmatch( - normalizedReference.data, - normalizedActual.data, - diffMask.data, - width, - height, - { - threshold: input.pixelmatchThreshold, - alpha: 0.2, - diffColor: [DIFF_COLOR[0], DIFF_COLOR[1], DIFF_COLOR[2]], - }, - ); - const highlighted = clonePng(normalizedActual); - const mergedRegions = mergeDiffBoxes(diffBoxesFromMask(diffMask), DEFAULT_DIFF_BOX_MERGE_DISTANCE); - for (const region of mergedRegions) { - drawBox(highlighted, padBox(region, DEFAULT_DIFF_BOX_PADDING, width, height), DEFAULT_DIFF_BOX_STROKE_WIDTH); - } - await fsp.writeFile(input.diffPath, PNG.sync.write(highlighted)); - const totalPixels = width * height; - const diffRatio = totalPixels > 0 ? diffPixels / totalPixels : 0; - const similarity = Number(((1 - diffRatio) * 100).toFixed(2)); - return { - referencePath: relativeToProject(input.cwd, input.referencePath), - actualPath: relativeToProject(input.cwd, input.actualPath), - diffPath: relativeToProject(input.cwd, input.diffPath), - referenceWidth: input.reference.width, - referenceHeight: input.reference.height, - actualWidth: input.actual.width, - actualHeight: input.actual.height, - comparedWidth: width, - comparedHeight: height, - diffPixels, - diffRatio: Number(diffRatio.toFixed(6)), - similarity, - regions: mergedRegions, - suggestions: buildSuggestions({ - similarity, - regionCount: mergedRegions.length, - comparedWidth: width, - comparedHeight: height, - referenceWidth: input.reference.width, - referenceHeight: input.reference.height, - actualWidth: input.actual.width, - actualHeight: input.actual.height, - }), - }; -} - -async function captureWithPlaywright(input: VisualValidationCaptureInput): Promise { - configurePackagedPlaywrightEnvironment(); - const { chromium } = await import('playwright'); - const browser = await chromium.launch(resolveVisualValidationChromiumLaunchOptions()); - try { - const page = await browser.newPage({ viewport: input.viewport, deviceScaleFactor: 1 }); - await stabilizePage(page); - await page.goto(input.entryUrl, { waitUntil: 'networkidle' }); - await page.screenshot({ - path: input.outputPath, - fullPage: false, - animations: 'disabled', - caret: 'hide', - }); - } finally { - await browser.close(); - } -} - -export function resolveVisualValidationChromiumLaunchOptions(): { channel: 'chromium' } { - return { - // Playwright's default headless launch uses chromium_headless_shell-*. - // Packaged builds may legitimately ship only chromium-* via `--no-shell`, - // so force the bundled Chromium new-headless channel instead. - channel: VISUAL_VALIDATION_CHROMIUM_CHANNEL, - }; -} - -export function resolvePackagedPlaywrightBrowsersPath(env: NodeJS.ProcessEnv = process.env): string | null { - const configured = env.PLAYWRIGHT_BROWSERS_PATH?.trim(); - if (configured) return configured; - const resourceRoot = env.OD_RESOURCE_ROOT?.trim(); - if (!resourceRoot) return null; - return path.join(resourceRoot, PACKAGED_PLAYWRIGHT_BROWSERS_DIR); -} - -function configurePackagedPlaywrightEnvironment(env: NodeJS.ProcessEnv = process.env): void { - const browsersPath = resolvePackagedPlaywrightBrowsersPath(env); - if (!browsersPath || env.PLAYWRIGHT_BROWSERS_PATH?.trim()) return; - env.PLAYWRIGHT_BROWSERS_PATH = browsersPath; -} - -async function stabilizePage(page: Page): Promise { - await page.addInitScript(` - (() => { - const style = document.createElement('style'); - style.textContent = \` - *, - *::before, - *::after { - animation-duration: 0s !important; - animation-delay: 0s !important; - transition-duration: 0s !important; - transition-delay: 0s !important; - caret-color: transparent !important; - } - html { - scroll-behavior: auto !important; - } - \`; - document.documentElement.appendChild(style); - })(); - `); -} - -async function writeVisualValidationArtifacts( - outputDir: string, - report: VisualValidationReport, -): Promise { - await fsp.writeFile(path.join(outputDir, 'report.json'), JSON.stringify(report, null, 2) + '\n', 'utf8'); - const lines = [ - '# Visual validation', - '', - `Status: ${report.status}`, - `Compared at: ${report.comparedAt}`, - report.entryFile ? `Entry file: ${report.entryFile}` : 'Entry file: ', - '', - report.message, - ]; - if (report.comparison) { - lines.push( - '', - `Reference: ${report.comparison.referencePath}`, - `Actual: ${report.comparison.actualPath}`, - `Diff: ${report.comparison.diffPath}`, - `Similarity: ${report.comparison.similarity}%`, - `Diff ratio: ${(report.comparison.diffRatio * 100).toFixed(2)}%`, - ); - if (report.comparison.suggestions.length > 0) { - lines.push('', 'Suggestions:'); - for (const suggestion of report.comparison.suggestions) { - lines.push(`- ${suggestion}`); - } - } - } - await fsp.writeFile(path.join(outputDir, 'summary.md'), lines.join('\n') + '\n', 'utf8'); -} - -function buildFailedVisualValidationResult( - entryFile: string | null, - message: string, -): { report: VisualValidationReport; signals: UntilSignals } { - return { - report: { - status: 'failed', - entryFile, - message, - comparedAt: new Date().toISOString(), - comparison: null, - }, - signals: { 'preview.ok': false, 'critique.score': 1 }, - }; -} - -async function resolveVisualValidationEntryUrl(input: { - entryFile: string; - projectId?: string | null; - daemonUrl?: string | null; - entryUrl?: string | null; -}): Promise { - if (typeof input.entryUrl === 'string' && input.entryUrl.length > 0) { - return input.entryUrl; - } - if (!input.projectId || !input.daemonUrl) { - throw new Error( - 'visual validation requires daemon preview context to resolve the project entry URL', - ); - } - const base = input.daemonUrl.replace(/\/+$/, ''); - const response = await fetch( - `${base}/api/projects/${encodeURIComponent(input.projectId)}/preview-url?file=${encodeURIComponent(input.entryFile)}`, - { headers: { accept: 'application/json' } }, - ); - if (!response.ok) { - throw new Error(`visual validation preview route lookup failed: ${response.status} ${response.statusText}`); - } - const payload = await response.json() as { url?: unknown }; - if (typeof payload.url !== 'string' || payload.url.length === 0) { - throw new Error('visual validation preview route lookup returned no url'); - } - return new URL(payload.url, `${base}/`).toString(); -} - -async function resolveReferenceImages( - cwd: string, - explicit?: ReadonlyArray, -): Promise { - if (explicit && explicit.length > 0) { - return explicit.map((entry) => path.resolve(cwd, entry)); - } - - const files = await walkFiles(cwd, ''); - const candidates = files.filter((relPath) => { - const lower = relPath.toLowerCase(); - if (!AUTO_DISCOVERED_REFERENCE_IMAGE_RE.test(lower)) return false; - if (lower.startsWith('critique/')) return false; - return isAutoDiscoveredReferenceImage(lower); - }); - candidates.sort(); - return candidates.map((relPath) => path.join(cwd, relPath)); -} - -function isAutoDiscoveredReferenceImage(relPath: string): boolean { - const name = path.basename(relPath); - const dirSegments = path.dirname(relPath) - .split(/[\\/]+/) - .filter((segment) => segment !== '.' && segment.length > 0); - return isNamedReferenceImage(name) - || dirSegments.includes('references') - || dirSegments.includes('reference') - || dirSegments.includes('spec'); -} - -function isNamedReferenceImage(name: string): boolean { - return name.startsWith('reference') - || name.startsWith('baseline') - || name.startsWith('expected'); -} - -async function walkFiles(root: string, relDir: string): Promise { - const dir = relDir ? path.join(root, relDir) : root; - const entries = await fsp.readdir(dir, { withFileTypes: true }); - const out: string[] = []; - for (const entry of entries) { - if (entry.name.startsWith('.')) continue; - if (entry.isSymbolicLink()) continue; - const relPath = relDir ? path.join(relDir, entry.name) : entry.name; - if (entry.isDirectory()) { - if (IGNORED_REFERENCE_SCAN_DIRS.has(entry.name)) continue; - out.push(...await walkFiles(root, relPath)); - continue; - } - if (entry.isFile()) out.push(relPath); - } - return out; -} - -function summarizeComparison(comparison: VisualValidationComparison): string { - const parts = [ - `visual similarity ${comparison.similarity}% against ${path.basename(comparison.referencePath)}`, - `${comparison.regions.length} highlighted diff region${comparison.regions.length === 1 ? '' : 's'}`, - ]; - if (comparison.suggestions.length > 0) { - parts.push(`focus: ${comparison.suggestions[0]}`); - } - return parts.join('; '); -} - -function formatVisualValidationError(error: unknown): string { - if (error instanceof Error && error.message) return error.message; - return 'unknown error'; -} - -function buildSuggestions(input: { - similarity: number; - regionCount: number; - comparedWidth: number; - comparedHeight: number; - referenceWidth: number; - referenceHeight: number; - actualWidth: number; - actualHeight: number; -}): string[] { - const suggestions: string[] = []; - if (Math.abs(input.referenceWidth - input.actualWidth) > 24 || Math.abs(input.referenceHeight - input.actualHeight) > 24) { - suggestions.push('Match the reference canvas size or responsive breakpoint before tuning local styling.'); - } - if (input.regionCount === 0 && input.similarity < 100) { - suggestions.push('Recheck anti-aliasing, image loading, and screenshot viewport settings.'); - } else if (input.regionCount <= 2 && input.similarity < 95) { - suggestions.push('Fix the most visible component-level styling mismatches in the highlighted regions.'); - } else if (input.regionCount >= 6) { - suggestions.push('Layout, spacing, or typography is drifting across the page rather than in one isolated component.'); - } - if (input.similarity < 90) { - suggestions.push('Audit large spacing, sizing, and color-token differences before doing fine polish.'); - } - if (suggestions.length === 0) { - suggestions.push('Only minor visual polish remains; tighten spacing and token parity in the highlighted regions.'); - } - return suggestions; -} - -function clonePng(source: PNG): PNG { - const target = new PNG({ width: source.width, height: source.height }); - source.data.copy(target.data); - return target; -} - -function viewportForReference(reference: PNG, label: string): ViewportSize { - assertPngSize(reference, label); - return { width: reference.width, height: reference.height }; -} - -function normalizePng(source: PNG, width: number, height: number): PNG { - const out = new PNG({ width, height }); - PNG.bitblt(source, out, 0, 0, source.width, source.height, 0, 0); - return out; -} - -function assertPngSize(png: PNG, label: string): void { - assertPngPixels(png.width, png.height, label); -} - -function assertPngPixels(width: number, height: number, label: string): void { - if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { - throw new Error(`${label} has invalid PNG dimensions`); - } - const pixels = width * height; - if (pixels > DEFAULT_MAX_CANVAS_PIXELS) { - throw new Error(`${label} is ${pixels} pixels; maximum allowed is ${DEFAULT_MAX_CANVAS_PIXELS} pixels`); - } -} - -function drawBox(png: PNG, box: VisualValidationRegion, strokeWidth: number): void { - for (let y = box.minY; y <= box.maxY; y += 1) { - for (let x = box.minX; x <= box.maxX; x += 1) { - const isStroke = - x - box.minX < strokeWidth - || box.maxX - x < strokeWidth - || y - box.minY < strokeWidth - || box.maxY - y < strokeWidth; - if (!isStroke) continue; - const index = (y * png.width + x) << 2; - png.data[index] = DIFF_COLOR[0]; - png.data[index + 1] = DIFF_COLOR[1]; - png.data[index + 2] = DIFF_COLOR[2]; - png.data[index + 3] = 255; - } - } -} - -function diffBoxesFromMask(maskPng: PNG): VisualValidationRegion[] { - const { width, height } = maskPng; - const changed = new Uint8Array(width * height); - let overall: VisualValidationRegion | null = null; - for (let index = 0; index < changed.length; index += 1) { - const dataIndex = index << 2; - if ( - maskPng.data[dataIndex] === DIFF_COLOR[0] - && maskPng.data[dataIndex + 1] === DIFF_COLOR[1] - && maskPng.data[dataIndex + 2] === DIFF_COLOR[2] - ) { - changed[index] = 1; - const x = index % width; - const y = Math.floor(index / width); - overall = overall == null - ? { minX: x, minY: y, maxX: x, maxY: y } - : { - minX: Math.min(overall.minX, x), - minY: Math.min(overall.minY, y), - maxX: Math.max(overall.maxX, x), - maxY: Math.max(overall.maxY, y), - }; - } - } - if (overall == null) return []; - const boxes: VisualValidationRegion[] = []; - const queue = new Int32Array(width * height); - for (let index = 0; index < changed.length; index += 1) { - if (changed[index] === 0) continue; - let head = 0; - let tail = 0; - let minX = index % width; - let maxX = minX; - let minY = Math.floor(index / width); - let maxY = minY; - changed[index] = 0; - queue[tail++] = index; - while (head < tail) { - const current = queue[head++] ?? -1; - if (current < 0) continue; - const x = current % width; - const y = Math.floor(current / width); - minX = Math.min(minX, x); - maxX = Math.max(maxX, x); - minY = Math.min(minY, y); - maxY = Math.max(maxY, y); - tail = enqueueChanged(changed, queue, tail, x > 0 ? current - 1 : -1); - tail = enqueueChanged(changed, queue, tail, x < width - 1 ? current + 1 : -1); - tail = enqueueChanged(changed, queue, tail, y > 0 ? current - width : -1); - tail = enqueueChanged(changed, queue, tail, y < height - 1 ? current + width : -1); - } - boxes.push({ minX, minY, maxX, maxY }); - if (boxes.length > DEFAULT_MAX_DIFF_BOX_REGIONS) return [overall]; - } - return boxes; -} - -function enqueueChanged(changed: Uint8Array, queue: Int32Array, tail: number, index: number): number { - if (index < 0 || changed[index] === 0) return tail; - changed[index] = 0; - queue[tail] = index; - return tail + 1; -} - -function mergeDiffBoxes(boxes: VisualValidationRegion[], distance: number): VisualValidationRegion[] { - if (boxes.length < 2) return boxes; - const pending = boxes.slice(); - const merged: VisualValidationRegion[] = []; - while (pending.length > 0) { - let current = pending.shift()!; - let changed = true; - while (changed) { - changed = false; - for (let i = pending.length - 1; i >= 0; i -= 1) { - const candidate = pending[i]!; - if (!boxesTouchOrNear(current, candidate, distance)) continue; - current = { - minX: Math.min(current.minX, candidate.minX), - minY: Math.min(current.minY, candidate.minY), - maxX: Math.max(current.maxX, candidate.maxX), - maxY: Math.max(current.maxY, candidate.maxY), - }; - pending.splice(i, 1); - changed = true; - } - } - merged.push(current); - } - return merged; -} - -function boxesTouchOrNear(a: VisualValidationRegion, b: VisualValidationRegion, distance: number): boolean { - return !( - a.maxX + distance < b.minX - || b.maxX + distance < a.minX - || a.maxY + distance < b.minY - || b.maxY + distance < a.minY - ); -} - -function padBox( - box: VisualValidationRegion, - padding: number, - maxWidth: number, - maxHeight: number, -): VisualValidationRegion { - return { - minX: clamp(box.minX - padding, 0, maxWidth - 1), - minY: clamp(box.minY - padding, 0, maxHeight - 1), - maxX: clamp(box.maxX + padding, 0, maxWidth - 1), - maxY: clamp(box.maxY + padding, 0, maxHeight - 1), - }; -} - -function sanitizeStem(input: string): string { - const normalized = input.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-+|-+$/g, ''); - return normalized || 'reference'; -} - -function buildReferenceArtifactStem(cwd: string, referencePath: string, index: number): string { - const relativeReferencePath = relativeToProject(cwd, referencePath) - .replace(/^\.\//, '') - .replace(/\.[^.]+$/, ''); - return `${sanitizeStem(relativeReferencePath)}-${index + 1}`; -} - -function relativeToProject(cwd: string, target: string): string { - return path.relative(cwd, target).split(path.sep).join('/'); -} - -function clamp(value: number, min: number, max: number): number { - return Math.max(min, Math.min(max, value)); -} diff --git a/apps/daemon/src/plugins/index.ts b/apps/daemon/src/plugins/index.ts index 02d4d2879..0bc5865d2 100644 --- a/apps/daemon/src/plugins/index.ts +++ b/apps/daemon/src/plugins/index.ts @@ -83,7 +83,6 @@ export * from './atoms/patch-edit.js'; export * from './atoms/registry.js'; export * from './atoms/rewrite-plan.js'; export * from './atoms/token-map.js'; -export * from './atoms/visual-validation.js'; export * from './bundled.js'; export * from './connector-gate.js'; export * from './connector-probe.js'; @@ -94,7 +93,6 @@ export * from './lockfile.js'; export * from './persistence.js'; export * from './marketplaces.js'; export * from './pipeline.js'; -export * from './pipeline-schedule.js'; export * from './pipeline-runner.js'; export * from './publish.js'; export * from './registry.js'; diff --git a/apps/daemon/src/plugins/pipeline-schedule.ts b/apps/daemon/src/plugins/pipeline-schedule.ts deleted file mode 100644 index 6ec441936..000000000 --- a/apps/daemon/src/plugins/pipeline-schedule.ts +++ /dev/null @@ -1,106 +0,0 @@ -import type { - AppliedPluginSnapshot, - GenUISurfaceSpec, - PipelineStage, - PluginPipeline, -} from '@open-design/contracts'; - -const POST_RUN_ATOMS = new Set([ - 'visual-validation', -]); - -export interface PipelineScheduleSplit { - preRun: PluginPipeline | null; - postRun: PluginPipeline | null; -} - -export interface PipelineSnapshotScheduleSplit { - preRun: AppliedPluginSnapshot | null; - postRun: AppliedPluginSnapshot | null; -} - -export function splitPipelineByExecutionBoundary( - pipeline: PluginPipeline | null | undefined, -): PipelineScheduleSplit { - if (!pipeline?.stages?.length) { - return { preRun: null, postRun: null }; - } - - const postRunStart = pipeline.stages.findIndex((stage) => - stage.atoms.some((atomId) => POST_RUN_ATOMS.has(atomId))); - if (postRunStart < 0) { - return { preRun: pipeline, postRun: null }; - } - - const preRunStages: PipelineStage[] = pipeline.stages.slice(0, postRunStart); - const postRunStages: PipelineStage[] = pipeline.stages.slice(postRunStart); - - return { - preRun: preRunStages.length > 0 ? { ...pipeline, stages: preRunStages } : null, - postRun: postRunStages.length > 0 ? { ...pipeline, stages: postRunStages } : null, - }; -} - -export function splitPipelineSnapshotByExecutionBoundary( - snapshot: AppliedPluginSnapshot | null | undefined, -): PipelineSnapshotScheduleSplit { - if (!snapshot) { - return { preRun: null, postRun: null }; - } - - const pipelineSplit = splitPipelineByExecutionBoundary(snapshot.pipeline); - const preRunSurfaceOnlySnapshot = buildPreRunSurfaceOnlySnapshot(snapshot, pipelineSplit.preRun); - return { - preRun: pipelineSplit.preRun - ? { - ...snapshot, - pipeline: pipelineSplit.preRun, - genuiSurfaces: filterSurfacesForPipelineStages( - snapshot.genuiSurfaces, - pipelineSplit.preRun, - { includeTriggerless: true }, - ), - } - : preRunSurfaceOnlySnapshot, - postRun: pipelineSplit.postRun - ? { - ...snapshot, - pipeline: pipelineSplit.postRun, - genuiSurfaces: filterSurfacesForPipelineStages( - snapshot.genuiSurfaces, - pipelineSplit.postRun, - { includeTriggerless: false }, - ), - } - : null, - }; -} - -function buildPreRunSurfaceOnlySnapshot( - snapshot: AppliedPluginSnapshot, - preRunPipeline: PluginPipeline | null, -): AppliedPluginSnapshot | null { - if (preRunPipeline) return null; - const triggerlessSurfaces = snapshot.genuiSurfaces?.filter((surface) => !surface.trigger?.stageId); - if (!triggerlessSurfaces?.length) return null; - return { - ...snapshot, - pipeline: { stages: [] }, - genuiSurfaces: triggerlessSurfaces, - }; -} - -function filterSurfacesForPipelineStages( - surfaces: GenUISurfaceSpec[] | undefined, - pipeline: PluginPipeline, - options: { includeTriggerless: boolean }, -): GenUISurfaceSpec[] | undefined { - if (!surfaces?.length) return surfaces; - - const stageIds = new Set(pipeline.stages.map((stage) => stage.id)); - return surfaces.filter((surface) => { - const stageId = surface.trigger?.stageId; - if (!stageId) return options.includeTriggerless; - return stageIds.has(stageId); - }); -} diff --git a/apps/daemon/src/server.ts b/apps/daemon/src/server.ts index c6dc2291b..7ccdea60e 100644 --- a/apps/daemon/src/server.ts +++ b/apps/daemon/src/server.ts @@ -157,7 +157,6 @@ import { restoreProjectSnapshotLink, resolvePluginSnapshot, runPipelineForRun, - splitPipelineSnapshotByExecutionBoundary, runStageWithRegistry, startSnapshotGc, uninstallPlugin, @@ -10935,13 +10934,10 @@ export async function startServer({ // back to the canned v1 stub for diagnostic bisection or replay // of pre-Stage-D runs. Errors are swallowed (logged) so a bad // pipeline never blocks the agent run. - const executePipelineForRun = async (args) => { + const firePipelineForRun = (args) => { const { run, snapshot, runs, db: dbHandle } = args; - if (!snapshot?.pipeline) { - return { outcomes: [], lastSignalsByStage: new Map() }; - } + if (!snapshot?.pipeline?.stages?.length) return; const env = { maxIterations: readPluginEnvKnobs().maxDevloopIterations }; - const lastSignalsByStage = new Map(); const emitPipeline = (evt) => { try { runs.emit(run, evt.kind, evt); } catch {/* ignore */} }; @@ -10956,47 +10952,32 @@ export async function startServer({ : 'registry'; let runStage; if (runnerMode === 'stub') { - runStage = ({ stage, iteration }) => { - const outcome = { - signals: { - 'critique.score': iteration >= 0 ? 4 : 0, - 'preview.ok': true, - 'user.confirmed': true, - }, - }; - lastSignalsByStage.set(stage.id, outcome.signals); - return outcome; - }; + runStage = ({ iteration }) => ({ + signals: { + 'critique.score': iteration >= 0 ? 4 : 0, + 'preview.ok': true, + 'user.confirmed': true, + }, + }); } else { registerBuiltInAtomWorkers(); runStage = async ({ stage, iteration, snapshot: stageSnapshot }) => { - const projectRecord = getProject(dbHandle, projectIdForRun); - const cwd = projectRecord - ? resolveProjectDir(PROJECTS_DIR, projectIdForRun, projectRecord.metadata) - : null; - const entryFile = typeof projectRecord?.metadata?.entryFile === 'string' - ? projectRecord.metadata.entryFile - : null; const outcome = await runStageWithRegistry({ db: dbHandle, runId: run.id, projectId: projectIdForRun, conversationId: run.conversationId ?? null, - daemonUrl, - cwd, - entryFile, stage, iteration, snapshot: stageSnapshot, }); - lastSignalsByStage.set(stage.id, outcome.signals ?? {}); return { signals: outcome.signals, critiqueSummary: outcome.critiqueSummary, }; }; } - const outcomes = await runPipelineForRun({ + void runPipelineForRun({ db: dbHandle, runId: run.id, projectId: projectIdForRun, @@ -11007,13 +10988,7 @@ export async function startServer({ runStage, emitPipeline, emitGenui, - }); - return { outcomes, lastSignalsByStage }; - }; - - const firePipelineForRun = (args) => { - const { run, snapshot, runs } = args; - void executePipelineForRun(args).catch((err) => { + }).catch((err) => { try { runs.emit(run, 'pipeline_stage_failed', { runId: run.id, @@ -13542,67 +13517,25 @@ export async function startServer({ for (const chunk of plaintextStdoutBuffer) { send('stdout', { chunk }); } - let finalStatus = status; - if ( - finalStatus === 'succeeded' - && run.postRunPipelineSnapshot?.pipeline?.stages?.length - ) { - try { - const { outcomes, lastSignalsByStage } = await executePipelineForRun({ - run, - snapshot: run.postRunPipelineSnapshot, - runs: design.runs, - db, + // Capture the pi session file path for conversational continuity. + // The session path is discovered by attachPiRpcSession when it + // processes agent_end; persist it under (conversationId, agentId) so + // another conversation in the same cwd cannot inherit this history. + if (acpSession && typeof acpSession.getLastSessionPath === 'function') { + const sessionPath = acpSession.getLastSessionPath(); + if (status === 'succeeded' && def.streamFormat === 'pi-rpc') { + persistCapturedAgentSession(db, { + conversationId: run.conversationId, + agentId: def.id, + sessionId: sessionPath, + stablePromptHash: currentStableHash, }); - const failedStage = outcomes.find((outcome) => { - if (!outcome.converged) return true; - const stage = run.postRunPipelineSnapshot.pipeline.stages.find( - (candidate) => candidate.id === outcome.stageId, - ); - if (!stage?.atoms.includes('visual-validation')) return false; - const signals = lastSignalsByStage.get(outcome.stageId) ?? {}; - if (signals['preview.ok'] === false) return true; - return typeof signals['critique.score'] === 'number' - && signals['critique.score'] < 4; - }); - if (failedStage) { - const failedSignals = lastSignalsByStage.get(failedStage.stageId) ?? {}; - const failedScore = failedSignals['critique.score']; - send('error', createSseErrorPayload( - 'PLUGIN_PIPELINE_FAILED', - typeof failedScore === 'number' - ? `Post-run visual validation scored ${failedScore}, so the run cannot finish successfully.` - : `Post-run pipeline stage "${failedStage.stageId}" did not finish successfully.`, - )); - finalStatus = 'failed'; - } - } catch (err) { - send('error', createSseErrorPayload( - 'PLUGIN_PIPELINE_FAILED', - err instanceof Error ? err.message : String(err), - )); - finalStatus = 'failed'; } } - if (finalStatus === 'succeeded') { - // Capture the pi session file path for conversational continuity. - // The session path is discovered by attachPiRpcSession when it - // processes agent_end; persist it under (conversationId, agentId) so - // another conversation in the same cwd cannot inherit this history. - if (acpSession && typeof acpSession.getLastSessionPath === 'function') { - const sessionPath = acpSession.getLastSessionPath(); - if (def.streamFormat === 'pi-rpc') { - persistCapturedAgentSession(db, { - conversationId: run.conversationId, - agentId: def.id, - sessionId: sessionPath, - stablePromptHash: currentStableHash, - }); - } - } + if (status === 'succeeded') { persistDeliveredAgentSessionState(); } - finishWithRetryDecision(finalStatus, code, signal); + finishWithRetryDecision(status, code, signal); } finally { // Best-effort cleanup of the per-run agy log file on every close // path — successful, failed, cancelled, or non-zero exit — so @@ -14073,17 +14006,19 @@ export async function startServer({ : {}), }; res.status(202).json(body); - const pipelineSchedule = resolvedSnapshot?.ok - ? splitPipelineSnapshotByExecutionBoundary(resolvedSnapshot.snapshot) - : { preRun: null, postRun: null }; - // Fire only pre-run-safe stages before the agent starts. Stages that - // depend on agent-produced artifacts (`visual-validation`) are - // deferred until the run succeeds so they inspect the current output - // instead of the untouched pre-run workspace. - if (resolvedSnapshot?.ok && pipelineSchedule.preRun) { + // Plan §3.I1 / spec §10.1 — fire the pipeline schedule on the run's + // SSE stream BEFORE the agent process is started. The first + // pipeline_stage_started event is emitted synchronously (before + // the first await inside runPipelineForRun), so any SSE consumer + // that subscribes between create() and start() sees a stage event + // ahead of the agent's message_chunk stream — exactly what §8 e2e-3 + // expects. The stub stage runner returns immediately so a + // non-loop pipeline walks through every stage in O(stages) time; + // the audit row in `run_devloop_iterations` records the timeline. + if (resolvedSnapshot?.ok && resolvedSnapshot.snapshot.pipeline) { firePipelineForRun({ run, - snapshot: pipelineSchedule.preRun, + snapshot: resolvedSnapshot.snapshot, runs: design.runs, db, }); @@ -14098,7 +14033,6 @@ export async function startServer({ console.warn('[plugins] skill candidate hook setup failed', err); } } - run.postRunPipelineSnapshot = pipelineSchedule.postRun; design.runs.start(run, () => startChatRun(meta, run)); // Analytics v2: emit run_created (daemon-side authoritative) and diff --git a/apps/daemon/tests/plugins-atom-registry.test.ts b/apps/daemon/tests/plugins-atom-registry.test.ts index ca288693b..b8c5a7ca0 100644 --- a/apps/daemon/tests/plugins-atom-registry.test.ts +++ b/apps/daemon/tests/plugins-atom-registry.test.ts @@ -92,9 +92,6 @@ function ctxFor(stage: PipelineStage, iteration = 0): AtomWorkerContext { runId: 'run-1', projectId: 'project-1', conversationId: 'conv-A', - daemonUrl: null, - cwd: tmpDir, - entryFile: null, stage, iteration, snapshot: fakeSnapshot(), @@ -230,7 +227,7 @@ describe('built-in critique-theater worker', () => { }); describe('registerBuiltInAtomWorkers: idempotency', () => { - it('registers every implemented first-party atom exactly once even on repeat calls', () => { + it('registers every FIRST_PARTY_ATOM exactly once even on repeat calls', () => { registerBuiltInAtomWorkers(); const first = listRegisteredAtomIds(); registerBuiltInAtomWorkers(); @@ -239,7 +236,6 @@ describe('registerBuiltInAtomWorkers: idempotency', () => { expect(first).toContain('critique-theater'); expect(first).toContain('file-write'); expect(first).toContain('media-image'); - expect(first).toContain('visual-validation'); }); }); diff --git a/apps/daemon/tests/plugins-atoms-info.test.ts b/apps/daemon/tests/plugins-atoms-info.test.ts index f15c3c176..84d99c7f9 100644 --- a/apps/daemon/tests/plugins-atoms-info.test.ts +++ b/apps/daemon/tests/plugins-atoms-info.test.ts @@ -29,15 +29,9 @@ describe('atoms catalog — Phase 6/7/8 promotion', () => { expect(atom?.taskKinds).toContain('code-migration'); }); - it("'visual-validation' is implemented with daemon-backed screenshot comparison", () => { - const atom = findAtom('visual-validation'); - expect(atom?.status).toBe('implemented'); - expect(isImplementedAtom('visual-validation')).toBe(true); - }); - - it('the catalog no longer contains planned first-party atoms', () => { + it('the catalog has no remaining planned atoms (after the §3.AA2 promotion)', () => { const planned = FIRST_PARTY_ATOMS.filter((a) => a.status === 'planned'); - expect(planned).toEqual([]); + expect(planned.map((a) => a.id)).toEqual([]); }); it('every atom in the catalog has a non-empty taskKinds[]', () => { diff --git a/apps/daemon/tests/plugins-bundled-atoms-roster.test.ts b/apps/daemon/tests/plugins-bundled-atoms-roster.test.ts index 6543daf7a..bde9f5387 100644 --- a/apps/daemon/tests/plugins-bundled-atoms-roster.test.ts +++ b/apps/daemon/tests/plugins-bundled-atoms-roster.test.ts @@ -25,7 +25,6 @@ const PHASE_4_ATOMS = [ 'direction-picker', 'todo-write', 'critique-theater', - 'visual-validation', ]; // Phase 6 (figma-migration native, spec §21.4) diff --git a/apps/daemon/tests/plugins-bundled-scenarios-roster.test.ts b/apps/daemon/tests/plugins-bundled-scenarios-roster.test.ts index 14407d5c9..3706e92e6 100644 --- a/apps/daemon/tests/plugins-bundled-scenarios-roster.test.ts +++ b/apps/daemon/tests/plugins-bundled-scenarios-roster.test.ts @@ -62,10 +62,6 @@ describe('plugins/_official/scenarios roster', () => { expect(manifest.od.taskKind).toBe(expected.taskKind); const stageIds = manifest.od.pipeline.stages.map((s: { id: string }) => s.id); expect(stageIds).toEqual(expected.pipelineStages); - if (folder === 'od-new-generation' || folder === 'od-tune-collab') { - const critiqueStage = manifest.od.pipeline.stages.find((s: { id: string }) => s.id === 'critique'); - expect(critiqueStage?.atoms).toEqual(['critique-theater']); - } }); } diff --git a/apps/daemon/tests/plugins-headless-run.test.ts b/apps/daemon/tests/plugins-headless-run.test.ts index c441794cb..a9b124657 100644 --- a/apps/daemon/tests/plugins-headless-run.test.ts +++ b/apps/daemon/tests/plugins-headless-run.test.ts @@ -31,14 +31,6 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import path from 'node:path'; import url from 'node:url'; import { promisify } from 'node:util'; -import { - clearAtomWorkers, - registerAtomWorker, -} from '../src/plugins/atoms/registry.js'; -import { - registerBuiltInAtomWorkers, - resetBuiltInAtomWorkersForTests, -} from '../src/plugins/atoms/built-ins.js'; import { startServer } from '../src/server.js'; const __dirname = path.dirname(url.fileURLToPath(import.meta.url)); @@ -728,324 +720,4 @@ process.stdin.on('end', () => { await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}/cancel`, { method: 'POST' }); await fs.rm(tmpRoot, { recursive: true, force: true }); }); - - it('defers visual-validation until after the agent rewrites the artifact', async () => { - const fs = await import('node:fs/promises'); - const os = await import('node:os'); - const tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'od-headless-visual-validation-')); - const fixture = path.join(tmpRoot, 'visual-validation-plugin'); - const seenHtml: string[] = []; - - clearAtomWorkers(); - resetBuiltInAtomWorkersForTests(); - registerBuiltInAtomWorkers(); - registerAtomWorker({ - id: 'visual-validation', - run: async (ctx) => { - if (!ctx.cwd) throw new Error('expected project cwd for visual validation'); - seenHtml.push(await readFile(path.join(ctx.cwd, 'index.html'), 'utf8')); - return { - signals: { - 'preview.ok': true, - 'critique.score': 5, - }, - note: 'captured test artifact', - }; - }, - }); - - try { - await fs.mkdir(fixture, { recursive: true }); - await fs.writeFile( - path.join(fixture, 'open-design.json'), - JSON.stringify({ - $schema: 'https://open-design.ai/schemas/plugin.v1.json', - name: 'visual-validation-plugin', - title: 'Visual Validation Plugin', - version: '1.0.0', - description: 'fixture with a post-run visual validation stage', - license: 'MIT', - od: { - kind: 'skill', - taskKind: 'new-generation', - useCase: { query: 'Make a {{topic}} brief.' }, - inputs: [{ name: 'topic', type: 'string', required: true, label: 'Topic' }], - pipeline: { - stages: [ - { - id: 'critique', - atoms: ['visual-validation'], - repeat: false, - }, - ], - }, - capabilities: ['prompt:inject'], - }, - }, null, 2), - ); - await fs.writeFile( - path.join(fixture, 'SKILL.md'), - '---\nname: visual-validation-plugin\ndescription: fixture with visual validation\n---\n# Visual validation\n', - ); - - const installResp = await fetch(`${baseUrl}/api/plugins/install`, { - method: 'POST', - headers: { 'content-type': 'application/json', accept: 'text/event-stream' }, - body: JSON.stringify({ source: fixture }), - }); - await readSseUntilSuccess(installResp); - - const projectId = `visual-validation-${Date.now()}`; - const createResp = await fetch(`${baseUrl}/api/projects`, { - method: 'POST', - headers: { 'content-type': 'application/json' }, - body: JSON.stringify({ - id: projectId, - name: 'Visual validation pipeline e2e', - pluginId: 'visual-validation-plugin', - pluginInputs: { topic: 'artifact rewrite' }, - grantCaps: ['pipeline:*'], - }), - }); - expect(createResp.status).toBe(200); - const createBody = (await createResp.json()) as { - appliedPluginSnapshotId?: string; - }; - expect(createBody.appliedPluginSnapshotId).toBeTruthy(); - - const seedResp = await fetch(`${baseUrl}/api/projects/${projectId}/files`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: 'index.html', content: '

before

' }), - }); - expect(seedResp.status).toBe(200); - - await withFakeAgent( - 'opencode', - ` -const fs = require('node:fs'); -if (process.argv.includes('--version')) { - console.log('opencode 0.0.0'); - process.exit(0); -} -if (process.argv[2] === 'models') { - console.log('test/model'); - process.exit(0); -} -if (process.argv[2] === 'run') { - setTimeout(() => { - fs.writeFileSync('index.html', '

after

'); - console.log(JSON.stringify({ type: 'text', part: { text: 'rewritten' } })); - process.exit(0); - }, 150); -} -`, - async () => { - const runResp = await fetch(`${baseUrl}/api/runs`, { - method: 'POST', - headers: { 'content-type': 'application/json' }, - body: JSON.stringify({ - agentId: 'opencode', - projectId, - pluginId: 'visual-validation-plugin', - appliedPluginSnapshotId: createBody.appliedPluginSnapshotId, - grantCaps: ['pipeline:*'], - }), - }); - expect(runResp.status).toBe(202); - const runBody = (await runResp.json()) as { runId: string }; - - await new Promise((resolve) => setTimeout(resolve, 50)); - expect(seenHtml).toEqual([]); - - const deadline = Date.now() + 5_000; - while (Date.now() < deadline) { - const statusResp = await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}`); - expect(statusResp.status).toBe(200); - const statusBody = (await statusResp.json()) as { status?: string }; - if (statusBody.status === 'succeeded' && seenHtml.length > 0) break; - await new Promise((resolve) => setTimeout(resolve, 50)); - } - - expect(seenHtml).toEqual(['

after

']); - }, - ); - } finally { - clearAtomWorkers(); - resetBuiltInAtomWorkersForTests(); - registerBuiltInAtomWorkers(); - await fs.rm(tmpRoot, { recursive: true, force: true }); - } - }); - - it('fails the run after post-run visual validation and before the terminal end event', async () => { - const fs = await import('node:fs/promises'); - const os = await import('node:os'); - const tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'od-headless-visual-validation-gate-')); - const fixture = path.join(tmpRoot, 'visual-validation-gate-plugin'); - const seenHtml: string[] = []; - - clearAtomWorkers(); - resetBuiltInAtomWorkersForTests(); - registerBuiltInAtomWorkers(); - registerAtomWorker({ - id: 'visual-validation', - run: async (ctx) => { - if (!ctx.cwd) throw new Error('expected project cwd for visual validation'); - seenHtml.push(await readFile(path.join(ctx.cwd, 'index.html'), 'utf8')); - return { - signals: { - 'preview.ok': false, - 'critique.score': 1, - }, - note: 'captured failing test artifact', - }; - }, - }); - - try { - await fs.mkdir(fixture, { recursive: true }); - await fs.writeFile( - path.join(fixture, 'open-design.json'), - JSON.stringify({ - $schema: 'https://open-design.ai/schemas/plugin.v1.json', - name: 'visual-validation-gate-plugin', - title: 'Visual Validation Gate Plugin', - version: '1.0.0', - description: 'fixture with a failing post-run visual validation stage', - license: 'MIT', - od: { - kind: 'skill', - taskKind: 'new-generation', - useCase: { query: 'Make a {{topic}} brief.' }, - inputs: [{ name: 'topic', type: 'string', required: true, label: 'Topic' }], - pipeline: { - stages: [ - { - id: 'critique', - atoms: ['visual-validation'], - repeat: false, - }, - ], - }, - capabilities: ['prompt:inject'], - }, - }, null, 2), - ); - await fs.writeFile( - path.join(fixture, 'SKILL.md'), - '---\nname: visual-validation-gate-plugin\ndescription: fixture with failing visual validation\n---\n# Visual validation gate\n', - ); - - const installResp = await fetch(`${baseUrl}/api/plugins/install`, { - method: 'POST', - headers: { 'content-type': 'application/json', accept: 'text/event-stream' }, - body: JSON.stringify({ source: fixture }), - }); - await readSseUntilSuccess(installResp); - - const projectId = `visual-validation-gate-${Date.now()}`; - const createResp = await fetch(`${baseUrl}/api/projects`, { - method: 'POST', - headers: { 'content-type': 'application/json' }, - body: JSON.stringify({ - id: projectId, - name: 'Visual validation gate pipeline e2e', - pluginId: 'visual-validation-gate-plugin', - pluginInputs: { topic: 'artifact rewrite' }, - grantCaps: ['pipeline:*'], - }), - }); - expect(createResp.status).toBe(200); - const createBody = (await createResp.json()) as { - appliedPluginSnapshotId?: string; - }; - expect(createBody.appliedPluginSnapshotId).toBeTruthy(); - - const seedResp = await fetch(`${baseUrl}/api/projects/${projectId}/files`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: 'index.html', content: '

before

' }), - }); - expect(seedResp.status).toBe(200); - - await withFakeAgent( - 'opencode', - ` -const fs = require('node:fs'); -if (process.argv.includes('--version')) { - console.log('opencode 0.0.0'); - process.exit(0); -} -if (process.argv[2] === 'models') { - console.log('test/model'); - process.exit(0); -} -if (process.argv[2] === 'run') { - setTimeout(() => { - fs.writeFileSync('index.html', '

after

'); - console.log(JSON.stringify({ type: 'text', part: { text: 'rewritten' } })); - process.exit(0); - }, 150); -} -`, - async () => { - const runResp = await fetch(`${baseUrl}/api/runs`, { - method: 'POST', - headers: { 'content-type': 'application/json' }, - body: JSON.stringify({ - agentId: 'opencode', - projectId, - pluginId: 'visual-validation-gate-plugin', - appliedPluginSnapshotId: createBody.appliedPluginSnapshotId, - grantCaps: ['pipeline:*'], - }), - }); - expect(runResp.status).toBe(202); - const runBody = (await runResp.json()) as { runId: string }; - - const eventsResp = await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}/events`, { - headers: { accept: 'text/event-stream' }, - }); - expect(eventsResp.body).toBeTruthy(); - const reader = eventsResp.body!.getReader(); - const decoder = new TextDecoder(); - let buffer = ''; - const events: string[] = []; - - while (true) { - const { value, done } = await reader.read(); - if (done) break; - buffer += decoder.decode(value, { stream: true }); - const blocks = buffer.split('\n\n'); - buffer = blocks.pop() ?? ''; - for (const block of blocks) { - const eventLine = block.split('\n').find((line) => line.startsWith('event: ')); - if (!eventLine) continue; - const event = eventLine.slice('event: '.length); - events.push(event); - if (event === 'end') break; - } - if (events.includes('end')) break; - } - - expect(seenHtml).toEqual(['

after

']); - expect(events).toContain('pipeline_stage_started'); - expect(events).toContain('pipeline_stage_completed'); - expect(events.indexOf('pipeline_stage_completed')).toBeGreaterThan(-1); - expect(events.indexOf('end')).toBeGreaterThan(events.indexOf('pipeline_stage_completed')); - - const statusResp = await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}`); - expect(statusResp.status).toBe(200); - const statusBody = (await statusResp.json()) as { status?: string }; - expect(statusBody.status).toBe('failed'); - }, - ); - } finally { - clearAtomWorkers(); - resetBuiltInAtomWorkersForTests(); - registerBuiltInAtomWorkers(); - await fs.rm(tmpRoot, { recursive: true, force: true }); - } - }); }); diff --git a/apps/daemon/tests/plugins-pipeline-runner.test.ts b/apps/daemon/tests/plugins-pipeline-runner.test.ts index 4f0a0f2b3..6dd701b68 100644 --- a/apps/daemon/tests/plugins-pipeline-runner.test.ts +++ b/apps/daemon/tests/plugins-pipeline-runner.test.ts @@ -212,9 +212,6 @@ describe('pipeline-runner: Stage D registry runner integration', () => { runId: 'run-stage-d', projectId: 'project-1', conversationId: 'conv-A', - daemonUrl: null, - cwd: null, - entryFile: null, stage, iteration, snapshot: snap2, @@ -249,9 +246,6 @@ describe('pipeline-runner: Stage D registry runner integration', () => { runId: 'run-permissive', projectId: 'project-1', conversationId: 'conv-A', - daemonUrl: null, - cwd: null, - entryFile: null, stage, iteration, snapshot: snap2, diff --git a/apps/daemon/tests/plugins-pipeline-schedule.test.ts b/apps/daemon/tests/plugins-pipeline-schedule.test.ts deleted file mode 100644 index fbbbe1482..000000000 --- a/apps/daemon/tests/plugins-pipeline-schedule.test.ts +++ /dev/null @@ -1,163 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import type { AppliedPluginSnapshot, GenUISurfaceSpec, PluginPipeline } from '@open-design/contracts'; -import { - splitPipelineByExecutionBoundary, - splitPipelineSnapshotByExecutionBoundary, -} from '../src/plugins/pipeline-schedule.js'; - -describe('splitPipelineByExecutionBoundary', () => { - it('keeps pre-run-only pipelines intact', () => { - const pipeline: PluginPipeline = { - stages: [ - { id: 'discovery', atoms: ['discovery-question-form'] }, - { id: 'plan', atoms: ['todo-write'] }, - ], - }; - - const schedule = splitPipelineByExecutionBoundary(pipeline); - - expect(schedule.preRun).toEqual(pipeline); - expect(schedule.postRun).toBeNull(); - }); - - it('defers visual-validation stages until after the run succeeds', () => { - const pipeline: PluginPipeline = { - stages: [ - { id: 'discovery', atoms: ['discovery-question-form'] }, - { id: 'generate', atoms: ['file-write', 'live-artifact'] }, - { - id: 'critique', - atoms: ['critique-theater', 'visual-validation'], - repeat: true, - until: 'critique.score>=4 || iterations>=3', - }, - ], - }; - - const schedule = splitPipelineByExecutionBoundary(pipeline); - - expect(schedule.preRun?.stages.map((stage) => stage.id)).toEqual([ - 'discovery', - 'generate', - ]); - expect(schedule.postRun?.stages.map((stage) => stage.id)).toEqual([ - 'critique', - ]); - }); - - it('keeps the full suffix in post-run order once a post-run atom appears', () => { - const pipeline: PluginPipeline = { - stages: [ - { id: 'direction', atoms: ['discovery-question-form'] }, - { id: 'patch', atoms: ['file-write'] }, - { id: 'critique', atoms: ['critique-theater', 'visual-validation'] }, - { id: 'handoff', atoms: ['handoff'] }, - ], - }; - - const schedule = splitPipelineByExecutionBoundary(pipeline); - - expect(schedule.preRun?.stages.map((stage) => stage.id)).toEqual([ - 'direction', - 'patch', - ]); - expect(schedule.postRun?.stages.map((stage) => stage.id)).toEqual([ - 'critique', - 'handoff', - ]); - }); -}); - -describe('splitPipelineSnapshotByExecutionBoundary', () => { - it('keeps triggerless surfaces in pre-run only and stage-scopes the deferred suffix', () => { - const surfaces: GenUISurfaceSpec[] = [ - { id: 'confirm', kind: 'confirmation', persist: 'run' }, - { id: 'direction-form', kind: 'form', persist: 'run', trigger: { stageId: 'direction' } }, - { id: 'critique-form', kind: 'form', persist: 'run', trigger: { stageId: 'critique' } }, - { id: 'handoff-form', kind: 'form', persist: 'run', trigger: { stageId: 'handoff' } }, - ]; - const snapshot = { - snapshotId: 'snap-1', - pluginId: 'sample-plugin', - pluginVersion: '1.0.0', - manifestSourceDigest: 'digest-1', - inputs: {}, - resolvedContext: { items: [] }, - capabilitiesGranted: [], - capabilitiesRequired: [], - assetsStaged: [], - taskKind: 'new-generation', - appliedAt: 0, - connectorsRequired: [], - connectorsResolved: [], - mcpServers: [], - pipeline: { - stages: [ - { id: 'direction', atoms: ['discovery-question-form'] }, - { id: 'critique', atoms: ['visual-validation'] }, - { id: 'handoff', atoms: ['handoff'] }, - ], - }, - genuiSurfaces: surfaces, - status: 'fresh', - } as AppliedPluginSnapshot; - - const split = splitPipelineSnapshotByExecutionBoundary(snapshot); - - expect(split.preRun?.pipeline?.stages.map((stage) => stage.id)).toEqual(['direction']); - expect(split.preRun?.genuiSurfaces?.map((surface) => surface.id)).toEqual([ - 'confirm', - 'direction-form', - ]); - expect(split.postRun?.pipeline?.stages.map((stage) => stage.id)).toEqual([ - 'critique', - 'handoff', - ]); - expect(split.postRun?.genuiSurfaces?.map((surface) => surface.id)).toEqual([ - 'critique-form', - 'handoff-form', - ]); - }); - - it('raises triggerless surfaces before an all-deferred pipeline starts', () => { - const snapshot = { - snapshotId: 'snap-2', - pluginId: 'sample-plugin', - pluginVersion: '1.0.0', - manifestSourceDigest: 'digest-2', - inputs: {}, - resolvedContext: { items: [] }, - capabilitiesGranted: [], - capabilitiesRequired: [], - assetsStaged: [], - taskKind: 'new-generation', - appliedAt: 0, - connectorsRequired: [], - connectorsResolved: [], - mcpServers: [], - pipeline: { - stages: [ - { id: 'critique', atoms: ['visual-validation'] }, - { id: 'handoff', atoms: ['handoff'] }, - ], - }, - genuiSurfaces: [ - { id: 'confirm', kind: 'confirmation', persist: 'run' }, - { id: 'critique-form', kind: 'form', persist: 'run', trigger: { stageId: 'critique' } }, - { id: 'handoff-form', kind: 'form', persist: 'run', trigger: { stageId: 'handoff' } }, - ], - status: 'fresh', - } as AppliedPluginSnapshot; - - const split = splitPipelineSnapshotByExecutionBoundary(snapshot); - - expect(split.preRun?.pipeline?.stages).toEqual([]); - expect(split.preRun?.genuiSurfaces?.map((surface) => surface.id)).toEqual([ - 'confirm', - ]); - expect(split.postRun?.genuiSurfaces?.map((surface) => surface.id)).toEqual([ - 'critique-form', - 'handoff-form', - ]); - }); -}); diff --git a/apps/daemon/tests/plugins-scenario-fallback.test.ts b/apps/daemon/tests/plugins-scenario-fallback.test.ts index dc49c6a5b..a44d0e873 100644 --- a/apps/daemon/tests/plugins-scenario-fallback.test.ts +++ b/apps/daemon/tests/plugins-scenario-fallback.test.ts @@ -129,9 +129,6 @@ describe('apply: bundled-scenario pipeline fallback (spec §23.3.3)', () => { ]), }); expect(out.result.pipeline?.stages?.[0]?.id).toBe('discovery'); - expect(out.result.pipeline?.stages?.find((stage) => stage.id === 'critique')?.atoms).toEqual([ - 'critique-theater', - ]); }); it('keeps pipeline undefined when no scenario matches the taskKind', () => { diff --git a/apps/daemon/tests/plugins-visual-validation.test.ts b/apps/daemon/tests/plugins-visual-validation.test.ts deleted file mode 100644 index 5a251ff80..000000000 --- a/apps/daemon/tests/plugins-visual-validation.test.ts +++ /dev/null @@ -1,616 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { chmod, mkdir, mkdtemp, readFile, rm, symlink, writeFile } from 'node:fs/promises'; -import os from 'node:os'; -import path from 'node:path'; -import { PNG } from 'pngjs'; -import { - resolveVisualValidationChromiumLaunchOptions, - resolvePackagedPlaywrightBrowsersPath, - runVisualValidation, - similarityToCritiqueScore, -} from '../src/plugins/atoms/visual-validation.js'; - -describe('visual validation atom runner', () => { - it('skips cleanly when no reference images are present', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-skip-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - const result = await runVisualValidation({ - cwd, - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(320, 240, [255, 255, 255, 255]))); - }, - }); - expect(result.report.status).toBe('skipped'); - expect(result.signals).toEqual({}); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('fails closed when references exist but no HTML entry file is found', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-missing-entry-')); - try { - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - const result = await runVisualValidation({ - cwd, - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('failed'); - expect(result.report.entryFile).toBeNull(); - expect(result.report.message).toContain('no HTML entry file found'); - expect(result.signals['preview.ok']).toBe(false); - expect(result.signals['critique.score']).toBe(1); - - const reportPath = path.join(cwd, 'critique', 'visual-validation', 'report.json'); - const saved = JSON.parse(await readFile(reportPath, 'utf8')) as { status?: string; message?: string }; - expect(saved.status).toBe('failed'); - expect(saved.message).toContain('no HTML entry file found'); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('compares rendered output against reference screenshots and writes a report', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-compare-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - const png = createFilledPng(200, 120, [255, 255, 255, 255]); - paintRect(png, { x: 40, y: 25, width: 60, height: 30 }, [255, 0, 0, 255]); - await writeFile(outputPath, PNG.sync.write(png)); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(result.report.comparison?.similarity).toBeLessThan(95); - expect(result.report.comparison?.diffPixels).toBeGreaterThan(0); - expect(result.report.comparison?.suggestions.length).toBeGreaterThan(0); - expect(result.signals['preview.ok']).toBe(true); - expect(result.signals['critique.score']).toBe(3); - - const reportPath = path.join(cwd, 'critique', 'visual-validation', 'report.json'); - const saved = JSON.parse(await readFile(reportPath, 'utf8')) as { comparison?: { diffPixels?: number } }; - expect(saved.comparison?.diffPixels).toBe(result.report.comparison?.diffPixels); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('honors an explicit entryFile over auto-detected index.html', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-explicit-entry-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'stale', 'utf8'); - await writeFile(path.join(cwd, 'active.html'), 'active', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - let capturedEntryFile: string | null = null; - const result = await runVisualValidation({ - cwd, - entryFile: 'active.html', - entryUrl: 'about:blank', - captureScreenshot: async ({ entryFile, outputPath }) => { - capturedEntryFile = entryFile; - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(result.report.entryFile).toBe('active.html'); - expect(capturedEntryFile).toBe('active.html'); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('uses the daemon preview route instead of file:// when project context is available', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-preview-route-')); - const originalFetch = globalThis.fetch; - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - globalThis.fetch = async (input) => { - expect(String(input)).toBe( - 'http://127.0.0.1:7456/api/projects/project-123/preview-url?file=index.html', - ); - return new Response( - JSON.stringify({ url: '/api/projects/project-123/preview/scope-123/index.html' }), - { status: 200, headers: { 'content-type': 'application/json' } }, - ); - }; - - let capturedEntryUrl: string | null = null; - const result = await runVisualValidation({ - cwd, - projectId: 'project-123', - daemonUrl: 'http://127.0.0.1:7456/', - captureScreenshot: async ({ entryUrl, outputPath }) => { - capturedEntryUrl = entryUrl; - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(capturedEntryUrl).toBe( - 'http://127.0.0.1:7456/api/projects/project-123/preview/scope-123/index.html', - ); - } finally { - globalThis.fetch = originalFetch; - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('fails closed instead of falling back to file:// when preview context is unavailable', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-missing-preview-context-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - const result = await runVisualValidation({ cwd }); - - expect(result.report.status).toBe('failed'); - expect(result.report.message).toContain('requires daemon preview context'); - expect(result.signals['preview.ok']).toBe(false); - expect(result.signals['critique.score']).toBe(1); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('captures with the reference dimensions instead of the old clamp bounds', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-reference-viewport-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(1920, 300, [255, 255, 255, 255])), - ); - - let capturedViewport: { width: number; height: number } | null = null; - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath, viewport }) => { - capturedViewport = viewport; - await writeFile(outputPath, PNG.sync.write(createFilledPng(viewport.width, viewport.height, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(capturedViewport).toEqual({ width: 1920, height: 300 }); - expect(result.report.comparison?.referenceWidth).toBe(1920); - expect(result.report.comparison?.actualWidth).toBe(1920); - expect(result.report.comparison?.referenceHeight).toBe(300); - expect(result.report.comparison?.actualHeight).toBe(300); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('fails closed when capture throws', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-fail-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async () => { - throw new Error('playwright launch failed'); - }, - }); - - expect(result.report.status).toBe('failed'); - expect(result.report.message).toContain('playwright launch failed'); - expect(result.signals['preview.ok']).toBe(false); - expect(result.signals['critique.score']).toBe(1); - - const reportPath = path.join(cwd, 'critique', 'visual-validation', 'report.json'); - const saved = JSON.parse(await readFile(reportPath, 'utf8')) as { status?: string; message?: string }; - expect(saved.status).toBe('failed'); - expect(saved.message).toContain('playwright launch failed'); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('fails closed when the visual-validation artifact directory cannot be created', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-artifact-dir-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await writeFile(path.join(cwd, 'critique'), 'not-a-directory', 'utf8'); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('failed'); - expect(result.report.message).toContain('ENOTDIR'); - expect(result.signals['preview.ok']).toBe(false); - expect(result.signals['critique.score']).toBe(1); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('fails closed when the Playwright browser runtime is unavailable', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-no-browser-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async () => { - throw new Error( - "browserType.launch: Executable doesn't exist at /tmp/ms-playwright/chromium\nPlease run the following command to download new browsers: npx playwright install", - ); - }, - }); - - expect(result.report.status).toBe('failed'); - expect(result.report.message).toContain("Executable doesn't exist"); - expect(result.signals['preview.ok']).toBe(false); - expect(result.signals['critique.score']).toBe(1); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('resolves the packaged Playwright browser cache from OD_RESOURCE_ROOT', () => { - expect( - resolvePackagedPlaywrightBrowsersPath({ - OD_RESOURCE_ROOT: '/tmp/open-design/resources', - } as NodeJS.ProcessEnv), - ).toBe('/tmp/open-design/resources/ms-playwright'); - }); - - it('preserves an explicit Playwright browser cache override', () => { - expect( - resolvePackagedPlaywrightBrowsersPath({ - OD_RESOURCE_ROOT: '/tmp/open-design/resources', - PLAYWRIGHT_BROWSERS_PATH: '/custom/playwright-cache', - } as NodeJS.ProcessEnv), - ).toBe('/custom/playwright-cache'); - }); - - it('launches visual validation with Playwright new-headless Chromium', () => { - expect(resolveVisualValidationChromiumLaunchOptions()).toEqual({ - channel: 'chromium', - }); - }); - - it('skips ignored dependency trees before recursing for references', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-ignore-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await mkdir(path.join(cwd, 'references'), { recursive: true }); - await writeFile( - path.join(cwd, 'references', 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await mkdir(path.join(cwd, 'node_modules', 'huge-package', 'assets'), { recursive: true }); - await chmod(path.join(cwd, 'node_modules'), 0o000); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - } finally { - await chmod(path.join(cwd, 'node_modules'), 0o755).catch(() => {}); - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('fails closed when reference auto-discovery hits an unreadable non-ignored directory', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-discovery-unreadable-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await mkdir(path.join(cwd, 'private-assets'), { recursive: true }); - await writeFile(path.join(cwd, 'private-assets', 'notes.txt'), 'keep out', 'utf8'); - await chmod(path.join(cwd, 'private-assets'), 0o000); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('failed'); - expect(result.report.message).toContain('EACCES'); - expect(result.signals['preview.ok']).toBe(false); - expect(result.signals['critique.score']).toBe(1); - } finally { - await chmod(path.join(cwd, 'private-assets'), 0o755).catch(() => {}); - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('only auto-discovers PNG reference screenshots', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-png-only-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile(path.join(cwd, 'reference-home.jpg'), 'not-a-png', 'utf8'); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('skipped'); - expect(result.report.message).toContain('no reference screenshot found'); - expect(result.signals).toEqual({}); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('ignores arbitrary root-level spec-prefixed PNG assets', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-root-spec-asset-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await writeFile( - path.join(cwd, 'special-offer.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('skipped'); - expect(result.report.message).toContain('no reference screenshot found'); - expect(result.signals).toEqual({}); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('still auto-discovers spec-directory PNG assets', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-spec-dir-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await mkdir(path.join(cwd, 'spec'), { recursive: true }); - await writeFile( - path.join(cwd, 'spec', 'special-offer.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(result.report.comparison?.referencePath).toBe('spec/special-offer.png'); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('skips symlinked directories while scanning for references', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-symlink-cycle-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await mkdir(path.join(cwd, 'references'), { recursive: true }); - await writeFile( - path.join(cwd, 'references', 'reference-home.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await mkdir(path.join(cwd, 'loop', 'nested'), { recursive: true }); - await symlink(path.join(cwd, 'loop'), path.join(cwd, 'loop', 'nested', 'back-to-loop')); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(result.report.comparison?.referencePath).toBe('references/reference-home.png'); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('keeps per-reference artifacts distinct when basenames collide', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-collisions-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await mkdir(path.join(cwd, 'references'), { recursive: true }); - await mkdir(path.join(cwd, 'spec'), { recursive: true }); - await writeFile( - path.join(cwd, 'references', 'reference.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await writeFile( - path.join(cwd, 'spec', 'reference.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - const captures: string[] = []; - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - captures.push(path.relative(cwd, outputPath)); - await writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(captures).toEqual([ - 'critique/visual-validation/references-reference-1.actual.png', - 'critique/visual-validation/spec-reference-2.actual.png', - ]); - expect(captures).toContain(result.report.comparison?.actualPath); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('does not treat substring directory names as reference-image segments', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-segment-match-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await mkdir(path.join(cwd, 'assets', 'aspect'), { recursive: true }); - await mkdir(path.join(cwd, 'preferences'), { recursive: true }); - await writeFile( - path.join(cwd, 'assets', 'aspect', 'hero.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await writeFile( - path.join(cwd, 'preferences', 'panel.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - const result = await runVisualValidation({ - cwd, - captureScreenshot: async () => { - throw new Error('visual validation should skip when no reference images are present'); - }, - }); - - expect(result.report.status).toBe('skipped'); - expect(result.report.message).toContain('no reference screenshot'); - expect(result.signals).toEqual({}); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('scores from the worst reference match instead of the best one', async () => { - const cwd = await mkdtemp(path.join(os.tmpdir(), 'od-visual-worst-reference-')); - try { - await writeFile(path.join(cwd, 'index.html'), 'ok', 'utf8'); - await mkdir(path.join(cwd, 'references'), { recursive: true }); - await writeFile( - path.join(cwd, 'references', 'reference-desktop.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - await writeFile( - path.join(cwd, 'references', 'reference-mobile.png'), - PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255])), - ); - - const result = await runVisualValidation({ - cwd, - entryUrl: 'about:blank', - captureScreenshot: async ({ outputPath }) => { - const png = createFilledPng(200, 120, [255, 255, 255, 255]); - if (outputPath.endsWith('reference-mobile-2.actual.png')) { - paintRect(png, { x: 20, y: 20, width: 160, height: 80 }, [255, 0, 0, 255]); - } - await writeFile(outputPath, PNG.sync.write(png)); - }, - }); - - expect(result.report.status).toBe('ok'); - expect(result.report.comparison?.referencePath).toBe('references/reference-mobile.png'); - expect(result.report.comparison?.similarity).toBeLessThan(50); - expect(result.signals['preview.ok']).toBe(true); - expect(result.signals['critique.score']).toBe(1); - } finally { - await rm(cwd, { recursive: true, force: true }); - } - }); - - it('maps similarity bands to critique scores conservatively', () => { - expect(similarityToCritiqueScore(99)).toBe(5); - expect(similarityToCritiqueScore(96)).toBe(4); - expect(similarityToCritiqueScore(90)).toBe(3); - expect(similarityToCritiqueScore(80)).toBe(2); - expect(similarityToCritiqueScore(60)).toBe(1); - }); -}); - -function createFilledPng( - width: number, - height: number, - rgba: readonly [number, number, number, number], -): PNG { - const png = new PNG({ width, height }); - for (let i = 0; i < png.data.length; i += 4) { - png.data[i] = rgba[0]; - png.data[i + 1] = rgba[1]; - png.data[i + 2] = rgba[2]; - png.data[i + 3] = rgba[3]; - } - return png; -} - -function paintRect( - png: PNG, - rect: { x: number; y: number; width: number; height: number }, - rgba: readonly [number, number, number, number], -): void { - for (let y = rect.y; y < rect.y + rect.height; y += 1) { - for (let x = rect.x; x < rect.x + rect.width; x += 1) { - const index = (y * png.width + x) << 2; - png.data[index] = rgba[0]; - png.data[index + 1] = rgba[1]; - png.data[index + 2] = rgba[2]; - png.data[index + 3] = rgba[3]; - } - } -} diff --git a/nix/pnpm-deps.nix b/nix/pnpm-deps.nix index c59fb8953..b60dd4f7e 100644 --- a/nix/pnpm-deps.nix +++ b/nix/pnpm-deps.nix @@ -9,6 +9,6 @@ # 1. Temporarily set the consuming `hash = lib.fakeHash;` # 2. Run the relevant nix build/flake check # 3. Copy the expected hash printed by Nix into the matching field below - daemonHash = "sha256-w1y5qrGa/vZtg4LXQvyrUp4a4Rk9x6z7ve4Up65P6cA="; - webHash = "sha256-Uj9HlDpTtO8y/0ykTVkFtM0oukt1zSYsmIp7JZ9NJJc="; + daemonHash = "sha256-AEg1yKQK55U9P5EJPNqWNkF9teKAV0rwV84F8Im2Ir0="; + webHash = "sha256-/VNR08beUFynS6/uZHoA9AlZE8PPPicGAyJJ6Oy7trg="; } diff --git a/plugins/_official/atoms/visual-validation/SKILL.md b/plugins/_official/atoms/visual-validation/SKILL.md deleted file mode 100644 index 2959b86bd..000000000 --- a/plugins/_official/atoms/visual-validation/SKILL.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: visual-validation -description: Render the current artifact, compare it against reference screenshots, and feed the result into critique scoring. -od: - scenario: new-generation - mode: critique ---- - -# Visual validation - -This atom renders the current project artifact through the daemon preview -route, compares it against discovered or explicit reference screenshots, and -feeds a conservative score back into the critique loop. - -## Current state - -- The daemon registry executes `visual-validation` as a built-in atom worker. -- Reports are written under `critique/visual-validation/`. -- When no reference screenshots are present, the atom skips without changing - critique signals. - -When references exist but the daemon cannot render the artifact, the atom fails -closed by returning a low critique score and `preview.ok: false`. diff --git a/plugins/_official/atoms/visual-validation/open-design.json b/plugins/_official/atoms/visual-validation/open-design.json deleted file mode 100644 index ead5b2f0f..000000000 --- a/plugins/_official/atoms/visual-validation/open-design.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "$schema": "https://open-design.ai/schemas/plugin.v1.json", - "specVersion": "1.0.0", - "name": "visual-validation", - "title": "Visual validation", - "version": "0.1.0", - "description": "Render the current artifact, compare it against reference screenshots, and feed the result into critique scoring.", - "license": "MIT", - "author": { - "name": "Open Design", - "url": "https://github.com/nexu-io" - }, - "homepage": "https://github.com/nexu-io/open-design/tree/main/plugins/_official/atoms/visual-validation", - "tags": [ - "atom", - "first-party", - "new-generation", - "tune-collab", - "visual-validation" - ], - "compat": { - "agentSkills": [ - { - "path": "./SKILL.md" - } - ] - }, - "od": { - "kind": "atom", - "scenario": "new-generation", - "mode": "critique", - "capabilities": [ - "prompt:inject", - "fs:read", - "fs:write", - "subprocess" - ] - } -} diff --git a/plugins/_official/examples/3d-creator-portfolio/example.html b/plugins/_official/examples/3d-creator-portfolio/example.html index d7a1659e3..612977140 100644 --- a/plugins/_official/examples/3d-creator-portfolio/example.html +++ b/plugins/_official/examples/3d-creator-portfolio/example.html @@ -251,7 +251,7 @@