Revert "feat: add screenshot-based visual validation to critique loop (#3660)" (#3865)

This reverts commit 931780c914.
This commit is contained in:
PerishFire
2026-06-08 14:24:44 +08:00
committed by GitHub
parent cf634805aa
commit 10dfd32a3e
33 changed files with 66 additions and 2668 deletions

View File

@@ -51,11 +51,8 @@
"jszip": "3.10.1",
"multer": "2.1.1",
"node-pty": "1.1.0",
"pixelmatch": "7.2.0",
"playwright": "1.60.0",
"posthog-node": "5.34.6",
"prom-client": "15.1.3",
"pngjs": "7.0.0",
"tar": "7.5.15",
"undici": "7.25.0"
},
@@ -64,7 +61,6 @@
"@types/express": "5.0.6",
"@types/multer": "2.1.0",
"@types/node": "20.19.39",
"@types/pngjs": "6.0.5",
"typescript": "5.9.3",
"vitest": "4.1.6"
},

View File

@@ -27,7 +27,6 @@ export const FIRST_PARTY_ATOMS: ReadonlyArray<AtomCatalogEntry> = [
{ id: 'live-artifact', label: 'Live artifact', description: 'Create/refresh live artifacts.', status: 'implemented', taskKinds: ['new-generation', 'tune-collab'] },
{ id: 'connector', label: 'Connector', description: 'Composio connector tool calls.', status: 'implemented', taskKinds: ['new-generation', 'tune-collab'] },
{ id: 'critique-theater', label: 'Critique theater', description: '5-dim panel critique; devloop signal.', status: 'implemented', taskKinds: ['new-generation', 'code-migration', 'figma-migration', 'tune-collab'] },
{ id: 'visual-validation', label: 'Visual validation', description: 'Render the current artifact, compare it to reference screenshots, and feed the result into critique scoring.', status: 'implemented', taskKinds: ['new-generation', 'tune-collab'] },
// Phase 6/7/8 atoms — promoted from 'planned' to 'implemented'
// by the §3.N1-N4 / §3.O2-O5 / §3.P1-P2 / §3.Q2 / §3.S1 slices.
{ id: 'code-import', label: 'Code import', description: 'Walk an existing repo into <cwd>/code/index.json.', status: 'implemented', taskKinds: ['code-migration'] },

View File

@@ -1,16 +1,13 @@
// Plan §3.D — built-in atom workers.
//
// Registered on first use into the worker registry. Every implemented
// atom gets at least a permissive worker so the registry-driven
// pipeline runner stays at parity with the v1 stub for atoms whose
// real work happens entirely inside the agent CLI (file-write,
// todo-write, media-image, …) — the daemon has no independent ground
// truth to observe there and shipping a real watcher would force the
// agent into a fixed protocol we explicitly kept out of scope.
//
// Planned atoms are not registered at all. Plugin doctor already warns
// that those atoms are not runnable yet, and skipping registration keeps
// explicit pipeline stages from masquerading as successful no-op runs.
// Registered on first use into the worker registry. Every atom in
// FIRST_PARTY_ATOMS gets at least a permissive worker so the
// registry-driven pipeline runner stays at parity with the v1 stub
// for atoms whose real work happens entirely inside the agent CLI
// (file-write, todo-write, media-image, …) — the daemon has no
// independent ground truth to observe there and shipping a real
// watcher would force the agent into a fixed protocol we explicitly
// kept out of scope.
//
// One atom does have a daemon-observable signal today:
// `critique-theater`. The worker walks the run's devloop audit log
@@ -26,14 +23,12 @@ import {
type AtomOutcome,
type AtomWorkerContext,
} from './registry.js';
import { runVisualValidation } from './visual-validation.js';
let installed = false;
export function registerBuiltInAtomWorkers(): void {
if (installed) return;
for (const atom of FIRST_PARTY_ATOMS) {
if (atom.status !== 'implemented') continue;
if (atom.id === 'critique-theater') {
registerAtomWorker({
id: atom.id,
@@ -42,14 +37,6 @@ export function registerBuiltInAtomWorkers(): void {
});
continue;
}
if (atom.id === 'visual-validation') {
registerAtomWorker({
id: atom.id,
describe: 'renders the current artifact and compares it against reference screenshots',
run: visualValidationWorker,
});
continue;
}
registerAtomWorker({
id: atom.id,
describe: 'permissive default (daemon has no independent ground truth for this atom)',
@@ -81,28 +68,6 @@ function critiqueTheaterWorker(ctx: AtomWorkerContext): AtomOutcome {
return { signals: {} };
}
/**
 * Atom worker for `visual-validation`.
 *
 * Without a project working directory the worker reports a failed preview
 * with the lowest critique score. Otherwise it delegates to
 * `runVisualValidation` and forwards its signals, using the report message
 * as the outcome note.
 */
async function visualValidationWorker(ctx: AtomWorkerContext): Promise<AtomOutcome> {
  const { cwd, projectId, daemonUrl, entryFile } = ctx;
  if (cwd) {
    const { signals, report } = await runVisualValidation({
      cwd,
      projectId,
      daemonUrl,
      entryFile,
    });
    return { signals, note: report.message };
  }
  // No working directory: there is nothing to render or compare.
  return {
    signals: {
      'preview.ok': false,
      'critique.score': 1,
    },
    note: 'visual validation failed: run has no project working directory',
  };
}
// Matches `score=4`, `score: 4.5`, `Critique score 4/5`, etc.
function parseCritiqueScore(summary: string | null): number | null {
if (!summary) return null;

View File

@@ -35,9 +35,6 @@ export interface AtomWorkerContext {
runId: string;
projectId: string;
conversationId: string | null;
daemonUrl: string | null;
cwd: string | null;
entryFile: string | null;
stage: PipelineStage;
iteration: number;
snapshot: AppliedPluginSnapshot;

View File

@@ -1,672 +0,0 @@
import path from 'node:path';
import { promises as fsp } from 'node:fs';
import pixelmatch from 'pixelmatch';
import { PNG } from 'pngjs';
import { type Page, type ViewportSize } from 'playwright';
import { detectEntryFile } from '../../projects.js';
import type { UntilSignals } from '../until.js';
// Default pixelmatch `threshold` used when the caller does not pass `pixelmatchThreshold`.
const DEFAULT_PIXELMATCH_THRESHOLD = 0.1;
// Pixels of padding added around each merged diff region before its box is drawn.
const DEFAULT_DIFF_BOX_PADDING = 12;
// Diff regions whose bounding boxes are within this many pixels of each other are merged.
const DEFAULT_DIFF_BOX_MERGE_DISTANCE = 24;
// Thickness, in pixels, of the box outline drawn on the highlighted diff image.
const DEFAULT_DIFF_BOX_STROKE_WIDTH = 2;
// Once more than this many connected diff regions are found, a single overall box is reported instead.
const DEFAULT_MAX_DIFF_BOX_REGIONS = 12;
// Upper bound on width * height accepted for any PNG or comparison canvas.
const DEFAULT_MAX_CANVAS_PIXELS = 16_000_000;
// RGB colour used for pixelmatch diff pixels, for detecting them in the mask, and for box outlines.
const DIFF_COLOR = [255, 76, 76] as const;
// Directory names that the reference-image scan never descends into.
const IGNORED_REFERENCE_SCAN_DIRS = new Set(['critique', 'dist', 'node_modules', '.next']);
// Only PNG files are considered when auto-discovering reference images.
const AUTO_DISCOVERED_REFERENCE_IMAGE_RE = /\.png$/i;
// Directory joined onto OD_RESOURCE_ROOT to locate packaged Playwright browsers.
const PACKAGED_PLAYWRIGHT_BROWSERS_DIR = 'ms-playwright';
// Playwright browser channel used for the Chromium launch.
const VISUAL_VALIDATION_CHROMIUM_CHANNEL = 'chromium';
/** Arguments handed to a screenshot capture implementation. */
export interface VisualValidationCaptureInput {
// Entry file the capture is for, passed through unchanged by runVisualValidation.
entryFile: string;
// URL the default Playwright capture navigates to.
entryUrl: string;
// Path where the captured screenshot PNG must be written.
outputPath: string;
// Browser viewport; runVisualValidation sets it to the reference image's width and height.
viewport: ViewportSize;
}
/**
 * Axis-aligned bounding box in pixel coordinates.
 * Both the min and max corners are inclusive.
 */
export interface VisualValidationRegion {
minX: number;
minY: number;
maxX: number;
maxY: number;
}
/** Result of diffing one captured screenshot against one reference image. */
export interface VisualValidationComparison {
// Paths are relative to the project directory and use forward slashes.
referencePath: string;
actualPath: string;
diffPath: string;
// Original dimensions of the two images before padding to a shared canvas.
referenceWidth: number;
referenceHeight: number;
actualWidth: number;
actualHeight: number;
// Canvas used for the comparison: the larger of the two widths and of the two heights.
comparedWidth: number;
comparedHeight: number;
// Number of differing pixels reported by pixelmatch.
diffPixels: number;
// diffPixels divided by the compared canvas area, rounded to 6 decimal places.
diffRatio: number;
// (1 - diffRatio) expressed as a percentage, rounded to 2 decimal places.
similarity: number;
// Merged (unpadded) bounding boxes of the differing areas.
regions: VisualValidationRegion[];
// Human-readable hints derived from the similarity, region count and size mismatch.
suggestions: string[];
}
/** Outcome of a visual validation run, also persisted as report.json. */
export interface VisualValidationReport {
// 'skipped' when no reference image exists; 'failed' on any error; 'ok' otherwise.
status: 'ok' | 'skipped' | 'failed';
// Entry file that was validated, or null when none could be determined.
entryFile: string | null;
// One-line summary or failure description.
message: string;
// ISO-8601 timestamp of when the report was built.
comparedAt: string;
// The comparison that determined the score; null for skipped and failed runs.
comparison: VisualValidationComparison | null;
}
/** Options accepted by runVisualValidation. */
export interface RunVisualValidationOptions {
// Project directory; resolved to an absolute path before use.
cwd: string;
// Project id and daemon base URL, both needed to look up the preview URL when entryUrl is not given.
projectId?: string | null;
daemonUrl?: string | null;
// Explicit reference images (resolved against cwd). When empty or omitted, references are auto-discovered.
referenceImages?: ReadonlyArray<string>;
// Entry file to validate; detected from cwd when omitted.
entryFile?: string | null;
// URL to capture; when set, no preview-url lookup is made.
entryUrl?: string | null;
// Overrides the default pixelmatch threshold.
pixelmatchThreshold?: number;
// Replaces the default Playwright-based capture.
captureScreenshot?: (input: VisualValidationCaptureInput) => Promise<void>;
}
/**
 * Captures the project entry file once per reference screenshot, diffs each
 * capture against its reference, and converts the result into `until` signals.
 *
 * Outcomes:
 * - No reference image found: `status: 'skipped'` with empty signals; nothing
 *   is written to disk.
 * - No entry file, or any error while detecting, capturing or comparing:
 *   `status: 'failed'` with `preview.ok = false` and `critique.score = 1`.
 * - Otherwise: `status: 'ok'`. When several references exist, the comparison
 *   with the LOWEST similarity determines `critique.score` and is the one
 *   included in the report.
 *
 * Once at least one reference exists, `report.json` and `summary.md` are
 * written under `<cwd>/critique/visual-validation`, next to the per-reference
 * `*.actual.png` and `*.diff.png` files.
 *
 * @param input - See {@link RunVisualValidationOptions}.
 * @returns The report plus the signals derived from it. Errors are reported
 *   through the returned `failed` result rather than thrown.
 */
export async function runVisualValidation(
  input: RunVisualValidationOptions,
): Promise<{ report: VisualValidationReport; signals: UntilSignals }> {
  const cwd = path.resolve(input.cwd);
  let entryFile: string | null = input.entryFile ?? null;
  let outputDir: string | null = null;
  try {
    // Entry-file detection happens inside the try so that a failure here is
    // reported as a failed validation instead of escaping as a rejection.
    if (entryFile === null) {
      entryFile = await detectEntryFile(cwd);
    }

    const referenceImages = await resolveReferenceImages(cwd, input.referenceImages);
    if (referenceImages.length === 0) {
      return {
        report: {
          status: 'skipped',
          entryFile,
          message: 'skipped: no reference screenshot found for visual validation',
          comparedAt: new Date().toISOString(),
          comparison: null,
        },
        signals: {},
      };
    }

    outputDir = path.join(cwd, 'critique', 'visual-validation');
    await fsp.mkdir(outputDir, { recursive: true });

    if (!entryFile) {
      const failure = buildFailedVisualValidationResult(
        null,
        'visual validation failed: no HTML entry file found for visual validation',
      );
      await writeVisualValidationArtifacts(outputDir, failure.report);
      return failure;
    }

    // Loop-invariant settings.
    const capture = input.captureScreenshot ?? captureWithPlaywright;
    const pixelmatchThreshold = input.pixelmatchThreshold ?? DEFAULT_PIXELMATCH_THRESHOLD;
    // Resolved lazily on the first reference and then reused: its inputs never
    // change between iterations, so repeating the lookup only adds requests.
    let entryUrl: string | null = null;
    // Comparison with the lowest similarity seen so far.
    let weakest: VisualValidationComparison | null = null;

    for (const [index, referencePath] of referenceImages.entries()) {
      const reference = PNG.sync.read(await fsp.readFile(referencePath));
      assertPngSize(reference, referencePath);
      const viewport = viewportForReference(reference, referencePath);
      const stem = buildReferenceArtifactStem(cwd, referencePath, index);
      const actualPath = path.join(outputDir, `${stem}.actual.png`);
      const diffPath = path.join(outputDir, `${stem}.diff.png`);

      if (entryUrl === null) {
        entryUrl = await resolveVisualValidationEntryUrl({
          entryFile,
          ...(input.projectId == null ? {} : { projectId: input.projectId }),
          ...(input.daemonUrl == null ? {} : { daemonUrl: input.daemonUrl }),
          ...(input.entryUrl == null ? {} : { entryUrl: input.entryUrl }),
        });
      }

      await capture({
        entryFile,
        entryUrl,
        outputPath: actualPath,
        viewport,
      });

      const actual = PNG.sync.read(await fsp.readFile(actualPath));
      assertPngSize(actual, actualPath);
      const comparison = await comparePngs({
        cwd,
        reference,
        referencePath,
        actual,
        actualPath,
        diffPath,
        pixelmatchThreshold,
      });
      if (!weakest || comparison.similarity < weakest.similarity) weakest = comparison;
    }

    if (!weakest) {
      const failure = buildFailedVisualValidationResult(
        entryFile,
        'visual validation failed before any comparisons completed',
      );
      await writeVisualValidationArtifacts(outputDir, failure.report);
      return failure;
    }

    const report: VisualValidationReport = {
      status: 'ok',
      entryFile,
      message: summarizeComparison(weakest),
      comparedAt: new Date().toISOString(),
      comparison: weakest,
    };
    await writeVisualValidationArtifacts(outputDir, report);
    return {
      report,
      signals: {
        'preview.ok': true,
        'critique.score': similarityToCritiqueScore(weakest.similarity),
      },
    };
  } catch (error) {
    const failure = buildFailedVisualValidationResult(
      entryFile,
      `visual validation failed: ${formatVisualValidationError(error)}`,
    );
    if (outputDir) {
      // Best effort: the failure result is returned even if the report cannot be written.
      await writeVisualValidationArtifacts(outputDir, failure.report).catch(() => {});
    }
    return failure;
  }
}
/**
 * Maps a similarity percentage onto the 1-5 critique score scale.
 *
 * @param similarity - Similarity as a percentage.
 * @returns 5 for >= 98, 4 for >= 95, 3 for >= 88, 2 for >= 78, otherwise 1.
 */
export function similarityToCritiqueScore(similarity: number): number {
  // [minimum similarity, score], ordered from the strictest band downwards.
  const bands: ReadonlyArray<readonly [number, number]> = [
    [98, 5],
    [95, 4],
    [88, 3],
    [78, 2],
  ];
  for (const [floor, score] of bands) {
    if (similarity >= floor) return score;
  }
  return 1;
}
/**
 * Diffs a captured screenshot against its reference image.
 *
 * Both images are placed on a shared canvas (the larger width and the larger
 * height of the two), compared with pixelmatch, and a copy of the captured
 * image with boxes drawn around the differing regions is written to
 * `diffPath`.
 *
 * @returns Metrics, merged diff regions and suggestions, with all paths made
 *   relative to `cwd`.
 * @throws If the shared canvas has invalid dimensions or exceeds the pixel limit.
 */
async function comparePngs(input: {
  cwd: string;
  reference: PNG;
  referencePath: string;
  actual: PNG;
  actualPath: string;
  diffPath: string;
  pixelmatchThreshold: number;
}): Promise<VisualValidationComparison> {
  const { cwd, reference, referencePath, actual, actualPath, diffPath } = input;

  const canvasWidth = Math.max(reference.width, actual.width);
  const canvasHeight = Math.max(reference.height, actual.height);
  assertPngPixels(canvasWidth, canvasHeight, `${referencePath} vs ${actualPath}`);

  const paddedReference = normalizePng(reference, canvasWidth, canvasHeight);
  const paddedActual = normalizePng(actual, canvasWidth, canvasHeight);
  const mask = new PNG({ width: canvasWidth, height: canvasHeight });
  const [red, green, blue] = DIFF_COLOR;
  const diffPixels = pixelmatch(
    paddedReference.data,
    paddedActual.data,
    mask.data,
    canvasWidth,
    canvasHeight,
    {
      threshold: input.pixelmatchThreshold,
      alpha: 0.2,
      diffColor: [red, green, blue],
    },
  );

  // Outline every merged region on a copy of the captured image.
  const annotated = clonePng(paddedActual);
  const regions = mergeDiffBoxes(diffBoxesFromMask(mask), DEFAULT_DIFF_BOX_MERGE_DISTANCE);
  for (const region of regions) {
    const outline = padBox(region, DEFAULT_DIFF_BOX_PADDING, canvasWidth, canvasHeight);
    drawBox(annotated, outline, DEFAULT_DIFF_BOX_STROKE_WIDTH);
  }
  await fsp.writeFile(diffPath, PNG.sync.write(annotated));

  const canvasPixels = canvasWidth * canvasHeight;
  const diffRatio = canvasPixels > 0 ? diffPixels / canvasPixels : 0;
  const similarity = Number(((1 - diffRatio) * 100).toFixed(2));
  const suggestions = buildSuggestions({
    similarity,
    regionCount: regions.length,
    comparedWidth: canvasWidth,
    comparedHeight: canvasHeight,
    referenceWidth: reference.width,
    referenceHeight: reference.height,
    actualWidth: actual.width,
    actualHeight: actual.height,
  });

  return {
    referencePath: relativeToProject(cwd, referencePath),
    actualPath: relativeToProject(cwd, actualPath),
    diffPath: relativeToProject(cwd, diffPath),
    referenceWidth: reference.width,
    referenceHeight: reference.height,
    actualWidth: actual.width,
    actualHeight: actual.height,
    comparedWidth: canvasWidth,
    comparedHeight: canvasHeight,
    diffPixels,
    diffRatio: Number(diffRatio.toFixed(6)),
    similarity,
    regions,
    suggestions,
  };
}
/**
 * Default capture implementation: launches Chromium through Playwright,
 * loads `entryUrl` at the requested viewport, and writes a viewport-sized
 * screenshot to `outputPath`. The browser is always closed afterwards.
 */
async function captureWithPlaywright(input: VisualValidationCaptureInput): Promise<void> {
  configurePackagedPlaywrightEnvironment();
  // Loaded on demand so Playwright is only imported when a capture actually runs.
  const playwright = await import('playwright');
  const launchOptions = resolveVisualValidationChromiumLaunchOptions();
  const browser = await playwright.chromium.launch(launchOptions);
  try {
    const { viewport, entryUrl, outputPath } = input;
    const page = await browser.newPage({ viewport, deviceScaleFactor: 1 });
    await stabilizePage(page);
    await page.goto(entryUrl, { waitUntil: 'networkidle' });
    await page.screenshot({
      path: outputPath,
      fullPage: false,
      animations: 'disabled',
      caret: 'hide',
    });
  } finally {
    await browser.close();
  }
}
/**
 * Launch options for the Chromium instance used by visual validation.
 *
 * Playwright's default headless launch uses chromium_headless_shell-*.
 * Packaged builds may legitimately ship only chromium-* via `--no-shell`,
 * so the bundled Chromium new-headless channel is forced instead.
 */
export function resolveVisualValidationChromiumLaunchOptions(): { channel: 'chromium' } {
  const launchOptions: { channel: 'chromium' } = {
    channel: VISUAL_VALIDATION_CHROMIUM_CHANNEL,
  };
  return launchOptions;
}
/**
 * Works out where Playwright browsers should be looked up.
 *
 * @param env - Environment to read; defaults to `process.env`.
 * @returns The trimmed `PLAYWRIGHT_BROWSERS_PATH` when it is set; otherwise
 *   the packaged browsers directory under `OD_RESOURCE_ROOT`; otherwise `null`.
 */
export function resolvePackagedPlaywrightBrowsersPath(env: NodeJS.ProcessEnv = process.env): string | null {
  const explicitPath = env.PLAYWRIGHT_BROWSERS_PATH?.trim();
  if (explicitPath) {
    return explicitPath;
  }
  const packagedRoot = env.OD_RESOURCE_ROOT?.trim();
  return packagedRoot ? path.join(packagedRoot, PACKAGED_PLAYWRIGHT_BROWSERS_DIR) : null;
}
/**
 * Sets `PLAYWRIGHT_BROWSERS_PATH` on `env` to the packaged browsers
 * directory, unless the variable already holds a non-blank value or no
 * packaged location can be resolved.
 */
function configurePackagedPlaywrightEnvironment(env: NodeJS.ProcessEnv = process.env): void {
  // An explicit, non-blank setting always wins.
  if (env.PLAYWRIGHT_BROWSERS_PATH?.trim()) return;
  const packagedPath = resolvePackagedPlaywrightBrowsersPath(env);
  if (packagedPath) {
    env.PLAYWRIGHT_BROWSERS_PATH = packagedPath;
  }
}
/**
 * Registers an init script that injects a stylesheet zeroing animation and
 * transition durations/delays, hiding the text caret, and disabling smooth
 * scrolling, so captures are not affected by in-flight visual effects.
 *
 * The init script may run before the document has a root element. In that
 * case the stylesheet is attached on `DOMContentLoaded` instead of throwing
 * on a missing `document.documentElement`.
 */
async function stabilizePage(page: Page): Promise<void> {
  await page.addInitScript(`
    (() => {
      const style = document.createElement('style');
      style.textContent = \`
        *,
        *::before,
        *::after {
          animation-duration: 0s !important;
          animation-delay: 0s !important;
          transition-duration: 0s !important;
          transition-delay: 0s !important;
          caret-color: transparent !important;
        }
        html {
          scroll-behavior: auto !important;
        }
      \`;
      const attach = () => {
        const root = document.documentElement;
        if (root) root.appendChild(style);
      };
      if (document.documentElement) {
        attach();
      } else {
        document.addEventListener('DOMContentLoaded', attach, { once: true });
      }
    })();
  `);
}
/**
 * Persists a report into `outputDir` as `report.json` (pretty-printed JSON)
 * followed by `summary.md` (a short Markdown digest).
 */
async function writeVisualValidationArtifacts(
  outputDir: string,
  report: VisualValidationReport,
): Promise<void> {
  const reportJson = JSON.stringify(report, null, 2) + '\n';
  await fsp.writeFile(path.join(outputDir, 'report.json'), reportJson, 'utf8');

  const summary: string[] = [
    '# Visual validation',
    '',
    `Status: ${report.status}`,
    `Compared at: ${report.comparedAt}`,
    `Entry file: ${report.entryFile || '<none>'}`,
    '',
    report.message,
  ];

  const { comparison } = report;
  if (comparison) {
    const diffPercent = (comparison.diffRatio * 100).toFixed(2);
    summary.push(
      '',
      `Reference: ${comparison.referencePath}`,
      `Actual: ${comparison.actualPath}`,
      `Diff: ${comparison.diffPath}`,
      `Similarity: ${comparison.similarity}%`,
      `Diff ratio: ${diffPercent}%`,
    );
    if (comparison.suggestions.length > 0) {
      summary.push('', 'Suggestions:', ...comparison.suggestions.map((hint) => `- ${hint}`));
    }
  }

  await fsp.writeFile(path.join(outputDir, 'summary.md'), summary.join('\n') + '\n', 'utf8');
}
/**
 * Builds the result returned for every failure path: a `failed` report
 * stamped with the current time and no comparison, plus signals that mark
 * the preview as broken and set the critique score to 1.
 */
function buildFailedVisualValidationResult(
  entryFile: string | null,
  message: string,
): { report: VisualValidationReport; signals: UntilSignals } {
  const report: VisualValidationReport = {
    status: 'failed',
    entryFile,
    message,
    comparedAt: new Date().toISOString(),
    comparison: null,
  };
  const signals: UntilSignals = { 'preview.ok': false, 'critique.score': 1 };
  return { report, signals };
}
/**
 * Determines the URL to capture for `entryFile`.
 *
 * A non-empty `entryUrl` is returned as-is. Otherwise the daemon's
 * `preview-url` route is queried and the `url` it returns is resolved against
 * the daemon base URL.
 *
 * @throws If neither `entryUrl` nor both `projectId` and `daemonUrl` are
 *   provided, if the lookup responds with a non-OK status, or if the response
 *   carries no usable `url`.
 */
async function resolveVisualValidationEntryUrl(input: {
  entryFile: string;
  projectId?: string | null;
  daemonUrl?: string | null;
  entryUrl?: string | null;
}): Promise<string> {
  const { entryFile, projectId, daemonUrl, entryUrl } = input;

  if (typeof entryUrl === 'string' && entryUrl.length > 0) {
    return entryUrl;
  }
  if (!projectId || !daemonUrl) {
    throw new Error(
      'visual validation requires daemon preview context to resolve the project entry URL',
    );
  }

  // Drop trailing slashes so the route path can be appended cleanly.
  const base = daemonUrl.replace(/\/+$/, '');
  const projectSegment = encodeURIComponent(projectId);
  const fileParam = encodeURIComponent(entryFile);
  const lookupUrl = `${base}/api/projects/${projectSegment}/preview-url?file=${fileParam}`;

  const response = await fetch(lookupUrl, { headers: { accept: 'application/json' } });
  if (!response.ok) {
    throw new Error(`visual validation preview route lookup failed: ${response.status} ${response.statusText}`);
  }

  const payload = await response.json() as { url?: unknown };
  const previewUrl = payload.url;
  if (typeof previewUrl !== 'string' || previewUrl.length === 0) {
    throw new Error('visual validation preview route lookup returned no url');
  }
  return new URL(previewUrl, `${base}/`).toString();
}
/**
 * Returns absolute paths of the reference images to compare against.
 *
 * Explicitly supplied paths are resolved against `cwd` and returned without
 * further checks. Otherwise the project tree is scanned for PNG files that
 * look like references (see `isAutoDiscoveredReferenceImage`), excluding
 * anything under a top-level `critique/` directory, and the matches are
 * returned sorted by relative path.
 */
async function resolveReferenceImages(
  cwd: string,
  explicit?: ReadonlyArray<string>,
): Promise<string[]> {
  if (explicit && explicit.length > 0) {
    return explicit.map((entry) => path.resolve(cwd, entry));
  }
  const files = await walkFiles(cwd, '');
  const candidates = files.filter((relPath) => {
    // Relative paths are built with the platform separator. Normalize to
    // forward slashes so the 'critique/' prefix check also works on Windows.
    const normalized = relPath.split(path.sep).join('/').toLowerCase();
    if (!AUTO_DISCOVERED_REFERENCE_IMAGE_RE.test(normalized)) return false;
    if (normalized.startsWith('critique/')) return false;
    return isAutoDiscoveredReferenceImage(normalized);
  });
  candidates.sort();
  return candidates.map((relPath) => path.join(cwd, relPath));
}
/**
 * Decides whether a relative image path looks like a reference screenshot:
 * either its file name has a reference-style prefix, or one of its parent
 * directories is named `references`, `reference` or `spec`.
 */
function isAutoDiscoveredReferenceImage(relPath: string): boolean {
  if (isNamedReferenceImage(path.basename(relPath))) {
    return true;
  }
  // Split on either separator and drop the '.' produced for files at the root.
  const parentDirs = path.dirname(relPath)
    .split(/[\\/]+/)
    .filter((segment) => segment !== '.' && segment.length > 0);
  return parentDirs.some(
    (segment) => segment === 'references' || segment === 'reference' || segment === 'spec',
  );
}
/**
 * Returns true when a file name starts with `reference`, `baseline` or
 * `expected`. The comparison is case-sensitive.
 */
function isNamedReferenceImage(name: string): boolean {
  const prefixes = ['reference', 'baseline', 'expected'];
  return prefixes.some((prefix) => name.startsWith(prefix));
}
/**
 * Recursively lists regular files below `root`, returning paths relative to
 * `root`.
 *
 * Entries whose name starts with `.`, symbolic links, and directories listed
 * in `IGNORED_REFERENCE_SCAN_DIRS` are skipped.
 *
 * @param root - Directory the scan is anchored at.
 * @param relDir - Sub-directory (relative to `root`) to list; `''` for `root` itself.
 */
async function walkFiles(root: string, relDir: string): Promise<string[]> {
  const absoluteDir = relDir ? path.join(root, relDir) : root;
  const found: string[] = [];
  for (const dirent of await fsp.readdir(absoluteDir, { withFileTypes: true })) {
    if (dirent.name.startsWith('.') || dirent.isSymbolicLink()) continue;
    const childPath = relDir ? path.join(relDir, dirent.name) : dirent.name;
    if (dirent.isDirectory()) {
      if (!IGNORED_REFERENCE_SCAN_DIRS.has(dirent.name)) {
        found.push(...await walkFiles(root, childPath));
      }
    } else if (dirent.isFile()) {
      found.push(childPath);
    }
  }
  return found;
}
/**
 * Produces the one-line report message for a comparison: the similarity and
 * reference file name, the number of highlighted regions, and, when present,
 * the first suggestion.
 */
function summarizeComparison(comparison: VisualValidationComparison): string {
  const referenceName = path.basename(comparison.referencePath);
  const regionCount = comparison.regions.length;
  const pluralSuffix = regionCount === 1 ? '' : 's';

  let summary = `visual similarity ${comparison.similarity}% against ${referenceName}`;
  summary += `; ${regionCount} highlighted diff region${pluralSuffix}`;
  if (comparison.suggestions.length > 0) {
    summary += `; focus: ${comparison.suggestions[0]}`;
  }
  return summary;
}
/**
 * Extracts a human-readable message from a caught value.
 *
 * @param error - Whatever was thrown.
 * @returns The message of an `Error` with a non-empty message, or the value
 *   itself when a non-blank string was thrown; otherwise `'unknown error'`.
 */
function formatVisualValidationError(error: unknown): string {
  if (error instanceof Error && error.message) return error.message;
  // Code may throw plain strings; keep their text instead of discarding it.
  if (typeof error === 'string' && error.trim().length > 0) return error;
  return 'unknown error';
}
/**
 * Derives improvement hints from comparison metrics.
 *
 * Hints are emitted in this order: a canvas-size hint when either dimension
 * differs by more than 24px; at most one hint based on the number of diff
 * regions; a coarse-audit hint when similarity is below 90; and, when nothing
 * else applied, a generic "minor polish" hint.
 */
function buildSuggestions(input: {
  similarity: number;
  regionCount: number;
  comparedWidth: number;
  comparedHeight: number;
  referenceWidth: number;
  referenceHeight: number;
  actualWidth: number;
  actualHeight: number;
}): string[] {
  const { similarity, regionCount } = input;
  const widthDelta = Math.abs(input.referenceWidth - input.actualWidth);
  const heightDelta = Math.abs(input.referenceHeight - input.actualHeight);
  const hints: string[] = [];

  if (widthDelta > 24 || heightDelta > 24) {
    hints.push('Match the reference canvas size or responsive breakpoint before tuning local styling.');
  }

  // At most one region-based hint; the first matching rule wins.
  let regionHint: string | null = null;
  if (regionCount === 0 && similarity < 100) {
    regionHint = 'Recheck anti-aliasing, image loading, and screenshot viewport settings.';
  } else if (regionCount <= 2 && similarity < 95) {
    regionHint = 'Fix the most visible component-level styling mismatches in the highlighted regions.';
  } else if (regionCount >= 6) {
    regionHint = 'Layout, spacing, or typography is drifting across the page rather than in one isolated component.';
  }
  if (regionHint !== null) {
    hints.push(regionHint);
  }

  if (similarity < 90) {
    hints.push('Audit large spacing, sizing, and color-token differences before doing fine polish.');
  }

  return hints.length > 0
    ? hints
    : ['Only minor visual polish remains; tighten spacing and token parity in the highlighted regions.'];
}
/** Returns a new PNG with the same dimensions and a copy of the pixel data of `source`. */
function clonePng(source: PNG): PNG {
  const { width, height } = source;
  const duplicate = new PNG({ width, height });
  source.data.copy(duplicate.data);
  return duplicate;
}
/**
 * Builds the capture viewport from a reference image's dimensions.
 *
 * @throws If the reference has invalid dimensions or exceeds the pixel limit.
 */
function viewportForReference(reference: PNG, label: string): ViewportSize {
  assertPngSize(reference, label);
  const { width, height } = reference;
  return { width, height };
}
/**
 * Copies `source` into the top-left corner of a newly created
 * `width` x `height` PNG and returns that new image.
 */
function normalizePng(source: PNG, width: number, height: number): PNG {
  const canvas = new PNG({ width, height });
  const { width: sourceWidth, height: sourceHeight } = source;
  PNG.bitblt(source, canvas, 0, 0, sourceWidth, sourceHeight, 0, 0);
  return canvas;
}
/** Validates a decoded PNG's dimensions; `label` identifies the image in the error message. */
function assertPngSize(png: PNG, label: string): void {
  const { width, height } = png;
  assertPngPixels(width, height, label);
}
/**
 * Rejects dimensions that are non-finite, non-positive, or whose area
 * exceeds `DEFAULT_MAX_CANVAS_PIXELS`.
 *
 * @throws Error naming `label` when either check fails.
 */
function assertPngPixels(width: number, height: number, label: string): void {
  const dimensionsValid = [width, height].every(
    (dimension) => Number.isFinite(dimension) && !(dimension <= 0),
  );
  if (!dimensionsValid) {
    throw new Error(`${label} has invalid PNG dimensions`);
  }
  const pixels = width * height;
  if (pixels > DEFAULT_MAX_CANVAS_PIXELS) {
    throw new Error(`${label} is ${pixels} pixels; maximum allowed is ${DEFAULT_MAX_CANVAS_PIXELS} pixels`);
  }
}
/**
 * Draws the outline of `box` onto `png` in the diff colour, fully opaque.
 * Only pixels within `strokeWidth` of one of the box edges are painted; the
 * interior is left untouched.
 */
function drawBox(png: PNG, box: VisualValidationRegion, strokeWidth: number): void {
  const { minX, minY, maxX, maxY } = box;
  const [red, green, blue] = DIFF_COLOR;
  for (let y = minY; y <= maxY; y += 1) {
    const inHorizontalStroke = y - minY < strokeWidth || maxY - y < strokeWidth;
    for (let x = minX; x <= maxX; x += 1) {
      const inVerticalStroke = x - minX < strokeWidth || maxX - x < strokeWidth;
      if (!(inVerticalStroke || inHorizontalStroke)) continue;
      // Four bytes (RGBA) per pixel.
      const offset = (y * png.width + x) << 2;
      png.data[offset] = red;
      png.data[offset + 1] = green;
      png.data[offset + 2] = blue;
      png.data[offset + 3] = 255;
    }
  }
}
/**
 * Extracts bounding boxes of the differing areas from a diff mask.
 *
 * A pixel counts as changed when its RGB channels exactly equal DIFF_COLOR.
 * Changed pixels are grouped into 4-connected components (left/right/up/down
 * neighbours) and one bounding box is returned per component. If more than
 * DEFAULT_MAX_DIFF_BOX_REGIONS components are found, a single box covering
 * every changed pixel is returned instead. Returns [] when nothing changed.
 */
function diffBoxesFromMask(maskPng: PNG): VisualValidationRegion[] {
const { width, height } = maskPng;
// One flag per pixel: 1 = changed and not yet assigned to a component.
const changed = new Uint8Array(width * height);
// Bounding box of all changed pixels, used as the fallback result.
let overall: VisualValidationRegion | null = null;
// Pass 1: flag changed pixels and grow the overall bounding box.
for (let index = 0; index < changed.length; index += 1) {
// Four bytes (RGBA) per pixel in the mask data.
const dataIndex = index << 2;
if (
maskPng.data[dataIndex] === DIFF_COLOR[0]
&& maskPng.data[dataIndex + 1] === DIFF_COLOR[1]
&& maskPng.data[dataIndex + 2] === DIFF_COLOR[2]
) {
changed[index] = 1;
const x = index % width;
const y = Math.floor(index / width);
overall = overall == null
? { minX: x, minY: y, maxX: x, maxY: y }
: {
minX: Math.min(overall.minX, x),
minY: Math.min(overall.minY, y),
maxX: Math.max(overall.maxX, x),
maxY: Math.max(overall.maxY, y),
};
}
}
if (overall == null) return [];
const boxes: VisualValidationRegion[] = [];
// Work queue of pixel indices, reused for every component. Each pixel is
// enqueued at most once per component because its flag is cleared on enqueue.
const queue = new Int32Array(width * height);
// Pass 2: breadth-first flood fill from every still-flagged pixel.
for (let index = 0; index < changed.length; index += 1) {
if (changed[index] === 0) continue;
let head = 0;
let tail = 0;
let minX = index % width;
let maxX = minX;
let minY = Math.floor(index / width);
let maxY = minY;
changed[index] = 0;
queue[tail++] = index;
while (head < tail) {
const current = queue[head++] ?? -1;
if (current < 0) continue;
const x = current % width;
const y = Math.floor(current / width);
minX = Math.min(minX, x);
maxX = Math.max(maxX, x);
minY = Math.min(minY, y);
maxY = Math.max(maxY, y);
// Visit the four direct neighbours; -1 marks a neighbour outside the image.
tail = enqueueChanged(changed, queue, tail, x > 0 ? current - 1 : -1);
tail = enqueueChanged(changed, queue, tail, x < width - 1 ? current + 1 : -1);
tail = enqueueChanged(changed, queue, tail, y > 0 ? current - width : -1);
tail = enqueueChanged(changed, queue, tail, y < height - 1 ? current + width : -1);
}
boxes.push({ minX, minY, maxX, maxY });
// Too many separate regions: give up on detail and report one overall box.
if (boxes.length > DEFAULT_MAX_DIFF_BOX_REGIONS) return [overall];
}
return boxes;
}
/**
 * Flood-fill helper: if `index` refers to a pixel that is still flagged in
 * `changed`, clears the flag and appends the index to `queue` at `tail`.
 *
 * @param index - Pixel index to visit; a negative value means "no neighbour".
 * @returns The new tail position (unchanged when nothing was enqueued).
 */
function enqueueChanged(changed: Uint8Array, queue: Int32Array, tail: number, index: number): number {
  const shouldVisit = !(index < 0) && changed[index] !== 0;
  if (shouldVisit) {
    changed[index] = 0;
    queue[tail] = index;
    return tail + 1;
  }
  return tail;
}
/**
 * Merges boxes that overlap or lie within `distance` pixels of each other
 * into their common bounding box.
 *
 * Each seed box keeps absorbing neighbours until no remaining box is close
 * enough, so chains of nearby boxes collapse into one. Inputs with fewer
 * than two boxes are returned unchanged (same array instance).
 */
function mergeDiffBoxes(boxes: VisualValidationRegion[], distance: number): VisualValidationRegion[] {
  if (boxes.length < 2) return boxes;

  const remaining = [...boxes];
  const result: VisualValidationRegion[] = [];

  for (let seed = remaining.shift(); seed !== undefined; seed = remaining.shift()) {
    let grown: VisualValidationRegion = seed;
    let absorbedAny: boolean;
    do {
      absorbedAny = false;
      // Scan backwards so that splicing does not disturb indices still to be visited.
      for (let i = remaining.length - 1; i >= 0; i -= 1) {
        const other = remaining[i]!;
        if (boxesTouchOrNear(grown, other, distance)) {
          grown = {
            minX: Math.min(grown.minX, other.minX),
            minY: Math.min(grown.minY, other.minY),
            maxX: Math.max(grown.maxX, other.maxX),
            maxY: Math.max(grown.maxY, other.maxY),
          };
          remaining.splice(i, 1);
          absorbedAny = true;
        }
      }
      // A grown box may now reach boxes it previously missed, so rescan.
    } while (absorbedAny);
    result.push(grown);
  }
  return result;
}
/**
 * Returns true when two boxes overlap or are separated by at most `distance`
 * pixels on both axes.
 */
function boxesTouchOrNear(a: VisualValidationRegion, b: VisualValidationRegion, distance: number): boolean {
  // True when the span ending at `end` stays more than `distance` short of `start`.
  const tooFar = (end: number, start: number): boolean => end + distance < start;
  const closeOnX = !tooFar(a.maxX, b.minX) && !tooFar(b.maxX, a.minX);
  const closeOnY = !tooFar(a.maxY, b.minY) && !tooFar(b.maxY, a.minY);
  return closeOnX && closeOnY;
}
/**
 * Expands `box` by `padding` pixels on every side, keeping all corners
 * inside an image of `maxWidth` x `maxHeight` pixels.
 */
function padBox(
  box: VisualValidationRegion,
  padding: number,
  maxWidth: number,
  maxHeight: number,
): VisualValidationRegion {
  const lastColumn = maxWidth - 1;
  const lastRow = maxHeight - 1;
  const minX = clamp(box.minX - padding, 0, lastColumn);
  const minY = clamp(box.minY - padding, 0, lastRow);
  const maxX = clamp(box.maxX + padding, 0, lastColumn);
  const maxY = clamp(box.maxY + padding, 0, lastRow);
  return { minX, minY, maxX, maxY };
}
/**
 * Turns arbitrary text into a file-name-safe stem: every run of characters
 * other than letters, digits, `.`, `_` and `-` becomes a single `-`, and
 * leading/trailing dashes are removed. Falls back to `'reference'` when
 * nothing is left.
 */
function sanitizeStem(input: string): string {
  const dashed = input.replace(/[^a-zA-Z0-9._-]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  return trimmed.length > 0 ? trimmed : 'reference';
}
/**
 * Builds the file-name stem for the artifacts of one reference image: the
 * project-relative reference path without its extension, sanitized, followed
 * by the 1-based position of the reference.
 */
function buildReferenceArtifactStem(cwd: string, referencePath: string, index: number): string {
  const relativePath = relativeToProject(cwd, referencePath);
  const withoutLeadingDot = relativePath.replace(/^\.\//, '');
  const withoutExtension = withoutLeadingDot.replace(/\.[^.]+$/, '');
  const ordinal = index + 1;
  return `${sanitizeStem(withoutExtension)}-${ordinal}`;
}
/** Returns `target` relative to `cwd`, using forward slashes regardless of platform. */
function relativeToProject(cwd: string, target: string): string {
  const platformRelative = path.relative(cwd, target);
  const segments = platformRelative.split(path.sep);
  return segments.join('/');
}
/** Restricts `value` to the range [`min`, `max`]. */
function clamp(value: number, min: number, max: number): number {
  const cappedAbove = Math.min(max, value);
  return Math.max(min, cappedAbove);
}

View File

@@ -83,7 +83,6 @@ export * from './atoms/patch-edit.js';
export * from './atoms/registry.js';
export * from './atoms/rewrite-plan.js';
export * from './atoms/token-map.js';
export * from './atoms/visual-validation.js';
export * from './bundled.js';
export * from './connector-gate.js';
export * from './connector-probe.js';
@@ -94,7 +93,6 @@ export * from './lockfile.js';
export * from './persistence.js';
export * from './marketplaces.js';
export * from './pipeline.js';
export * from './pipeline-schedule.js';
export * from './pipeline-runner.js';
export * from './publish.js';
export * from './registry.js';

View File

@@ -1,106 +0,0 @@
import type {
AppliedPluginSnapshot,
GenUISurfaceSpec,
PipelineStage,
PluginPipeline,
} from '@open-design/contracts';
// Atom ids that mark the pre-run / post-run boundary: the first pipeline
// stage containing any of these atoms, and every stage after it, is placed
// in the post-run half of the split.
const POST_RUN_ATOMS = new Set([
'visual-validation',
]);
/** A pipeline divided at the first stage that contains a post-run atom. */
export interface PipelineScheduleSplit {
// Stages before the boundary; null when there are none.
preRun: PluginPipeline | null;
// The boundary stage and everything after it; null when no stage contains a post-run atom.
postRun: PluginPipeline | null;
}
/** An applied plugin snapshot divided into pre-run and post-run snapshots. */
export interface PipelineSnapshotScheduleSplit {
// Snapshot carrying the pre-run stages (or only stage-less surfaces); null when neither exists.
preRun: AppliedPluginSnapshot | null;
// Snapshot carrying the post-run stages; null when the pipeline has no post-run part.
postRun: AppliedPluginSnapshot | null;
}
/**
 * Splits a pipeline at the first stage that contains a post-run atom.
 *
 * @param pipeline - Pipeline to split; may be absent.
 * @returns `preRun` holds the stages before that stage and `postRun` holds
 *   that stage and all later ones. A half without stages is `null`. When no
 *   stage contains a post-run atom, the original pipeline object is returned
 *   as `preRun`.
 */
export function splitPipelineByExecutionBoundary(
  pipeline: PluginPipeline | null | undefined,
): PipelineScheduleSplit {
  if (!pipeline?.stages?.length) {
    return { preRun: null, postRun: null };
  }

  const { stages } = pipeline;
  const boundary = stages.findIndex((stage) =>
    stage.atoms.some((atomId) => POST_RUN_ATOMS.has(atomId)));
  if (boundary < 0) {
    return { preRun: pipeline, postRun: null };
  }

  const before: PipelineStage[] = stages.slice(0, boundary);
  const fromBoundary: PipelineStage[] = stages.slice(boundary);
  const preRun = before.length > 0 ? { ...pipeline, stages: before } : null;
  const postRun = fromBoundary.length > 0 ? { ...pipeline, stages: fromBoundary } : null;
  return { preRun, postRun };
}
/**
 * Splits an applied plugin snapshot into a pre-run and a post-run snapshot.
 *
 * Each half keeps the snapshot's other fields, carries only its share of the
 * pipeline stages, and keeps only the GenUI surfaces triggered by one of
 * those stages. Surfaces without a stage trigger go to the pre-run half
 * only. When there are no pre-run stages, the pre-run half is either a
 * stage-less snapshot holding just the trigger-less surfaces, or `null`.
 */
export function splitPipelineSnapshotByExecutionBoundary(
  snapshot: AppliedPluginSnapshot | null | undefined,
): PipelineSnapshotScheduleSplit {
  if (!snapshot) {
    return { preRun: null, postRun: null };
  }

  const { preRun: prePipeline, postRun: postPipeline } =
    splitPipelineByExecutionBoundary(snapshot.pipeline);

  let preRun: AppliedPluginSnapshot | null;
  if (prePipeline) {
    preRun = {
      ...snapshot,
      pipeline: prePipeline,
      genuiSurfaces: filterSurfacesForPipelineStages(
        snapshot.genuiSurfaces,
        prePipeline,
        { includeTriggerless: true },
      ),
    };
  } else {
    preRun = buildPreRunSurfaceOnlySnapshot(snapshot, prePipeline);
  }

  let postRun: AppliedPluginSnapshot | null = null;
  if (postPipeline) {
    postRun = {
      ...snapshot,
      pipeline: postPipeline,
      genuiSurfaces: filterSurfacesForPipelineStages(
        snapshot.genuiSurfaces,
        postPipeline,
        { includeTriggerless: false },
      ),
    };
  }

  return { preRun, postRun };
}
/**
 * Fallback pre-run snapshot for when there are no pre-run stages: a copy of
 * `snapshot` with an empty pipeline and only the GenUI surfaces that have no
 * stage trigger.
 *
 * @returns `null` when a pre-run pipeline exists or when there are no
 *   trigger-less surfaces.
 */
function buildPreRunSurfaceOnlySnapshot(
  snapshot: AppliedPluginSnapshot,
  preRunPipeline: PluginPipeline | null,
): AppliedPluginSnapshot | null {
  if (preRunPipeline) {
    return null;
  }
  const stagelessSurfaces = (snapshot.genuiSurfaces ?? []).filter(
    (surface) => !surface.trigger?.stageId,
  );
  if (stagelessSurfaces.length === 0) {
    return null;
  }
  return {
    ...snapshot,
    pipeline: { stages: [] },
    genuiSurfaces: stagelessSurfaces,
  };
}
/**
 * Keeps the surfaces whose trigger stage belongs to `pipeline`.
 *
 * Surfaces without a trigger stage are kept only when
 * `options.includeTriggerless` is true. A missing or empty `surfaces` value
 * is returned as-is.
 */
function filterSurfacesForPipelineStages(
  surfaces: GenUISurfaceSpec[] | undefined,
  pipeline: PluginPipeline,
  options: { includeTriggerless: boolean },
): GenUISurfaceSpec[] | undefined {
  if (surfaces == null || !surfaces.length) {
    return surfaces;
  }
  const knownStageIds = new Set<string>();
  for (const stage of pipeline.stages) {
    knownStageIds.add(stage.id);
  }
  return surfaces.filter((surface) => {
    const triggerStageId = surface.trigger?.stageId;
    return triggerStageId ? knownStageIds.has(triggerStageId) : options.includeTriggerless;
  });
}

View File

@@ -157,7 +157,6 @@ import {
restoreProjectSnapshotLink,
resolvePluginSnapshot,
runPipelineForRun,
splitPipelineSnapshotByExecutionBoundary,
runStageWithRegistry,
startSnapshotGc,
uninstallPlugin,
@@ -10935,13 +10934,10 @@ export async function startServer({
// back to the canned v1 stub for diagnostic bisection or replay
// of pre-Stage-D runs. Errors are swallowed (logged) so a bad
// pipeline never blocks the agent run.
const executePipelineForRun = async (args) => {
const firePipelineForRun = (args) => {
const { run, snapshot, runs, db: dbHandle } = args;
if (!snapshot?.pipeline) {
return { outcomes: [], lastSignalsByStage: new Map() };
}
if (!snapshot?.pipeline?.stages?.length) return;
const env = { maxIterations: readPluginEnvKnobs().maxDevloopIterations };
const lastSignalsByStage = new Map();
const emitPipeline = (evt) => {
try { runs.emit(run, evt.kind, evt); } catch {/* ignore */}
};
@@ -10956,47 +10952,32 @@ export async function startServer({
: 'registry';
let runStage;
if (runnerMode === 'stub') {
runStage = ({ stage, iteration }) => {
const outcome = {
signals: {
'critique.score': iteration >= 0 ? 4 : 0,
'preview.ok': true,
'user.confirmed': true,
},
};
lastSignalsByStage.set(stage.id, outcome.signals);
return outcome;
};
runStage = ({ iteration }) => ({
signals: {
'critique.score': iteration >= 0 ? 4 : 0,
'preview.ok': true,
'user.confirmed': true,
},
});
} else {
registerBuiltInAtomWorkers();
runStage = async ({ stage, iteration, snapshot: stageSnapshot }) => {
const projectRecord = getProject(dbHandle, projectIdForRun);
const cwd = projectRecord
? resolveProjectDir(PROJECTS_DIR, projectIdForRun, projectRecord.metadata)
: null;
const entryFile = typeof projectRecord?.metadata?.entryFile === 'string'
? projectRecord.metadata.entryFile
: null;
const outcome = await runStageWithRegistry({
db: dbHandle,
runId: run.id,
projectId: projectIdForRun,
conversationId: run.conversationId ?? null,
daemonUrl,
cwd,
entryFile,
stage,
iteration,
snapshot: stageSnapshot,
});
lastSignalsByStage.set(stage.id, outcome.signals ?? {});
return {
signals: outcome.signals,
critiqueSummary: outcome.critiqueSummary,
};
};
}
const outcomes = await runPipelineForRun({
void runPipelineForRun({
db: dbHandle,
runId: run.id,
projectId: projectIdForRun,
@@ -11007,13 +10988,7 @@ export async function startServer({
runStage,
emitPipeline,
emitGenui,
});
return { outcomes, lastSignalsByStage };
};
const firePipelineForRun = (args) => {
const { run, snapshot, runs } = args;
void executePipelineForRun(args).catch((err) => {
}).catch((err) => {
try {
runs.emit(run, 'pipeline_stage_failed', {
runId: run.id,
@@ -13542,67 +13517,25 @@ export async function startServer({
for (const chunk of plaintextStdoutBuffer) {
send('stdout', { chunk });
}
let finalStatus = status;
if (
finalStatus === 'succeeded'
&& run.postRunPipelineSnapshot?.pipeline?.stages?.length
) {
try {
const { outcomes, lastSignalsByStage } = await executePipelineForRun({
run,
snapshot: run.postRunPipelineSnapshot,
runs: design.runs,
db,
// Capture the pi session file path for conversational continuity.
// The session path is discovered by attachPiRpcSession when it
// processes agent_end; persist it under (conversationId, agentId) so
// another conversation in the same cwd cannot inherit this history.
if (acpSession && typeof acpSession.getLastSessionPath === 'function') {
const sessionPath = acpSession.getLastSessionPath();
if (status === 'succeeded' && def.streamFormat === 'pi-rpc') {
persistCapturedAgentSession(db, {
conversationId: run.conversationId,
agentId: def.id,
sessionId: sessionPath,
stablePromptHash: currentStableHash,
});
const failedStage = outcomes.find((outcome) => {
if (!outcome.converged) return true;
const stage = run.postRunPipelineSnapshot.pipeline.stages.find(
(candidate) => candidate.id === outcome.stageId,
);
if (!stage?.atoms.includes('visual-validation')) return false;
const signals = lastSignalsByStage.get(outcome.stageId) ?? {};
if (signals['preview.ok'] === false) return true;
return typeof signals['critique.score'] === 'number'
&& signals['critique.score'] < 4;
});
if (failedStage) {
const failedSignals = lastSignalsByStage.get(failedStage.stageId) ?? {};
const failedScore = failedSignals['critique.score'];
send('error', createSseErrorPayload(
'PLUGIN_PIPELINE_FAILED',
typeof failedScore === 'number'
? `Post-run visual validation scored ${failedScore}, so the run cannot finish successfully.`
: `Post-run pipeline stage "${failedStage.stageId}" did not finish successfully.`,
));
finalStatus = 'failed';
}
} catch (err) {
send('error', createSseErrorPayload(
'PLUGIN_PIPELINE_FAILED',
err instanceof Error ? err.message : String(err),
));
finalStatus = 'failed';
}
}
if (finalStatus === 'succeeded') {
// Capture the pi session file path for conversational continuity.
// The session path is discovered by attachPiRpcSession when it
// processes agent_end; persist it under (conversationId, agentId) so
// another conversation in the same cwd cannot inherit this history.
if (acpSession && typeof acpSession.getLastSessionPath === 'function') {
const sessionPath = acpSession.getLastSessionPath();
if (def.streamFormat === 'pi-rpc') {
persistCapturedAgentSession(db, {
conversationId: run.conversationId,
agentId: def.id,
sessionId: sessionPath,
stablePromptHash: currentStableHash,
});
}
}
if (status === 'succeeded') {
persistDeliveredAgentSessionState();
}
finishWithRetryDecision(finalStatus, code, signal);
finishWithRetryDecision(status, code, signal);
} finally {
// Best-effort cleanup of the per-run agy log file on every close
// path — successful, failed, cancelled, or non-zero exit — so
@@ -14073,17 +14006,19 @@ export async function startServer({
: {}),
};
res.status(202).json(body);
const pipelineSchedule = resolvedSnapshot?.ok
? splitPipelineSnapshotByExecutionBoundary(resolvedSnapshot.snapshot)
: { preRun: null, postRun: null };
// Fire only pre-run-safe stages before the agent starts. Stages that
// depend on agent-produced artifacts (`visual-validation`) are
// deferred until the run succeeds so they inspect the current output
// instead of the untouched pre-run workspace.
if (resolvedSnapshot?.ok && pipelineSchedule.preRun) {
// Plan §3.I1 / spec §10.1 — fire the pipeline schedule on the run's
// SSE stream BEFORE the agent process is started. The first
// pipeline_stage_started event is emitted synchronously (before
// the first await inside runPipelineForRun), so any SSE consumer
// that subscribes between create() and start() sees a stage event
// ahead of the agent's message_chunk stream — exactly what §8 e2e-3
// expects. The stub stage runner returns immediately so a
// non-loop pipeline walks through every stage in O(stages) time;
// the audit row in `run_devloop_iterations` records the timeline.
if (resolvedSnapshot?.ok && resolvedSnapshot.snapshot.pipeline) {
firePipelineForRun({
run,
snapshot: pipelineSchedule.preRun,
snapshot: resolvedSnapshot.snapshot,
runs: design.runs,
db,
});
@@ -14098,7 +14033,6 @@ export async function startServer({
console.warn('[plugins] skill candidate hook setup failed', err);
}
}
run.postRunPipelineSnapshot = pipelineSchedule.postRun;
design.runs.start(run, () => startChatRun(meta, run));
// Analytics v2: emit run_created (daemon-side authoritative) and

View File

@@ -92,9 +92,6 @@ function ctxFor(stage: PipelineStage, iteration = 0): AtomWorkerContext {
runId: 'run-1',
projectId: 'project-1',
conversationId: 'conv-A',
daemonUrl: null,
cwd: tmpDir,
entryFile: null,
stage,
iteration,
snapshot: fakeSnapshot(),
@@ -230,7 +227,7 @@ describe('built-in critique-theater worker', () => {
});
describe('registerBuiltInAtomWorkers: idempotency', () => {
it('registers every implemented first-party atom exactly once even on repeat calls', () => {
it('registers every FIRST_PARTY_ATOM exactly once even on repeat calls', () => {
registerBuiltInAtomWorkers();
const first = listRegisteredAtomIds();
registerBuiltInAtomWorkers();
@@ -239,7 +236,6 @@ describe('registerBuiltInAtomWorkers: idempotency', () => {
expect(first).toContain('critique-theater');
expect(first).toContain('file-write');
expect(first).toContain('media-image');
expect(first).toContain('visual-validation');
});
});

View File

@@ -29,15 +29,9 @@ describe('atoms catalog — Phase 6/7/8 promotion', () => {
expect(atom?.taskKinds).toContain('code-migration');
});
it("'visual-validation' is implemented with daemon-backed screenshot comparison", () => {
const atom = findAtom('visual-validation');
expect(atom?.status).toBe('implemented');
expect(isImplementedAtom('visual-validation')).toBe(true);
});
it('the catalog no longer contains planned first-party atoms', () => {
it('the catalog has no remaining planned atoms (after the §3.AA2 promotion)', () => {
const planned = FIRST_PARTY_ATOMS.filter((a) => a.status === 'planned');
expect(planned).toEqual([]);
expect(planned.map((a) => a.id)).toEqual([]);
});
it('every atom in the catalog has a non-empty taskKinds[]', () => {

View File

@@ -25,7 +25,6 @@ const PHASE_4_ATOMS = [
'direction-picker',
'todo-write',
'critique-theater',
'visual-validation',
];
// Phase 6 (figma-migration native, spec §21.4)

View File

@@ -62,10 +62,6 @@ describe('plugins/_official/scenarios roster', () => {
expect(manifest.od.taskKind).toBe(expected.taskKind);
const stageIds = manifest.od.pipeline.stages.map((s: { id: string }) => s.id);
expect(stageIds).toEqual(expected.pipelineStages);
if (folder === 'od-new-generation' || folder === 'od-tune-collab') {
const critiqueStage = manifest.od.pipeline.stages.find((s: { id: string }) => s.id === 'critique');
expect(critiqueStage?.atoms).toEqual(['critique-theater']);
}
});
}

View File

@@ -31,14 +31,6 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import path from 'node:path';
import url from 'node:url';
import { promisify } from 'node:util';
import {
clearAtomWorkers,
registerAtomWorker,
} from '../src/plugins/atoms/registry.js';
import {
registerBuiltInAtomWorkers,
resetBuiltInAtomWorkersForTests,
} from '../src/plugins/atoms/built-ins.js';
import { startServer } from '../src/server.js';
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
@@ -728,324 +720,4 @@ process.stdin.on('end', () => {
await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}/cancel`, { method: 'POST' });
await fs.rm(tmpRoot, { recursive: true, force: true });
});
// End-to-end check that a pipeline stage using the `visual-validation` atom
// observes the artifact as the agent left it ("after"), not the file that was
// seeded into the project before the run started ("before").
it('defers visual-validation until after the agent rewrites the artifact', async () => {
const fs = await import('node:fs/promises');
const os = await import('node:os');
const tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'od-headless-visual-validation-'));
const fixture = path.join(tmpRoot, 'visual-validation-plugin');
// Every index.html body the fake worker reads, in call order.
const seenHtml: string[] = [];
// Reset the atom registry, re-register the built-ins, then register a test
// worker under the `visual-validation` id that only records what it reads.
// NOTE(review): presumably this registration replaces the built-in worker of
// the same id — confirm against the registry's duplicate-id semantics.
clearAtomWorkers();
resetBuiltInAtomWorkersForTests();
registerBuiltInAtomWorkers();
registerAtomWorker({
id: 'visual-validation',
run: async (ctx) => {
if (!ctx.cwd) throw new Error('expected project cwd for visual validation');
seenHtml.push(await readFile(path.join(ctx.cwd, 'index.html'), 'utf8'));
return {
signals: {
'preview.ok': true,
'critique.score': 5,
},
note: 'captured test artifact',
};
},
});
try {
// Write a plugin fixture whose only pipeline stage is `critique`, made up of
// the single `visual-validation` atom.
await fs.mkdir(fixture, { recursive: true });
await fs.writeFile(
path.join(fixture, 'open-design.json'),
JSON.stringify({
$schema: 'https://open-design.ai/schemas/plugin.v1.json',
name: 'visual-validation-plugin',
title: 'Visual Validation Plugin',
version: '1.0.0',
description: 'fixture with a post-run visual validation stage',
license: 'MIT',
od: {
kind: 'skill',
taskKind: 'new-generation',
useCase: { query: 'Make a {{topic}} brief.' },
inputs: [{ name: 'topic', type: 'string', required: true, label: 'Topic' }],
pipeline: {
stages: [
{
id: 'critique',
atoms: ['visual-validation'],
repeat: false,
},
],
},
capabilities: ['prompt:inject'],
},
}, null, 2),
);
await fs.writeFile(
path.join(fixture, 'SKILL.md'),
'---\nname: visual-validation-plugin\ndescription: fixture with visual validation\n---\n# Visual validation\n',
);
// Install the fixture through the daemon's HTTP API and wait for the install
// SSE stream to report success.
const installResp = await fetch(`${baseUrl}/api/plugins/install`, {
method: 'POST',
headers: { 'content-type': 'application/json', accept: 'text/event-stream' },
body: JSON.stringify({ source: fixture }),
});
await readSseUntilSuccess(installResp);
// Create a project bound to the plugin; the response must carry the applied
// snapshot id that the run request below references.
const projectId = `visual-validation-${Date.now()}`;
const createResp = await fetch(`${baseUrl}/api/projects`, {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({
id: projectId,
name: 'Visual validation pipeline e2e',
pluginId: 'visual-validation-plugin',
pluginInputs: { topic: 'artifact rewrite' },
grantCaps: ['pipeline:*'],
}),
});
expect(createResp.status).toBe(200);
const createBody = (await createResp.json()) as {
appliedPluginSnapshotId?: string;
};
expect(createBody.appliedPluginSnapshotId).toBeTruthy();
// Seed the pre-run artifact ("before"). If the worker ran ahead of the agent
// this is the content it would capture.
const seedResp = await fetch(`${baseUrl}/api/projects/${projectId}/files`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name: 'index.html', content: '<!doctype html><h1>before</h1>' }),
});
expect(seedResp.status).toBe(200);
// Fake `opencode` agent: answers the version/models probes, and on `run`
// waits 150 ms, overwrites index.html with the "after" markup, prints one
// text event and exits 0.
await withFakeAgent(
'opencode',
`
const fs = require('node:fs');
if (process.argv.includes('--version')) {
console.log('opencode 0.0.0');
process.exit(0);
}
if (process.argv[2] === 'models') {
console.log('test/model');
process.exit(0);
}
if (process.argv[2] === 'run') {
setTimeout(() => {
fs.writeFileSync('index.html', '<!doctype html><h1>after</h1>');
console.log(JSON.stringify({ type: 'text', part: { text: 'rewritten' } }));
process.exit(0);
}, 150);
}
`,
async () => {
const runResp = await fetch(`${baseUrl}/api/runs`, {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({
agentId: 'opencode',
projectId,
pluginId: 'visual-validation-plugin',
appliedPluginSnapshotId: createBody.appliedPluginSnapshotId,
grantCaps: ['pipeline:*'],
}),
});
expect(runResp.status).toBe(202);
const runBody = (await runResp.json()) as { runId: string };
// 50 ms after the run is accepted the fake agent (150 ms delay) has not
// written yet, so the worker must not have sampled anything.
await new Promise((resolve) => setTimeout(resolve, 50));
expect(seenHtml).toEqual([]);
// Poll the run status for up to 5 s until it has succeeded AND the worker
// has recorded at least one read.
const deadline = Date.now() + 5_000;
while (Date.now() < deadline) {
const statusResp = await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}`);
expect(statusResp.status).toBe(200);
const statusBody = (await statusResp.json()) as { status?: string };
if (statusBody.status === 'succeeded' && seenHtml.length > 0) break;
await new Promise((resolve) => setTimeout(resolve, 50));
}
// Exactly one read, and it saw the agent-written content.
expect(seenHtml).toEqual(['<!doctype html><h1>after</h1>']);
},
);
} finally {
// Put the registry back to the plain built-in set and drop the temp fixture
// so later tests are unaffected.
clearAtomWorkers();
resetBuiltInAtomWorkersForTests();
registerBuiltInAtomWorkers();
await fs.rm(tmpRoot, { recursive: true, force: true });
}
});
// End-to-end gate check: when the `visual-validation` stage reports
// `preview.ok: false` / `critique.score: 1`, the run must end up `failed`, and
// the pipeline stage events must appear on the run's SSE stream before the
// terminal `end` event.
it('fails the run after post-run visual validation and before the terminal end event', async () => {
  const fs = await import('node:fs/promises');
  const os = await import('node:os');
  const tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'od-headless-visual-validation-gate-'));
  const fixture = path.join(tmpRoot, 'visual-validation-gate-plugin');
  // Every index.html body the fake worker reads, in call order.
  const seenHtml: string[] = [];
  // Reset the registry, then register a test worker under the
  // `visual-validation` id that records the artifact and reports failure.
  // NOTE(review): presumably this replaces the built-in worker of the same
  // id — confirm against the registry's duplicate-id semantics.
  clearAtomWorkers();
  resetBuiltInAtomWorkersForTests();
  registerBuiltInAtomWorkers();
  registerAtomWorker({
    id: 'visual-validation',
    run: async (ctx) => {
      if (!ctx.cwd) throw new Error('expected project cwd for visual validation');
      seenHtml.push(await readFile(path.join(ctx.cwd, 'index.html'), 'utf8'));
      return {
        signals: {
          'preview.ok': false,
          'critique.score': 1,
        },
        note: 'captured failing test artifact',
      };
    },
  });
  try {
    // Plugin fixture with a single `critique` stage made of `visual-validation`.
    await fs.mkdir(fixture, { recursive: true });
    await fs.writeFile(
      path.join(fixture, 'open-design.json'),
      JSON.stringify({
        $schema: 'https://open-design.ai/schemas/plugin.v1.json',
        name: 'visual-validation-gate-plugin',
        title: 'Visual Validation Gate Plugin',
        version: '1.0.0',
        description: 'fixture with a failing post-run visual validation stage',
        license: 'MIT',
        od: {
          kind: 'skill',
          taskKind: 'new-generation',
          useCase: { query: 'Make a {{topic}} brief.' },
          inputs: [{ name: 'topic', type: 'string', required: true, label: 'Topic' }],
          pipeline: {
            stages: [
              {
                id: 'critique',
                atoms: ['visual-validation'],
                repeat: false,
              },
            ],
          },
          capabilities: ['prompt:inject'],
        },
      }, null, 2),
    );
    await fs.writeFile(
      path.join(fixture, 'SKILL.md'),
      '---\nname: visual-validation-gate-plugin\ndescription: fixture with failing visual validation\n---\n# Visual validation gate\n',
    );
    const installResp = await fetch(`${baseUrl}/api/plugins/install`, {
      method: 'POST',
      headers: { 'content-type': 'application/json', accept: 'text/event-stream' },
      body: JSON.stringify({ source: fixture }),
    });
    await readSseUntilSuccess(installResp);
    const projectId = `visual-validation-gate-${Date.now()}`;
    const createResp = await fetch(`${baseUrl}/api/projects`, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({
        id: projectId,
        name: 'Visual validation gate pipeline e2e',
        pluginId: 'visual-validation-gate-plugin',
        pluginInputs: { topic: 'artifact rewrite' },
        grantCaps: ['pipeline:*'],
      }),
    });
    expect(createResp.status).toBe(200);
    const createBody = (await createResp.json()) as {
      appliedPluginSnapshotId?: string;
    };
    expect(createBody.appliedPluginSnapshotId).toBeTruthy();
    // Seed the pre-run artifact ("before"); the agent overwrites it below.
    const seedResp = await fetch(`${baseUrl}/api/projects/${projectId}/files`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: 'index.html', content: '<!doctype html><h1>before</h1>' }),
    });
    expect(seedResp.status).toBe(200);
    // Fake `opencode` agent: on `run` it waits 150 ms, rewrites index.html to
    // the "after" markup, prints one text event and exits 0. The script body
    // is a runtime string and is kept flush-left exactly as written.
    await withFakeAgent(
      'opencode',
      `
const fs = require('node:fs');
if (process.argv.includes('--version')) {
console.log('opencode 0.0.0');
process.exit(0);
}
if (process.argv[2] === 'models') {
console.log('test/model');
process.exit(0);
}
if (process.argv[2] === 'run') {
setTimeout(() => {
fs.writeFileSync('index.html', '<!doctype html><h1>after</h1>');
console.log(JSON.stringify({ type: 'text', part: { text: 'rewritten' } }));
process.exit(0);
}, 150);
}
`,
      async () => {
        const runResp = await fetch(`${baseUrl}/api/runs`, {
          method: 'POST',
          headers: { 'content-type': 'application/json' },
          body: JSON.stringify({
            agentId: 'opencode',
            projectId,
            pluginId: 'visual-validation-gate-plugin',
            appliedPluginSnapshotId: createBody.appliedPluginSnapshotId,
            grantCaps: ['pipeline:*'],
          }),
        });
        expect(runResp.status).toBe(202);
        const runBody = (await runResp.json()) as { runId: string };
        const eventsResp = await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}/events`, {
          headers: { accept: 'text/event-stream' },
        });
        expect(eventsResp.body).toBeTruthy();
        const reader = eventsResp.body!.getReader();
        const decoder = new TextDecoder();
        let buffer = '';
        // SSE event names in arrival order.
        const events: string[] = [];
        let sawEnd = false;
        try {
          // Read until the stream closes or the terminal `end` event arrives.
          // SSE frames are separated by a blank line; a trailing partial frame
          // stays in `buffer` until the next chunk completes it.
          while (!sawEnd) {
            const { value, done } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const blocks = buffer.split('\n\n');
            buffer = blocks.pop() ?? '';
            for (const block of blocks) {
              const eventLine = block.split('\n').find((line) => line.startsWith('event: '));
              if (!eventLine) continue;
              const event = eventLine.slice('event: '.length);
              events.push(event);
              if (event === 'end') {
                sawEnd = true;
                break;
              }
            }
          }
        } finally {
          // Release the stream on every exit path (break on `end`, natural
          // close, or a thrown read) so the connection is not left open.
          // Cancelling an already-errored stream rejects; that is swallowed
          // here so it cannot mask the original failure.
          await reader.cancel().catch(() => {});
        }
        expect(seenHtml).toEqual(['<!doctype html><h1>after</h1>']);
        expect(events).toContain('pipeline_stage_started');
        expect(events).toContain('pipeline_stage_completed');
        expect(events.indexOf('pipeline_stage_completed')).toBeGreaterThan(-1);
        // The terminal event must come after the pipeline stage has completed.
        expect(events.indexOf('end')).toBeGreaterThan(events.indexOf('pipeline_stage_completed'));
        const statusResp = await fetch(`${baseUrl}/api/runs/${encodeURIComponent(runBody.runId)}`);
        expect(statusResp.status).toBe(200);
        const statusBody = (await statusResp.json()) as { status?: string };
        expect(statusBody.status).toBe('failed');
      },
    );
  } finally {
    // Put the registry back to the plain built-in set and drop the temp fixture.
    clearAtomWorkers();
    resetBuiltInAtomWorkersForTests();
    registerBuiltInAtomWorkers();
    await fs.rm(tmpRoot, { recursive: true, force: true });
  }
});
});

View File

@@ -212,9 +212,6 @@ describe('pipeline-runner: Stage D registry runner integration', () => {
runId: 'run-stage-d',
projectId: 'project-1',
conversationId: 'conv-A',
daemonUrl: null,
cwd: null,
entryFile: null,
stage,
iteration,
snapshot: snap2,
@@ -249,9 +246,6 @@ describe('pipeline-runner: Stage D registry runner integration', () => {
runId: 'run-permissive',
projectId: 'project-1',
conversationId: 'conv-A',
daemonUrl: null,
cwd: null,
entryFile: null,
stage,
iteration,
snapshot: snap2,

View File

@@ -1,163 +0,0 @@
import { describe, expect, it } from 'vitest';
import type { AppliedPluginSnapshot, GenUISurfaceSpec, PluginPipeline } from '@open-design/contracts';
import {
splitPipelineByExecutionBoundary,
splitPipelineSnapshotByExecutionBoundary,
} from '../src/plugins/pipeline-schedule.js';
describe('splitPipelineByExecutionBoundary', () => {
  // Ordered stage ids of one half of a schedule; undefined when that half is null.
  const stageIdsOf = (half: PluginPipeline | null | undefined) =>
    half?.stages.map((stage) => stage.id);

  it('keeps pre-run-only pipelines intact', () => {
    // Neither stage contains a post-run atom, so nothing should be deferred.
    const input: PluginPipeline = {
      stages: [
        { id: 'discovery', atoms: ['discovery-question-form'] },
        { id: 'plan', atoms: ['todo-write'] },
      ],
    };
    const { preRun, postRun } = splitPipelineByExecutionBoundary(input);
    expect(preRun).toEqual(input);
    expect(postRun).toBeNull();
  });

  it('defers visual-validation stages until after the run succeeds', () => {
    // Only the trailing critique stage carries `visual-validation`.
    const input: PluginPipeline = {
      stages: [
        { id: 'discovery', atoms: ['discovery-question-form'] },
        { id: 'generate', atoms: ['file-write', 'live-artifact'] },
        {
          id: 'critique',
          atoms: ['critique-theater', 'visual-validation'],
          repeat: true,
          until: 'critique.score>=4 || iterations>=3',
        },
      ],
    };
    const { preRun, postRun } = splitPipelineByExecutionBoundary(input);
    expect(stageIdsOf(preRun)).toEqual([
      'discovery',
      'generate',
    ]);
    expect(stageIdsOf(postRun)).toEqual([
      'critique',
    ]);
  });

  it('keeps the full suffix in post-run order once a post-run atom appears', () => {
    // `handoff` has no post-run atom itself but follows one that does.
    const input: PluginPipeline = {
      stages: [
        { id: 'direction', atoms: ['discovery-question-form'] },
        { id: 'patch', atoms: ['file-write'] },
        { id: 'critique', atoms: ['critique-theater', 'visual-validation'] },
        { id: 'handoff', atoms: ['handoff'] },
      ],
    };
    const { preRun, postRun } = splitPipelineByExecutionBoundary(input);
    expect(stageIdsOf(preRun)).toEqual([
      'direction',
      'patch',
    ]);
    expect(stageIdsOf(postRun)).toEqual([
      'critique',
      'handoff',
    ]);
  });
});
describe('splitPipelineSnapshotByExecutionBoundary', () => {
  // Build a snapshot fixture; only the id, digest, stages and surfaces vary
  // between the cases below. Key order mirrors the original inline literals.
  const buildSnapshot = (
    snapshotId: string,
    manifestSourceDigest: string,
    stages: PluginPipeline['stages'],
    genuiSurfaces: GenUISurfaceSpec[],
  ): AppliedPluginSnapshot =>
    ({
      snapshotId,
      pluginId: 'sample-plugin',
      pluginVersion: '1.0.0',
      manifestSourceDigest,
      inputs: {},
      resolvedContext: { items: [] },
      capabilitiesGranted: [],
      capabilitiesRequired: [],
      assetsStaged: [],
      taskKind: 'new-generation',
      appliedAt: 0,
      connectorsRequired: [],
      connectorsResolved: [],
      mcpServers: [],
      pipeline: { stages },
      genuiSurfaces,
      status: 'fresh',
    }) as AppliedPluginSnapshot;

  // Ordered stage ids / surface ids of one half of the split (undefined when absent).
  const stageIdsOf = (half: AppliedPluginSnapshot | null | undefined) =>
    half?.pipeline?.stages.map((stage) => stage.id);
  const surfaceIdsOf = (half: AppliedPluginSnapshot | null | undefined) =>
    half?.genuiSurfaces?.map((surface) => surface.id);

  it('keeps triggerless surfaces in pre-run only and stage-scopes the deferred suffix', () => {
    const snapshot = buildSnapshot(
      'snap-1',
      'digest-1',
      [
        { id: 'direction', atoms: ['discovery-question-form'] },
        { id: 'critique', atoms: ['visual-validation'] },
        { id: 'handoff', atoms: ['handoff'] },
      ],
      [
        { id: 'confirm', kind: 'confirmation', persist: 'run' },
        { id: 'direction-form', kind: 'form', persist: 'run', trigger: { stageId: 'direction' } },
        { id: 'critique-form', kind: 'form', persist: 'run', trigger: { stageId: 'critique' } },
        { id: 'handoff-form', kind: 'form', persist: 'run', trigger: { stageId: 'handoff' } },
      ],
    );
    const { preRun, postRun } = splitPipelineSnapshotByExecutionBoundary(snapshot);
    expect(stageIdsOf(preRun)).toEqual(['direction']);
    expect(surfaceIdsOf(preRun)).toEqual([
      'confirm',
      'direction-form',
    ]);
    expect(stageIdsOf(postRun)).toEqual([
      'critique',
      'handoff',
    ]);
    expect(surfaceIdsOf(postRun)).toEqual([
      'critique-form',
      'handoff-form',
    ]);
  });

  it('raises triggerless surfaces before an all-deferred pipeline starts', () => {
    // The very first stage is post-run, so the pre-run half has no stages.
    const snapshot = buildSnapshot(
      'snap-2',
      'digest-2',
      [
        { id: 'critique', atoms: ['visual-validation'] },
        { id: 'handoff', atoms: ['handoff'] },
      ],
      [
        { id: 'confirm', kind: 'confirmation', persist: 'run' },
        { id: 'critique-form', kind: 'form', persist: 'run', trigger: { stageId: 'critique' } },
        { id: 'handoff-form', kind: 'form', persist: 'run', trigger: { stageId: 'handoff' } },
      ],
    );
    const { preRun, postRun } = splitPipelineSnapshotByExecutionBoundary(snapshot);
    expect(preRun?.pipeline?.stages).toEqual([]);
    expect(surfaceIdsOf(preRun)).toEqual([
      'confirm',
    ]);
    expect(surfaceIdsOf(postRun)).toEqual([
      'critique-form',
      'handoff-form',
    ]);
  });
});

View File

@@ -129,9 +129,6 @@ describe('apply: bundled-scenario pipeline fallback (spec §23.3.3)', () => {
]),
});
expect(out.result.pipeline?.stages?.[0]?.id).toBe('discovery');
expect(out.result.pipeline?.stages?.find((stage) => stage.id === 'critique')?.atoms).toEqual([
'critique-theater',
]);
});
it('keeps pipeline undefined when no scenario matches the taskKind', () => {

View File

@@ -1,616 +0,0 @@
import { describe, expect, it } from 'vitest';
import { chmod, mkdir, mkdtemp, readFile, rm, symlink, writeFile } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { PNG } from 'pngjs';
import {
resolveVisualValidationChromiumLaunchOptions,
resolvePackagedPlaywrightBrowsersPath,
runVisualValidation,
similarityToCritiqueScore,
} from '../src/plugins/atoms/visual-validation.js';
describe('visual validation atom runner', () => {
// With an HTML entry but no reference screenshot in the workspace, the runner
// reports `skipped` and contributes no signals.
it('skips cleanly when no reference images are present', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-skip-'));
  try {
    const entryPath = path.join(workDir, 'index.html');
    await writeFile(entryPath, '<!doctype html><html><body>ok</body></html>', 'utf8');
    const { report, signals } = await runVisualValidation({
      cwd: workDir,
      captureScreenshot: async ({ outputPath }) => {
        const blankShot = createFilledPng(320, 240, [255, 255, 255, 255]);
        await writeFile(outputPath, PNG.sync.write(blankShot));
      },
    });
    expect(report.status).toBe('skipped');
    expect(signals).toEqual({});
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// A reference image without any HTML entry must yield a `failed` report,
// failing signals, and a persisted report.json carrying the same message.
it('fails closed when references exist but no HTML entry file is found', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-missing-entry-'));
  try {
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    const { report, signals } = await runVisualValidation({
      cwd: workDir,
      captureScreenshot: async ({ outputPath }) => {
        const shot = createFilledPng(200, 120, [255, 255, 255, 255]);
        await writeFile(outputPath, PNG.sync.write(shot));
      },
    });
    expect(report.status).toBe('failed');
    expect(report.entryFile).toBeNull();
    expect(report.message).toContain('no HTML entry file found');
    expect(signals['preview.ok']).toBe(false);
    expect(signals['critique.score']).toBe(1);
    // The failure is also written to disk for later inspection.
    const savedReportPath = path.join(workDir, 'critique', 'visual-validation', 'report.json');
    const savedText = await readFile(savedReportPath, 'utf8');
    const saved = JSON.parse(savedText) as { status?: string; message?: string };
    expect(saved.status).toBe('failed');
    expect(saved.message).toContain('no HTML entry file found');
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// A white reference versus a capture with a red rectangle painted on it: the
// runner reports `ok`, records a non-zero diff with suggestions, scores 3,
// and persists the same diff count to report.json.
it('compares rendered output against reference screenshots and writes a report', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-compare-'));
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    const { report, signals } = await runVisualValidation({
      cwd: workDir,
      entryUrl: 'about:blank',
      captureScreenshot: async ({ outputPath }) => {
        const shot = createFilledPng(200, 120, [255, 255, 255, 255]);
        paintRect(shot, { x: 40, y: 25, width: 60, height: 30 }, [255, 0, 0, 255]);
        await writeFile(outputPath, PNG.sync.write(shot));
      },
    });
    const comparison = report.comparison;
    expect(report.status).toBe('ok');
    expect(comparison?.similarity).toBeLessThan(95);
    expect(comparison?.diffPixels).toBeGreaterThan(0);
    expect(comparison?.suggestions.length).toBeGreaterThan(0);
    expect(signals['preview.ok']).toBe(true);
    expect(signals['critique.score']).toBe(3);
    const savedReportPath = path.join(workDir, 'critique', 'visual-validation', 'report.json');
    const savedText = await readFile(savedReportPath, 'utf8');
    const saved = JSON.parse(savedText) as { comparison?: { diffPixels?: number } };
    expect(saved.comparison?.diffPixels).toBe(comparison?.diffPixels);
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// When both index.html and another HTML file exist, an explicit `entryFile`
// is what gets reported and what the capture callback receives.
it('honors an explicit entryFile over auto-detected index.html', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-explicit-entry-'));
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>stale</body></html>', 'utf8');
    await writeFile(path.join(workDir, 'active.html'), '<!doctype html><html><body>active</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    // Entry file handed to the capture callback, if it was called.
    let seenEntryFile: string | null = null;
    const { report } = await runVisualValidation({
      cwd: workDir,
      entryFile: 'active.html',
      entryUrl: 'about:blank',
      captureScreenshot: async ({ entryFile, outputPath }) => {
        seenEntryFile = entryFile;
        const shot = createFilledPng(200, 120, [255, 255, 255, 255]);
        await writeFile(outputPath, PNG.sync.write(shot));
      },
    });
    expect(report.status).toBe('ok');
    expect(report.entryFile).toBe('active.html');
    expect(seenEntryFile).toBe('active.html');
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// With `projectId` and `daemonUrl` supplied, the runner asks the daemon's
// preview-url endpoint for the entry and captures the absolute URL built from
// the relative path the daemon returns.
it('uses the daemon preview route instead of file:// when project context is available', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-preview-route-'));
  // Saved so the global can be restored even when the test fails.
  const realFetch = globalThis.fetch;
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    // Stub fetch: assert the exact lookup URL and answer with a relative preview path.
    globalThis.fetch = async (input) => {
      expect(String(input)).toBe(
        'http://127.0.0.1:7456/api/projects/project-123/preview-url?file=index.html',
      );
      const payload = JSON.stringify({ url: '/api/projects/project-123/preview/scope-123/index.html' });
      return new Response(payload, {
        status: 200,
        headers: { 'content-type': 'application/json' },
      });
    };
    // URL handed to the capture callback, if it was called.
    let seenEntryUrl: string | null = null;
    const { report } = await runVisualValidation({
      cwd: workDir,
      projectId: 'project-123',
      daemonUrl: 'http://127.0.0.1:7456/',
      captureScreenshot: async ({ entryUrl, outputPath }) => {
        seenEntryUrl = entryUrl;
        const shot = createFilledPng(200, 120, [255, 255, 255, 255]);
        await writeFile(outputPath, PNG.sync.write(shot));
      },
    });
    expect(report.status).toBe('ok');
    expect(seenEntryUrl).toBe(
      'http://127.0.0.1:7456/api/projects/project-123/preview/scope-123/index.html',
    );
  } finally {
    globalThis.fetch = realFetch;
    await rm(workDir, { recursive: true, force: true });
  }
});
// Called with only `cwd` (no entry URL, project id or daemon URL), the runner
// must report failure rather than render from disk.
it('fails closed instead of falling back to file:// when preview context is unavailable', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-missing-preview-context-'));
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    const { report, signals } = await runVisualValidation({ cwd: workDir });
    expect(report.status).toBe('failed');
    expect(report.message).toContain('requires daemon preview context');
    expect(signals['preview.ok']).toBe(false);
    expect(signals['critique.score']).toBe(1);
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// A 1920x300 reference must produce a 1920x300 capture viewport, and the
// report must record matching reference/actual dimensions.
it('captures with the reference dimensions instead of the old clamp bounds', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-reference-viewport-'));
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(1920, 300, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    // Viewport handed to the capture callback, if it was called.
    let seenViewport: { width: number; height: number } | null = null;
    const { report } = await runVisualValidation({
      cwd: workDir,
      entryUrl: 'about:blank',
      captureScreenshot: async ({ outputPath, viewport }) => {
        seenViewport = viewport;
        // Produce a capture of exactly the requested size.
        const shot = createFilledPng(viewport.width, viewport.height, [255, 255, 255, 255]);
        await writeFile(outputPath, PNG.sync.write(shot));
      },
    });
    const comparison = report.comparison;
    expect(report.status).toBe('ok');
    expect(seenViewport).toEqual({ width: 1920, height: 300 });
    expect(comparison?.referenceWidth).toBe(1920);
    expect(comparison?.actualWidth).toBe(1920);
    expect(comparison?.referenceHeight).toBe(300);
    expect(comparison?.actualHeight).toBe(300);
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// An exception from the capture callback becomes a `failed` report carrying
// the error text, failing signals, and a persisted report.json.
it('fails closed when capture throws', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-fail-'));
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    const { report, signals } = await runVisualValidation({
      cwd: workDir,
      entryUrl: 'about:blank',
      captureScreenshot: async () => {
        throw new Error('playwright launch failed');
      },
    });
    expect(report.status).toBe('failed');
    expect(report.message).toContain('playwright launch failed');
    expect(signals['preview.ok']).toBe(false);
    expect(signals['critique.score']).toBe(1);
    const savedReportPath = path.join(workDir, 'critique', 'visual-validation', 'report.json');
    const savedText = await readFile(savedReportPath, 'utf8');
    const saved = JSON.parse(savedText) as { status?: string; message?: string };
    expect(saved.status).toBe('failed');
    expect(saved.message).toContain('playwright launch failed');
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
// A regular file named `critique` blocks creation of the
// critique/visual-validation directory; the runner must surface ENOTDIR as a
// failed report instead of throwing.
it('fails closed when the visual-validation artifact directory cannot be created', async () => {
  const workDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-artifact-dir-'));
  try {
    await writeFile(path.join(workDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referenceBytes = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(workDir, 'reference-home.png'), referenceBytes);
    // Occupy the directory name with a plain file.
    const blockerPath = path.join(workDir, 'critique');
    await writeFile(blockerPath, 'not-a-directory', 'utf8');
    const { report, signals } = await runVisualValidation({
      cwd: workDir,
      entryUrl: 'about:blank',
      captureScreenshot: async ({ outputPath }) => {
        const shot = createFilledPng(200, 120, [255, 255, 255, 255]);
        await writeFile(outputPath, PNG.sync.write(shot));
      },
    });
    expect(report.status).toBe('failed');
    expect(report.message).toContain('ENOTDIR');
    expect(signals['preview.ok']).toBe(false);
    expect(signals['critique.score']).toBe(1);
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
});
it('fails closed when the Playwright browser runtime is unavailable', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-no-browser-'));
  // Error text the capture step rejects with, simulating a missing browser executable.
  const missingBrowserMessage =
    "browserType.launch: Executable doesn't exist at /tmp/ms-playwright/chromium\nPlease run the following command to download new browsers: npx playwright install";
  try {
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    const referencePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'reference-home.png'), referencePng);

    const { report, signals } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: () => Promise.reject(new Error(missingBrowserMessage)),
    });

    expect(report.status).toBe('failed');
    expect(report.message).toContain("Executable doesn't exist");
    expect(signals['preview.ok']).toBe(false);
    expect(signals['critique.score']).toBe(1);
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('resolves the packaged Playwright browser cache from OD_RESOURCE_ROOT', () => {
  // With only OD_RESOURCE_ROOT set, the expected cache is `<resource root>/ms-playwright`.
  const env = { OD_RESOURCE_ROOT: '/tmp/open-design/resources' } as NodeJS.ProcessEnv;
  const browsersPath = resolvePackagedPlaywrightBrowsersPath(env);
  expect(browsersPath).toBe('/tmp/open-design/resources/ms-playwright');
});
it('preserves an explicit Playwright browser cache override', () => {
  // When PLAYWRIGHT_BROWSERS_PATH is set alongside OD_RESOURCE_ROOT, the explicit value is returned.
  const env = {
    OD_RESOURCE_ROOT: '/tmp/open-design/resources',
    PLAYWRIGHT_BROWSERS_PATH: '/custom/playwright-cache',
  } as NodeJS.ProcessEnv;
  const browsersPath = resolvePackagedPlaywrightBrowsersPath(env);
  expect(browsersPath).toBe('/custom/playwright-cache');
});
it('launches visual validation with Playwright new-headless Chromium', () => {
  // The launch options must consist solely of the `chromium` channel selector.
  const launchOptions = resolveVisualValidationChromiumLaunchOptions();
  expect(launchOptions).toEqual({ channel: 'chromium' });
});
it('skips ignored dependency trees before recursing for references', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-ignore-'));
  const dependencyDir = path.join(projectDir, 'node_modules');
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    await mkdir(path.join(projectDir, 'references'), { recursive: true });
    await writeFile(path.join(projectDir, 'references', 'reference-home.png'), whitePng);
    await mkdir(path.join(dependencyDir, 'huge-package', 'assets'), { recursive: true });
    // Strip all permissions from node_modules; the run is still expected to report 'ok'.
    // NOTE(review): presumably not enforced when the suite runs as root or on Windows — confirm for CI.
    await chmod(dependencyDir, 0o000);

    const { report } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: ({ outputPath }) => writeFile(outputPath, whitePng),
    });

    expect(report.status).toBe('ok');
  } finally {
    // Restore permissions first so the recursive removal can delete the tree.
    await chmod(dependencyDir, 0o755).catch(() => {});
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('fails closed when reference auto-discovery hits an unreadable non-ignored directory', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-discovery-unreadable-'));
  const lockedDir = path.join(projectDir, 'private-assets');
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    await writeFile(path.join(projectDir, 'reference-home.png'), whitePng);
    await mkdir(lockedDir, { recursive: true });
    await writeFile(path.join(lockedDir, 'notes.txt'), 'keep out', 'utf8');
    // Strip all permissions from the directory; the run is expected to fail with EACCES.
    // NOTE(review): presumably not enforced when the suite runs as root or on Windows — confirm for CI.
    await chmod(lockedDir, 0o000);

    const { report, signals } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: ({ outputPath }) => writeFile(outputPath, whitePng),
    });

    expect(report.status).toBe('failed');
    expect(report.message).toContain('EACCES');
    expect(signals['preview.ok']).toBe(false);
    expect(signals['critique.score']).toBe(1);
  } finally {
    // Restore permissions first so the recursive removal can delete the tree.
    await chmod(lockedDir, 0o755).catch(() => {});
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('only auto-discovers PNG reference screenshots', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-png-only-'));
  try {
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    // The only reference-named file is a .jpg, so the run is expected to be skipped.
    await writeFile(path.join(projectDir, 'reference-home.jpg'), 'not-a-png', 'utf8');

    const { report, signals } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: ({ outputPath }) =>
        writeFile(outputPath, PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]))),
    });

    expect(report.status).toBe('skipped');
    expect(report.message).toContain('no reference screenshot found');
    expect(signals).toEqual({});
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('ignores arbitrary root-level spec-prefixed PNG assets', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-root-spec-asset-'));
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    // A root-level PNG whose name merely begins with "spec" must not count as a reference.
    await writeFile(path.join(projectDir, 'special-offer.png'), whitePng);

    const { report, signals } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: ({ outputPath }) => writeFile(outputPath, whitePng),
    });

    expect(report.status).toBe('skipped');
    expect(report.message).toContain('no reference screenshot found');
    expect(signals).toEqual({});
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('still auto-discovers spec-directory PNG assets', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-spec-dir-'));
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    const specDir = path.join(projectDir, 'spec');
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    await mkdir(specDir, { recursive: true });
    // The same file name that is ignored at the project root is picked up inside `spec/`.
    await writeFile(path.join(specDir, 'special-offer.png'), whitePng);

    const { report } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: ({ outputPath }) => writeFile(outputPath, whitePng),
    });

    expect(report.status).toBe('ok');
    expect(report.comparison?.referencePath).toBe('spec/special-offer.png');
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('skips symlinked directories while scanning for references', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-symlink-cycle-'));
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    await mkdir(path.join(projectDir, 'references'), { recursive: true });
    await writeFile(path.join(projectDir, 'references', 'reference-home.png'), whitePng);
    // Build a directory cycle: loop/nested/back-to-loop is a symlink pointing at loop itself.
    const loopDir = path.join(projectDir, 'loop');
    await mkdir(path.join(loopDir, 'nested'), { recursive: true });
    await symlink(loopDir, path.join(loopDir, 'nested', 'back-to-loop'));

    const { report } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: ({ outputPath }) => writeFile(outputPath, whitePng),
    });

    expect(report.status).toBe('ok');
    expect(report.comparison?.referencePath).toBe('references/reference-home.png');
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('keeps per-reference artifacts distinct when basenames collide', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-collisions-'));
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    // Two references share the basename `reference.png` but live in different directories.
    await mkdir(path.join(projectDir, 'references'), { recursive: true });
    await mkdir(path.join(projectDir, 'spec'), { recursive: true });
    await writeFile(path.join(projectDir, 'references', 'reference.png'), whitePng);
    await writeFile(path.join(projectDir, 'spec', 'reference.png'), whitePng);

    // Record every capture destination, relative to the project, in call order.
    const requestedOutputs: string[] = [];
    const { report } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: async ({ outputPath }) => {
        requestedOutputs.push(path.relative(projectDir, outputPath));
        await writeFile(outputPath, whitePng);
      },
    });

    expect(report.status).toBe('ok');
    expect(requestedOutputs).toEqual([
      'critique/visual-validation/references-reference-1.actual.png',
      'critique/visual-validation/spec-reference-2.actual.png',
    ]);
    expect(requestedOutputs).toContain(report.comparison?.actualPath);
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('does not treat substring directory names as reference-image segments', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-segment-match-'));
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    // "aspect" and "preferences" only contain "spec" / "references" as substrings.
    const aspectDir = path.join(projectDir, 'assets', 'aspect');
    const preferencesDir = path.join(projectDir, 'preferences');
    await mkdir(aspectDir, { recursive: true });
    await mkdir(preferencesDir, { recursive: true });
    await writeFile(path.join(aspectDir, 'hero.png'), whitePng);
    await writeFile(path.join(preferencesDir, 'panel.png'), whitePng);

    const { report, signals } = await runVisualValidation({
      cwd: projectDir,
      // Rejects if invoked; the assertions below expect a skipped run instead.
      captureScreenshot: () =>
        Promise.reject(new Error('visual validation should skip when no reference images are present')),
    });

    expect(report.status).toBe('skipped');
    expect(report.message).toContain('no reference screenshot');
    expect(signals).toEqual({});
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('scores from the worst reference match instead of the best one', async () => {
  const projectDir = await mkdtemp(path.join(os.tmpdir(), 'od-visual-worst-reference-'));
  try {
    const whitePng = PNG.sync.write(createFilledPng(200, 120, [255, 255, 255, 255]));
    const referencesDir = path.join(projectDir, 'references');
    await writeFile(path.join(projectDir, 'index.html'), '<!doctype html><html><body>ok</body></html>', 'utf8');
    await mkdir(referencesDir, { recursive: true });
    await writeFile(path.join(referencesDir, 'reference-desktop.png'), whitePng);
    await writeFile(path.join(referencesDir, 'reference-mobile.png'), whitePng);

    const { report, signals } = await runVisualValidation({
      cwd: projectDir,
      entryUrl: 'about:blank',
      captureScreenshot: async ({ outputPath }) => {
        const capture = createFilledPng(200, 120, [255, 255, 255, 255]);
        // Only the capture for the mobile reference gets a large red rectangle, making it the poor match.
        const isMobileCapture = outputPath.endsWith('reference-mobile-2.actual.png');
        if (isMobileCapture) {
          paintRect(capture, { x: 20, y: 20, width: 160, height: 80 }, [255, 0, 0, 255]);
        }
        await writeFile(outputPath, PNG.sync.write(capture));
      },
    });

    // The reported comparison and score must come from the mismatching mobile reference.
    expect(report.status).toBe('ok');
    expect(report.comparison?.referencePath).toBe('references/reference-mobile.png');
    expect(report.comparison?.similarity).toBeLessThan(50);
    expect(signals['preview.ok']).toBe(true);
    expect(signals['critique.score']).toBe(1);
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('maps similarity bands to critique scores conservatively', () => {
  // [similarity percentage, expected critique score], checked in the listed order.
  const expectations: ReadonlyArray<readonly [number, number]> = [
    [99, 5],
    [96, 4],
    [90, 3],
    [80, 2],
    [60, 1],
  ];
  for (const [similarity, expectedScore] of expectations) {
    expect(similarityToCritiqueScore(similarity)).toBe(expectedScore);
  }
});
});
/**
 * Creates a PNG of the given size with every pixel set to one RGBA colour.
 *
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param rgba - Red, green, blue and alpha channel values written to each pixel.
 * @returns The newly created, uniformly filled image.
 */
function createFilledPng(
  width: number,
  height: number,
  rgba: readonly [number, number, number, number],
): PNG {
  const image = new PNG({ width, height });
  const [red, green, blue, alpha] = rgba;
  const pixels = image.data;
  // Pixel data is laid out as consecutive 4-byte RGBA groups.
  let offset = 0;
  while (offset < pixels.length) {
    pixels[offset] = red;
    pixels[offset + 1] = green;
    pixels[offset + 2] = blue;
    pixels[offset + 3] = alpha;
    offset += 4;
  }
  return image;
}
/**
 * Fills a rectangular region of `png` with a single RGBA colour, in place.
 *
 * The rectangle is clamped to the image bounds. Without clamping, a rectangle
 * that extends past the right edge would spill into the start of the following
 * row, because pixel offsets are computed from `y * width + x`.
 *
 * @param png - Image whose pixel data is modified.
 * @param rect - Region to paint, in pixel coordinates with the origin at the top-left.
 * @param rgba - Red, green, blue and alpha channel values written to each painted pixel.
 */
function paintRect(
  png: PNG,
  rect: { x: number; y: number; width: number; height: number },
  rgba: readonly [number, number, number, number],
): void {
  // Intersect the requested rectangle with the image so no write lands outside it.
  const left = Math.max(0, rect.x);
  const top = Math.max(0, rect.y);
  const right = Math.min(png.width, rect.x + rect.width);
  const bottom = Math.min(png.height, rect.y + rect.height);
  for (let y = top; y < bottom; y += 1) {
    for (let x = left; x < right; x += 1) {
      // Each pixel occupies four consecutive bytes (RGBA).
      const index = (y * png.width + x) << 2;
      png.data[index] = rgba[0];
      png.data[index + 1] = rgba[1];
      png.data[index + 2] = rgba[2];
      png.data[index + 3] = rgba[3];
    }
  }
}

View File

@@ -9,6 +9,6 @@
# 1. Temporarily set the consuming `hash = lib.fakeHash;`
# 2. Run the relevant nix build/flake check
# 3. Copy the expected hash printed by Nix into the matching field below
daemonHash = "sha256-w1y5qrGa/vZtg4LXQvyrUp4a4Rk9x6z7ve4Up65P6cA=";
webHash = "sha256-Uj9HlDpTtO8y/0ykTVkFtM0oukt1zSYsmIp7JZ9NJJc=";
daemonHash = "sha256-AEg1yKQK55U9P5EJPNqWNkF9teKAV0rwV84F8Im2Ir0=";
webHash = "sha256-/VNR08beUFynS6/uZHoA9AlZE8PPPicGAyJJ6Oy7trg=";
}

View File

@@ -1,23 +0,0 @@
---
name: visual-validation
description: Render the current artifact, compare it against reference screenshots, and feed the result into critique scoring.
od:
scenario: new-generation
mode: critique
---
# Visual validation
This atom renders the current project artifact through the daemon preview
route, compares it against discovered or explicit reference screenshots, and
feeds a conservative score back into the critique loop.
## Current state
- The daemon registry executes `visual-validation` as a built-in atom worker.
- Reports are written under `critique/visual-validation/`.
- When no reference screenshots are present, the atom skips without changing
critique signals.
When references exist but the daemon cannot render the artifact, the atom fails
closed by returning a low critique score and `preview.ok: false`.

View File

@@ -1,39 +0,0 @@
{
"$schema": "https://open-design.ai/schemas/plugin.v1.json",
"specVersion": "1.0.0",
"name": "visual-validation",
"title": "Visual validation",
"version": "0.1.0",
"description": "Render the current artifact, compare it against reference screenshots, and feed the result into critique scoring.",
"license": "MIT",
"author": {
"name": "Open Design",
"url": "https://github.com/nexu-io"
},
"homepage": "https://github.com/nexu-io/open-design/tree/main/plugins/_official/atoms/visual-validation",
"tags": [
"atom",
"first-party",
"new-generation",
"tune-collab",
"visual-validation"
],
"compat": {
"agentSkills": [
{
"path": "./SKILL.md"
}
]
},
"od": {
"kind": "atom",
"scenario": "new-generation",
"mode": "critique",
"capabilities": [
"prompt:inject",
"fs:read",
"fs:write",
"subprocess"
]
}
}

View File

@@ -251,7 +251,7 @@
</div>
<script>
/* ---------- FadeIn (IntersectionObserver) ---------- */
document.querySelectorAll('.fade').forEach(el => {
const cfg = (el.dataset.fade || '').split(';').reduce((a,p)=>{const[k,v]=p.split(':');if(k)a[k.trim()]=v;return a;},{});

View File

@@ -236,7 +236,7 @@
</div>
<script>
const BG_IMG = 'https://images.higgs.ai/?default=1&output=webp&url=https%3A%2F%2Fd8j0ntlcm91z4.cloudfront.net%2Fuser_38xzZboKViGWJOttwIXH07lWA1P%2Fhf_20260603_073200_7082add5-f1f8-4873-8696-d6f78a44089b.png&w=1920&q=85';
document.getElementById('hero').style.setProperty('--bg', `url("${BG_IMG}")`);

View File

@@ -1,6 +1,6 @@
---
name: od-new-generation
description: Default reference pipeline for the new-generation taskKind — discovery → plan → generate → critique with critique-theater in the devloop.
description: Default reference pipeline for the new-generation taskKind — discovery → plan → generate → critique with a critique-theater devloop.
od:
scenario: new-generation
mode: scenario
@@ -33,10 +33,7 @@ rather than a code change.
The discovery stage gives the agent a clean surface for clarifying
questions; the plan stage commits to a TodoWrite-backed plan; the
generate stage produces the artifact; the critique stage devloops
until the score converges or the iteration ceiling is hit. Visual
validation stays available for explicit pipelines, but it is not part
of the default critique loop until the daemon can feed post-render
results back into a real retry boundary.
until the score converges or the iteration ceiling is hit.
## Plugins that customise this scenario

View File

@@ -35,8 +35,4 @@ artifact lineage chain stays intact across multi-turn tune cycles.
The handoff stage records `handoffKind: 'patch'` (or
`'deployable-app'` when the user opted in via
`od plugin run --target deployable-app` AND `build-test` ran
successfully somewhere upstream in the project's history). The
visual-validation atom remains available for explicit pipelines, but
the default critique loop stays text-only until the daemon can use a
post-render score to trigger another patch iteration instead of only
failing after the agent has exited.
successfully somewhere upstream in the project's history).

12
pnpm-lock.yaml generated
View File

@@ -101,15 +101,6 @@ importers:
node-pty:
specifier: 1.1.0
version: 1.1.0
pixelmatch:
specifier: 7.2.0
version: 7.2.0
playwright:
specifier: 1.60.0
version: 1.60.0
pngjs:
specifier: 7.0.0
version: 7.0.0
posthog-node:
specifier: 5.34.6
version: 5.34.6
@@ -135,9 +126,6 @@ importers:
'@types/node':
specifier: 20.19.39
version: 20.19.39
'@types/pngjs':
specifier: 6.0.5
version: 6.0.5
typescript:
specifier: 5.9.3
version: 5.9.3

View File

@@ -27,7 +27,7 @@ import {
} from "@open-design/platform";
import type { ToolPackConfig } from "./config.js";
import { copyBundledPlaywrightChromium, copyBundledResourceTrees, linuxResources } from "./resources.js";
import { copyBundledResourceTrees, linuxResources } from "./resources.js";
import { copyOptionalVelaCliBinary } from "./vela-cli.js";
import { electronBuilderVersionForAppVersion, readRuntimeAppVersion } from "./versions.js";
import { processWebSourcemaps } from "./web-sourcemaps.js";
@@ -458,10 +458,6 @@ async function copyResourceTree(config: ToolPackConfig, paths: LinuxPaths): Prom
workspaceRoot: config.workspaceRoot,
resourceRoot: paths.resourceRoot,
});
await copyBundledPlaywrightChromium({
workspaceRoot: config.workspaceRoot,
resourceRoot: paths.resourceRoot,
});
await mkdir(join(paths.resourceRoot, "bin"), { recursive: true });
await cp(process.execPath, join(paths.resourceRoot, "bin", "node"));
await chmod(join(paths.resourceRoot, "bin", "node"), 0o755);

View File

@@ -17,7 +17,7 @@ import {
shouldInstallInternalPackageForMacPrebundle,
shouldUseMacStandalonePrebundle,
} from "../mac-prebundle.js";
import { copyBundledPlaywrightChromium, copyBundledResourceTrees } from "../resources.js";
import { copyBundledResourceTrees } from "../resources.js";
import { copyOptionalVelaCliBinary } from "../vela-cli.js";
import { electronBuilderVersionForAppVersion } from "../versions.js";
import { runEsbuild, runNpmInstall, runPnpm } from "./commands.js";
@@ -138,10 +138,6 @@ export async function copyResourceTree(config: ToolPackConfig, paths: MacPaths):
workspaceRoot: config.workspaceRoot,
resourceRoot: paths.resourceRoot,
});
await copyBundledPlaywrightChromium({
workspaceRoot: config.workspaceRoot,
resourceRoot: paths.resourceRoot,
});
await copyOptionalVelaCliBinary({
platform: "mac",
requireBundled: config.requireVelaCli,

View File

@@ -1,7 +1,6 @@
import { readFileSync } from "node:fs";
import { access, cp, mkdir } from "node:fs/promises";
import { createRequire } from "node:module";
import { basename, dirname, join } from "node:path";
import { cp } from "node:fs/promises";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
function resolveToolsPackRoot(startDir: string): string {
@@ -51,9 +50,6 @@ export const linuxResources = {
desktopTemplate: join(resourcesRoot, "linux", "open-design.desktop.template"),
} as const;
const CHROMIUM_BUNDLE_ROOT_RE = /^chromium(?:_headless_shell)?-\d+$/i;
const HEADED_CHROMIUM_BUNDLE_ROOT_RE = /^chromium-\d+$/i;
const BUNDLED_RESOURCE_TREES = [
{ from: "skills", to: "skills" },
// After the skills/design-templates split (specs/current/skills-and-design-templates.md)
@@ -83,99 +79,3 @@ export async function copyBundledResourceTrees({
});
}
}
/**
 * Copies each resolved Playwright Chromium bundle root into
 * `<resourceRoot>/ms-playwright/<bundle directory name>`.
 *
 * @param resourceRoot - Packaged resource directory that receives the `ms-playwright` tree.
 * @param sourceExecutablePath - Optional Chromium executable to resolve bundle roots from;
 *   when omitted, resolution is delegated to `resolveBundledPlaywrightChromiumSourceRoots`.
 * @param workspaceRoot - Workspace root forwarded to the source-root resolver.
 * @returns The source bundle roots and the matching destinations, in copy order.
 */
export async function copyBundledPlaywrightChromium({
  resourceRoot,
  sourceExecutablePath,
  workspaceRoot,
}: {
  resourceRoot: string;
  sourceExecutablePath?: string;
  workspaceRoot: string;
}): Promise<{ sourceRoots: string[]; targetRoots: string[] }> {
  const resolved = await resolveBundledPlaywrightChromiumSourceRoots({
    sourceExecutablePath,
    workspaceRoot,
  });
  const copiedRoots: string[] = [];
  // Copy one bundle at a time, in the order the resolver returned them.
  for (const bundleRoot of resolved.sourceRoots) {
    const destination = join(resourceRoot, "ms-playwright", basename(bundleRoot));
    await mkdir(dirname(destination), { recursive: true });
    await cp(bundleRoot, destination, { recursive: true });
    copiedRoots.push(destination);
  }
  return { sourceRoots: resolved.sourceRoots, targetRoots: copiedRoots };
}
/**
 * Loads `playwright` as resolved from `<workspaceRoot>/apps/daemon/package.json`
 * and returns the Chromium executable path it reports.
 *
 * @param workspaceRoot - Root of the workspace containing `apps/daemon`.
 * @returns The value returned by `chromium.executablePath()`.
 * @throws Error when the module exposes no `chromium.executablePath` or it returns an empty value.
 */
export function resolveDaemonPlaywrightChromiumExecutablePath(workspaceRoot: string): string {
  // Resolve relative to the daemon package so its own Playwright install is the one loaded.
  const daemonRequire = createRequire(join(workspaceRoot, "apps", "daemon", "package.json"));
  const { chromium } = daemonRequire("playwright") as {
    chromium?: { executablePath?: () => string };
  };
  const resolvedPath = chromium?.executablePath?.();
  if (resolvedPath) {
    return resolvedPath;
  }
  throw new Error("tools-pack: daemon Playwright Chromium executable path is unavailable");
}
/**
 * Finds the nearest ancestor directory of `executablePath` whose name matches
 * `CHROMIUM_BUNDLE_ROOT_RE`.
 *
 * @param executablePath - Path to a Chromium executable inside a bundle directory.
 * @returns The matching ancestor directory.
 * @throws Error when no ancestor up to the filesystem root matches.
 */
function resolveChromiumBundleRoot(executablePath: string): string {
  let candidate = dirname(executablePath);
  let previous: string;
  do {
    if (CHROMIUM_BUNDLE_ROOT_RE.test(basename(candidate))) {
      return candidate;
    }
    previous = candidate;
    candidate = dirname(candidate);
    // dirname() returns its input once the top of the path is reached.
  } while (candidate !== previous);
  throw new Error(`tools-pack: unable to locate Chromium bundle root for ${executablePath}`);
}
/**
 * Resolves the bundle root containing `executablePath` plus any sibling bundle
 * of the same revision (`chromium-<rev>` / `chromium_headless_shell-<rev>`)
 * that exists on disk.
 *
 * @param executablePath - Path to a Chromium executable inside a bundle directory.
 * @returns The primary bundle root first, followed by the existing sibling variants.
 * @throws Error when no bundle root can be located, or when the collected roots
 *   fail `assertBundledChromiumRootsSupportChannelLaunch`.
 */
export async function resolveChromiumBundleRoots(executablePath: string): Promise<string[]> {
  const primaryRoot = resolveChromiumBundleRoot(executablePath);
  const revision = /^chromium(?:_headless_shell)?-(\d+)$/i.exec(basename(primaryRoot))?.[1];
  if (revision === undefined) {
    return [primaryRoot];
  }

  const bundleParent = dirname(primaryRoot);
  const collectedRoots = [primaryRoot];
  for (const variantName of [`chromium-${revision}`, `chromium_headless_shell-${revision}`]) {
    const variantRoot = join(bundleParent, variantName);
    if (variantRoot === primaryRoot) {
      continue;
    }
    try {
      await access(variantRoot);
    } catch {
      // Some Playwright installs ship only one Chromium variant. Keep
      // packaging the variant that actually backs chromium.launch().
      continue;
    }
    collectedRoots.push(variantRoot);
  }

  assertBundledChromiumRootsSupportChannelLaunch(collectedRoots, executablePath);
  return collectedRoots;
}
/**
 * Ensures at least one bundle root has a directory name matching
 * `HEADED_CHROMIUM_BUNDLE_ROOT_RE`.
 *
 * @param roots - Bundle root directories that will be packaged.
 * @param executablePath - Executable the roots were resolved from; used only in the error message.
 * @throws Error when none of the roots matches.
 */
function assertBundledChromiumRootsSupportChannelLaunch(
  roots: readonly string[],
  executablePath: string,
): void {
  for (const bundleRoot of roots) {
    if (HEADED_CHROMIUM_BUNDLE_ROOT_RE.test(basename(bundleRoot))) {
      return;
    }
  }
  throw new Error(
    `tools-pack: bundled Playwright Chromium for ${executablePath} is missing the chromium-* bundle required by channel: 'chromium'; reinstall Playwright without --only-shell or add the headed Chromium bundle`,
  );
}
/**
 * Determines which Chromium executable to package and the bundle roots that go with it.
 *
 * @param sourceExecutablePath - Explicit executable path; when absent, the path is taken
 *   from `resolveDaemonPlaywrightChromiumExecutablePath(workspaceRoot)`.
 * @param workspaceRoot - Workspace root used for the fallback lookup.
 * @returns The executable path in use and its bundle roots.
 * @throws When the executable is not accessible or bundle-root resolution fails.
 */
export async function resolveBundledPlaywrightChromiumSourceRoots({
  sourceExecutablePath,
  workspaceRoot,
}: {
  sourceExecutablePath?: string;
  workspaceRoot: string;
}): Promise<{ executablePath: string; sourceRoots: string[] }> {
  let executablePath = sourceExecutablePath;
  if (executablePath == null) {
    executablePath = resolveDaemonPlaywrightChromiumExecutablePath(workspaceRoot);
  }
  // Check that the executable exists before resolving its bundle directories.
  await access(executablePath);
  return {
    executablePath,
    sourceRoots: await resolveChromiumBundleRoots(executablePath),
  };
}

View File

@@ -3,12 +3,7 @@ import { dirname, join } from "node:path";
import { hashJson, hashPath, ToolPackCache } from "../cache.js";
import type { ToolPackConfig } from "../config.js";
import {
copyBundledPlaywrightChromium,
copyBundledResourceTrees,
resolveBundledPlaywrightChromiumSourceRoots,
winResources,
} from "../resources.js";
import { copyBundledResourceTrees, winResources } from "../resources.js";
import {
copyOptionalVelaCliBinary,
resolveOptionalVelaCliBinary,
@@ -26,9 +21,6 @@ async function createResourceTreeCacheKey(config: ToolPackConfig): Promise<strin
velaCliBin == null
? null
: await resolveOptionalVelaCliOpenCodeCompanionTree(velaCliBin);
const playwrightSource = await resolveBundledPlaywrightChromiumSourceRoots({
workspaceRoot: config.workspaceRoot,
});
return hashJson({
assetsCommunityPets: await hashPath(join(config.workspaceRoot, "assets", "community-pets")),
assetsFrames: await hashPath(join(config.workspaceRoot, "assets", "frames")),
@@ -38,12 +30,6 @@ async function createResourceTreeCacheKey(config: ToolPackConfig): Promise<strin
node: "win.resource-tree",
pluginOfficial: await hashPath(join(config.workspaceRoot, "plugins", "_official")),
pluginRegistry: await hashPath(join(config.workspaceRoot, "plugins", "registry")),
playwrightChromium: await Promise.all(
playwrightSource.sourceRoots.map(async (sourceRoot) => ({
hash: await hashPath(sourceRoot),
root: sourceRoot,
})),
),
promptTemplates: await hashPath(join(config.workspaceRoot, "prompt-templates")),
schemaVersion: RESOURCE_TREE_CACHE_SCHEMA_VERSION,
skills: await hashPath(join(config.workspaceRoot, "skills")),
@@ -81,10 +67,6 @@ export async function prepareResourceTree(
workspaceRoot: config.workspaceRoot,
resourceRoot,
});
await copyBundledPlaywrightChromium({
workspaceRoot: config.workspaceRoot,
resourceRoot,
});
await mkdir(join(resourceRoot, "bin"), { recursive: true });
await cp(winResources.sevenZipExe, join(resourceRoot, "bin", "7z.exe"));
await cp(winResources.sevenZipDll, join(resourceRoot, "bin", "7z.dll"));

View File

@@ -14,7 +14,6 @@ import {
} from "../src/mac/app.js";
import { resolveSeededAppConfigPaths, seedPackagedAppConfig, writeLaunchPackagedConfig } from "../src/mac/index.js";
import { resolveMacPaths } from "../src/mac/paths.js";
import { ensureDaemonPlaywrightFixture } from "./playwright-fixture.js";
async function pathExists(path: string): Promise<boolean> {
try {
@@ -155,7 +154,6 @@ describe("seedPackagedAppConfig", () => {
describe("copyResourceTree", () => {
it("does not embed the build machine Node launcher into mac resources", async () => {
const root = await mkdtemp(join(tmpdir(), "open-design-tools-pack-mac-"));
const playwrightFixture = await ensureDaemonPlaywrightFixture(process.cwd());
try {
const config = makeConfig(root);
const paths = resolveMacPaths(config);
@@ -179,7 +177,6 @@ describe("copyResourceTree", () => {
expect(await pathExists(join(paths.resourceRoot, "bin", "node"))).toBe(false);
} finally {
await playwrightFixture.cleanup();
await rm(root, { force: true, recursive: true });
}
});

View File

@@ -1,85 +0,0 @@
import { chmod, lstat, mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { basename, dirname, join } from "node:path";
import process from "node:process";
import { resolveDaemonPlaywrightChromiumExecutablePath } from "../src/resources.js";
/**
 * Reports whether `path` can be `lstat`-ed.
 *
 * @param path - Filesystem path to probe.
 * @returns `true` when `lstat` succeeds, `false` when it rejects for any reason.
 */
async function pathExists(path: string): Promise<boolean> {
  return lstat(path).then(
    () => true,
    () => false,
  );
}
/**
 * Walks up from `executablePath` to the nearest ancestor directory named
 * `chromium-<digits>`.
 *
 * @throws Error when no such ancestor exists.
 */
function findHeadedChromiumRoot(executablePath: string): string {
  let current = dirname(executablePath);
  while (!/^chromium-(\d+)$/i.test(basename(current))) {
    const parent = dirname(current);
    if (parent === current) {
      throw new Error(`tools-pack tests: unexpected Playwright Chromium root ${executablePath}`);
    }
    current = parent;
  }
  return current;
}

/**
 * Ensures a headed Chromium bundle and a same-revision headless-shell bundle
 * (containing a `HEADLESS_SENTINEL` file) exist where the daemon's Playwright
 * expects them.
 *
 * If the headed bundle is missing, `PLAYWRIGHT_BROWSERS_PATH` is redirected to
 * a fresh temporary directory and stub files are created there. The override
 * stays active until `cleanup()` is called, which restores the original value
 * and deletes the temporary directory. If setup throws after the redirect, the
 * same restoration runs before the error is rethrown.
 *
 * NOTE(review): when the headed bundle already exists, a missing headless
 * directory and sentinel are created next to it and are not removed by
 * `cleanup()` — confirm this is intended.
 *
 * @param workspaceRoot - Workspace root used to resolve the daemon's Playwright install.
 * @returns Fixture paths plus a `cleanup` callback.
 * @throws Error when the Chromium executable path has no `chromium-<revision>` ancestor.
 */
export async function ensureDaemonPlaywrightFixture(workspaceRoot: string): Promise<{
  cleanup: () => Promise<void>;
  executablePath: string;
  headlessSentinel: string;
  headedRoot: string;
  headlessRoot: string;
}> {
  const originalBrowsersPath = process.env.PLAYWRIGHT_BROWSERS_PATH;
  let temporaryBrowsersPath: string | null = null;

  // Undoes the environment override and removes the temporary cache, if one was created.
  const cleanup = async (): Promise<void> => {
    if (temporaryBrowsersPath != null) {
      if (originalBrowsersPath == null) delete process.env.PLAYWRIGHT_BROWSERS_PATH;
      else process.env.PLAYWRIGHT_BROWSERS_PATH = originalBrowsersPath;
      await rm(temporaryBrowsersPath, { force: true, recursive: true });
    }
  };

  try {
    let executablePath = resolveDaemonPlaywrightChromiumExecutablePath(workspaceRoot);
    let headedRoot = findHeadedChromiumRoot(executablePath);
    const revisionMatch = basename(headedRoot).match(/^chromium-(\d+)$/i);
    if (!revisionMatch) {
      throw new Error(`tools-pack tests: unexpected Playwright Chromium root ${headedRoot}`);
    }
    const revision = revisionMatch[1];
    let headlessRoot = join(dirname(headedRoot), `chromium_headless_shell-${revision}`);

    if (!(await pathExists(headedRoot))) {
      // No headed bundle present: point Playwright at a throwaway cache and
      // resolve the paths again so they land inside it.
      temporaryBrowsersPath = await mkdtemp(join(tmpdir(), "open-design-playwright-fixture-"));
      process.env.PLAYWRIGHT_BROWSERS_PATH = temporaryBrowsersPath;
      executablePath = resolveDaemonPlaywrightChromiumExecutablePath(workspaceRoot);
      headedRoot = findHeadedChromiumRoot(executablePath);
      headlessRoot = join(dirname(headedRoot), `chromium_headless_shell-${revision}`);
    }

    const chromeDir = dirname(executablePath);
    const headlessSentinel = join(headlessRoot, "HEADLESS_SENTINEL");
    if (!(await pathExists(headedRoot))) {
      // Stub executable and licence file standing in for a real Chromium bundle.
      await mkdir(chromeDir, { recursive: true });
      await writeFile(executablePath, "#!/bin/sh\nexit 0\n", "utf8");
      await chmod(executablePath, 0o755);
      await writeFile(join(chromeDir, "LICENSE"), "license\n", "utf8");
    }
    if (!(await pathExists(headlessRoot))) {
      await mkdir(headlessRoot, { recursive: true });
    }
    if (!(await pathExists(headlessSentinel))) {
      await writeFile(headlessSentinel, "headless shell\n", "utf8");
    }

    return {
      cleanup,
      executablePath,
      headlessSentinel,
      headedRoot,
      headlessRoot,
    };
  } catch (error) {
    // The caller never receives `cleanup`, so undo the override here.
    await cleanup();
    throw error;
  }
}

View File

@@ -11,15 +11,11 @@ import {
writeFile,
} from "node:fs/promises";
import { tmpdir } from "node:os";
import { basename, dirname, join } from "node:path";
import { dirname, join } from "node:path";
import process from "node:process";
import {
copyBundledPlaywrightChromium,
copyBundledResourceTrees,
} from "../src/resources.js";
import { copyBundledResourceTrees } from "../src/resources.js";
import { copyOptionalVelaCliBinary, resolveOptionalVelaCliBinary } from "../src/vela-cli.js";
import { ensureDaemonPlaywrightFixture } from "./playwright-fixture.js";
async function writeFakeOpenCodeCompanion(
source: string,
@@ -332,237 +328,6 @@ describe("copyOptionalVelaCliBinary", () => {
});
});
describe("copyBundledPlaywrightChromium", () => {
it("copies the headed Chromium and headless shell revision trees into packaged resources", async () => {
  const root = await mkdtemp(join(tmpdir(), "open-design-tools-pack-playwright-"));
  const resourceRoot = join(root, "resources", "open-design");
  // Fake Playwright cache holding both variants of revision 1234.
  const cacheRoot = join(root, "ms-playwright");
  const headedSource = join(cacheRoot, "chromium-1234");
  const headlessSource = join(cacheRoot, "chromium_headless_shell-1234");
  const sourceExecutablePath = join(headedSource, "chrome-linux", "chrome");
  try {
    await mkdir(dirname(sourceExecutablePath), { recursive: true });
    await writeFile(sourceExecutablePath, "#!/bin/sh\nexit 0\n", "utf8");
    await chmod(sourceExecutablePath, 0o755);
    await writeFile(join(headedSource, "chrome-linux", "LICENSE"), "license\n", "utf8");
    await mkdir(join(headlessSource, "chrome-headless-shell-linux64"), { recursive: true });
    await writeFile(
      join(headlessSource, "chrome-headless-shell-linux64", "README"),
      "headless shell\n",
      "utf8",
    );

    const { sourceRoots, targetRoots } = await copyBundledPlaywrightChromium({
      workspaceRoot: root,
      resourceRoot,
      sourceExecutablePath,
    });

    // Both variants are reported, headed first, and mirrored under the resource root.
    const headedTarget = join(resourceRoot, "ms-playwright", "chromium-1234");
    const headlessTarget = join(resourceRoot, "ms-playwright", "chromium_headless_shell-1234");
    expect(sourceRoots).toEqual([headedSource, headlessSource]);
    expect(targetRoots).toEqual([headedTarget, headlessTarget]);

    // File contents survive the copy.
    await expect(readFile(join(headedTarget, "chrome-linux", "LICENSE"), "utf8")).resolves.toBe(
      "license\n",
    );
    await expect(
      readFile(join(headlessTarget, "chrome-headless-shell-linux64", "README"), "utf8"),
    ).resolves.toBe("headless shell\n");
  } finally {
    await rm(root, { force: true, recursive: true });
  }
});
// Headed-only layout: no chromium_headless_shell-* directory is created, and
// the copy is expected to succeed, report just the headed revision, and leave
// no headless-shell directory in the packaged resources.
it("copies the headed Chromium tree even when the headless shell tree is absent", async () => {
  const root = await mkdtemp(join(tmpdir(), "open-design-tools-pack-playwright-headed-only-"));
  const resourceRoot = join(root, "resources", "open-design");
  const sourceCache = join(root, "ms-playwright");
  const targetCache = join(resourceRoot, "ms-playwright");
  const headedDir = join(sourceCache, "chromium-1234", "chrome-linux");
  const sourceExecutablePath = join(headedDir, "chrome");

  try {
    await mkdir(dirname(sourceExecutablePath), { recursive: true });
    await writeFile(sourceExecutablePath, "#!/bin/sh\nexit 0\n", "utf8");
    await chmod(sourceExecutablePath, 0o755);
    await writeFile(join(headedDir, "LICENSE"), "license\n", "utf8");

    const copied = await copyBundledPlaywrightChromium({
      workspaceRoot: root,
      resourceRoot,
      sourceExecutablePath,
    });

    expect(copied.sourceRoots).toEqual([join(sourceCache, "chromium-1234")]);
    expect(copied.targetRoots).toEqual([join(targetCache, "chromium-1234")]);

    const copiedLicense = join(targetCache, "chromium-1234", "chrome-linux", "LICENSE");
    await expect(readFile(copiedLicense, "utf8")).resolves.toBe("license\n");

    // The headless-shell revision was never created, so it must not appear in the output.
    const missingHeadlessRoot = join(targetCache, "chromium_headless_shell-1234");
    await expect(access(missingHeadlessRoot)).rejects.toThrow();
  } finally {
    await rm(root, { force: true, recursive: true });
  }
});
// No explicit sourceExecutablePath is passed here: the copy is driven by the
// fixture provisioned for the real workspace (process.cwd()), and the reported
// roots are compared against the fixture's headed and headless roots.
//
// Resource handling: the temp root is created first, so everything that can
// throw afterwards (including fixture provisioning and fixture cleanup) runs
// inside a try/finally that always removes the temp root.
it("copies the daemon-resolved Playwright revision trees into packaged resources", async () => {
  const root = await mkdtemp(join(tmpdir(), "open-design-tools-pack-playwright-launch-"));
  try {
    const resourceRoot = join(root, "resources", "open-design");
    const workspaceRoot = process.cwd();
    const playwrightFixture = await ensureDaemonPlaywrightFixture(workspaceRoot);
    try {
      const copied = await copyBundledPlaywrightChromium({
        workspaceRoot,
        resourceRoot,
      });

      expect(copied.sourceRoots).toEqual([
        playwrightFixture.headedRoot,
        playwrightFixture.headlessRoot,
      ]);
      expect(copied.targetRoots).toEqual([
        join(resourceRoot, "ms-playwright", basename(playwrightFixture.headedRoot)),
        join(resourceRoot, "ms-playwright", basename(playwrightFixture.headlessRoot)),
      ]);

      // The headless sentinel file must exist in the copied headless tree.
      await expect(
        access(
          join(
            resourceRoot,
            "ms-playwright",
            basename(playwrightFixture.headlessRoot),
            basename(playwrightFixture.headlessSentinel),
          ),
        ),
      ).resolves.toBeUndefined();
    } finally {
      await playwrightFixture.cleanup();
    }
  } finally {
    // Runs even when fixture provisioning or fixture cleanup throws.
    await rm(root, { force: true, recursive: true });
  }
});
// Points PLAYWRIGHT_BROWSERS_PATH at a "shared cache" directory, installs a
// stub `playwright` module in a throwaway workspace, and checks that the
// fixture's headed root does not live under that shared cache, that its
// executable exists, and that cleanup removes the headed root while leaving the
// env var at the shared-cache value.
//
// Resource handling: the env override and all setup I/O happen inside the outer
// try, so the original PLAYWRIGHT_BROWSERS_PATH is restored and the temp root is
// removed even when setup or an assertion fails. Post-cleanup assertions run
// after the inner finally rather than inside it, so they cannot mask an earlier
// failure or prevent the env restore.
it("keeps synthetic Playwright bundles out of the shared cache", async () => {
  const originalBrowsersPath = process.env.PLAYWRIGHT_BROWSERS_PATH;
  const root = await mkdtemp(join(tmpdir(), "open-design-tools-pack-playwright-fixture-"));
  const workspaceRoot = join(root, "workspace");
  const sharedCacheRoot = join(root, "shared-cache");

  try {
    process.env.PLAYWRIGHT_BROWSERS_PATH = sharedCacheRoot;

    await mkdir(join(workspaceRoot, "apps", "daemon"), { recursive: true });
    await writeFile(
      join(workspaceRoot, "apps", "daemon", "package.json"),
      "{\"name\":\"@open-design/daemon-test-fixture\"}\n",
      "utf8",
    );
    await mkdir(join(workspaceRoot, "apps", "daemon", "node_modules", "playwright"), {
      recursive: true,
    });
    // Stub playwright module: resolves the Chromium executable under
    // PLAYWRIGHT_BROWSERS_PATH, falling back to the sibling shared-cache directory.
    await writeFile(
      join(workspaceRoot, "apps", "daemon", "node_modules", "playwright", "index.js"),
      [
        "const { join } = require('node:path');",
        "exports.chromium = {",
        " executablePath() {",
        " const root = process.env.PLAYWRIGHT_BROWSERS_PATH ?? join(__dirname, '..', '..', '..', '..', 'shared-cache');",
        " return join(root, 'chromium-1234', 'chrome-linux', 'chrome');",
        " },",
        "};",
        "",
      ].join("\n"),
      "utf8",
    );

    const playwrightFixture = await ensureDaemonPlaywrightFixture(workspaceRoot);
    try {
      expect(playwrightFixture.headedRoot.startsWith(sharedCacheRoot)).toBe(false);
      await expect(access(playwrightFixture.executablePath)).resolves.toBeUndefined();
    } finally {
      await playwrightFixture.cleanup();
    }

    // After cleanup: the synthetic bundle is gone and the env var is back to
    // the value it had when the fixture was created.
    await expect(access(playwrightFixture.headedRoot)).rejects.toThrow();
    expect(process.env.PLAYWRIGHT_BROWSERS_PATH).toBe(sharedCacheRoot);
  } finally {
    // Restore the env first so it happens even if removing the temp root fails.
    if (originalBrowsersPath == null) delete process.env.PLAYWRIGHT_BROWSERS_PATH;
    else process.env.PLAYWRIGHT_BROWSERS_PATH = originalBrowsersPath;
    await rm(root, { force: true, recursive: true });
  }
});
// Negative case: the supplied executable lives in a chromium_headless_shell-*
// revision and no chromium-* revision exists, so the copy is expected to reject
// with the "missing the chromium-* bundle" error.
it("fails when only the headless shell bundle is available for a channel launch", async () => {
  const root = await mkdtemp(join(tmpdir(), "open-design-tools-pack-playwright-shell-only-"));
  const resourceRoot = join(root, "resources", "open-design");
  const shellDir = join(
    root,
    "ms-playwright",
    "chromium_headless_shell-1234",
    "chrome-headless-shell-linux64",
  );
  const sourceExecutablePath = join(shellDir, "headless_shell");

  try {
    await mkdir(dirname(sourceExecutablePath), { recursive: true });
    await writeFile(sourceExecutablePath, "#!/bin/sh\nexit 0\n", "utf8");
    await chmod(sourceExecutablePath, 0o755);

    const attempt = copyBundledPlaywrightChromium({
      workspaceRoot: root,
      resourceRoot,
      sourceExecutablePath,
    });
    await expect(attempt).rejects.toThrow(
      /missing the chromium-\* bundle required by channel: 'chromium'/,
    );
  } finally {
    await rm(root, { force: true, recursive: true });
  }
});
});
describe("resolveOptionalVelaCliBinary", () => {
it("prefers OPEN_DESIGN_VELA_CLI_BIN over the npm resolver", async () => {
await expect(

View File

@@ -1,7 +1,6 @@
import { chmod, mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { basename, dirname, join } from "node:path";
import process from "node:process";
import { dirname, join } from "node:path";
import { describe, expect, it } from "vitest";
@@ -9,7 +8,6 @@ import { ToolPackCache } from "../src/cache.js";
import type { ToolPackConfig } from "../src/config.js";
import { prepareResourceTree } from "../src/win/resources.js";
import type { WinPaths } from "../src/win/types.js";
import { ensureDaemonPlaywrightFixture } from "./playwright-fixture.js";
async function writeFakeOpenCodeCompanion(
source: string,
@@ -65,7 +63,6 @@ describe("prepareResourceTree", () => {
const workspaceRoot = join(root, "workspace");
const resourceRoot = join(root, "materialized", "open-design");
const cache = new ToolPackCache(join(root, "cache"));
const playwrightFixture = await ensureDaemonPlaywrightFixture(process.cwd());
const config = { workspaceRoot } as ToolPackConfig;
const paths = { resourceRoot } as WinPaths;
const templatePath = join(
@@ -103,10 +100,9 @@ describe("prepareResourceTree", () => {
"miss",
]);
} finally {
await playwrightFixture.cleanup();
await rm(root, { force: true, recursive: true });
}
}, 15_000);
});
it("copies a configured Vela CLI binary into the Windows resource tree", async () => {
const root = await mkdtemp(join(tmpdir(), "open-design-win-vela-"));
@@ -114,7 +110,6 @@ describe("prepareResourceTree", () => {
const resourceRoot = join(root, "materialized", "open-design");
const source = join(root, "source", "vela.exe");
const cache = new ToolPackCache(join(root, "cache"));
const playwrightFixture = await ensureDaemonPlaywrightFixture(process.cwd());
const config = { workspaceRoot } as ToolPackConfig;
const paths = { resourceRoot } as WinPaths;
const originalVelaBin = process.env.OPEN_DESIGN_VELA_CLI_BIN;
@@ -135,19 +130,17 @@ describe("prepareResourceTree", () => {
readFile(join(resourceRoot, "bin", "libexec", "opencode", "opencode"), "utf8"),
).resolves.toBe("fake opencode\n");
} finally {
await playwrightFixture.cleanup();
if (originalVelaBin == null) delete process.env.OPEN_DESIGN_VELA_CLI_BIN;
else process.env.OPEN_DESIGN_VELA_CLI_BIN = originalVelaBin;
await rm(root, { force: true, recursive: true });
}
}, 15_000);
});
it("fails strict Windows resource preparation when configured Vela CLI is missing", async () => {
const root = await mkdtemp(join(tmpdir(), "open-design-win-vela-strict-"));
const workspaceRoot = join(root, "workspace");
const resourceRoot = join(root, "materialized", "open-design");
const cache = new ToolPackCache(join(root, "cache"));
const playwrightFixture = await ensureDaemonPlaywrightFixture(process.cwd());
const config = {
workspaceRoot,
requireVelaCli: true,
@@ -162,12 +155,11 @@ describe("prepareResourceTree", () => {
prepareResourceTree(config, paths, cache, { materialize: true }),
).rejects.toThrow();
} finally {
await playwrightFixture.cleanup();
if (originalVelaBin == null) delete process.env.OPEN_DESIGN_VELA_CLI_BIN;
else process.env.OPEN_DESIGN_VELA_CLI_BIN = originalVelaBin;
await rm(root, { force: true, recursive: true });
}
}, 15_000);
});
it("invalidates the Windows resource tree cache when the Vela companion changes", async () => {
const root = await mkdtemp(join(tmpdir(), "open-design-win-vela-companion-"));
@@ -175,7 +167,6 @@ describe("prepareResourceTree", () => {
const resourceRoot = join(root, "materialized", "open-design");
const source = join(root, "source", "vela.exe");
const cache = new ToolPackCache(join(root, "cache"));
const playwrightFixture = await ensureDaemonPlaywrightFixture(process.cwd());
const config = { workspaceRoot } as ToolPackConfig;
const paths = { resourceRoot } as WinPaths;
const originalVelaBin = process.env.OPEN_DESIGN_VELA_CLI_BIN;
@@ -211,52 +202,9 @@ describe("prepareResourceTree", () => {
"miss",
]);
} finally {
await playwrightFixture.cleanup();
if (originalVelaBin == null) delete process.env.OPEN_DESIGN_VELA_CLI_BIN;
else process.env.OPEN_DESIGN_VELA_CLI_BIN = originalVelaBin;
await rm(root, { force: true, recursive: true });
}
}, 15_000);
// Rewrites the fixture's headless sentinel between two prepareResourceTree
// runs and expects the materialized copy to follow the new content, with both
// runs reported as cache misses.
//
// Resource handling: the temp root is created first, so fixture provisioning,
// the test body, and fixture cleanup all run inside a try/finally that always
// removes the temp root.
it("invalidates the Windows resource tree cache when the Playwright bundle changes", async () => {
  const root = await mkdtemp(join(tmpdir(), "open-design-win-playwright-bundle-"));
  try {
    const workspaceRoot = join(root, "workspace");
    const resourceRoot = join(root, "materialized", "open-design");
    const cache = new ToolPackCache(join(root, "cache"));
    const playwrightFixture = await ensureDaemonPlaywrightFixture(process.cwd());
    try {
      // Where the sentinel is expected to land inside the materialized tree.
      const materializedReadme = join(
        resourceRoot,
        "ms-playwright",
        basename(playwrightFixture.headlessRoot),
        basename(playwrightFixture.headlessSentinel),
      );
      const config = { workspaceRoot } as ToolPackConfig;
      const paths = { resourceRoot } as WinPaths;

      await createWorkspaceFixture(workspaceRoot);

      await writeFile(playwrightFixture.headlessSentinel, "headless shell one\n", "utf8");
      await prepareResourceTree(config, paths, cache, { materialize: true });
      await expect(readFile(materializedReadme, "utf8")).resolves.toBe(
        "headless shell one\n",
      );

      await writeFile(playwrightFixture.headlessSentinel, "headless shell two\n", "utf8");
      await prepareResourceTree(config, paths, cache, { materialize: true });
      await expect(readFile(materializedReadme, "utf8")).resolves.toBe(
        "headless shell two\n",
      );

      expect(cache.report().entries.map((entry) => entry.status)).toEqual([
        "miss",
        "miss",
      ]);
    } finally {
      await playwrightFixture.cleanup();
    }
  } finally {
    // Runs even when fixture provisioning or fixture cleanup throws.
    await rm(root, { force: true, recursive: true });
  }
}, 15_000);
});
});