fix(sdk): mark failed jobs terminal, correct empty-query chroma flag, externalize zod/mcp

Addresses greptile review on PR #3077:
- generate(): a provider crash or parse error left the job stuck in
  'processing', which the queued-only guard can never reclaim. The job
  now transitions processing->failed, with the error recorded in
  last_error, before the error is re-thrown.
- search(): empty-query path returned chroma:true without consulting
  Chroma; it now returns chroma:false. degraded stays false, because
  this is the intended filter-only path rather than a Chroma failure.
- tsup: zod + @modelcontextprotocol/sdk are prod deps statically imported
  by the SDK; mark them external so they are not inlined into the bundle.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Newman
2026-06-29 15:04:09 -07:00
parent f42bead214
commit c348d95421
2 changed files with 72 additions and 43 deletions

View File

@@ -968,14 +968,23 @@ export async function createCmemClient(options: CmemClientOptions): Promise<Cmem
// Step 3: load the project for the prompt's `projectName`.
const project = await repos.projects.getByIdForTeam(projectId, teamId);
// Steps 4–5: call the provider and persist. Both can throw while the
// row is in 'processing', and generate()'s queued-only guard can
// never reclaim a 'processing' row — so any failure here is marked
// terminally 'failed' (a legal processing→failed transition) before
// re-throwing. That leaves a diagnosable row with the error recorded
// in last_error instead of one stuck in 'processing' forever.
let providerResult: ServerGenerationResult;
let outcome: Extract<
Awaited<ReturnType<typeof processGeneratedResponse>>,
{ kind: 'completed' }
>;
try {
// Step 4: call the provider. The lifted core mirrors
// ProviderObservationGenerator.ts:200-209 — no BullMQ payload, no
// AbortSignal (consumers control their own timeouts via the
// provider's fetchImpl), no scope/revocation audit.
// A provider crash propagates verbatim. The row stays in 'processing'
// so a future caller can retry or admin-resolve it, and the caller can
// classify the error (transient vs auth_invalid vs unrecoverable).
const providerResult: ServerGenerationResult = await provider.generate({
providerResult = await provider.generate({
job: lockedJob,
events: loadedEvents,
project: {
@@ -1002,16 +1011,32 @@ export async function createCmemClient(options: CmemClientOptions): Promise<Cmem
if (providerResult.modelId !== undefined) {
persistInput.modelId = providerResult.modelId;
}
const outcome = await processGeneratedResponse(persistInput);
const persisted = await processGeneratedResponse(persistInput);
if (outcome.kind === 'parse_error') {
// The provider returned text we couldn't parse; mark a clear error
// for the caller. The job row is left in 'processing' so an admin
// can investigate. Mirrors ProviderObservationGenerator.ts:229-238.
if (persisted.kind === 'parse_error') {
// The provider returned text we couldn't parse. Surface a clear
// error; the catch below transitions the row to 'failed'.
throw new Error(
`cmem-sdk: generation parse error for job ${outcome.jobId}: ${outcome.reason}`
`cmem-sdk: generation parse error for job ${persisted.jobId}: ${persisted.reason}`
);
}
outcome = persisted;
} catch (err) {
await repos.observationGenerationJobs
.transitionStatus({
id: jobId,
projectId,
teamId,
status: 'failed',
lastError: { message: err instanceof Error ? err.message : String(err) },
})
.catch(() => {
// The row was already moved on (e.g. completed inside step 5's
// own transaction, or claimed by another worker). Nothing to
// recover; preserve the original error for the caller.
});
throw err;
}
// Step 6 (Phase 6): index the freshly-persisted observations into
// Chroma so subsequent client.search() calls can find them. Postgres
@@ -1060,14 +1085,16 @@ export async function createCmemClient(options: CmemClientOptions): Promise<Cmem
// Empty-query path — no semantic intent to express. Mirror the
// SearchManager filter-only branch (SearchManager.ts:165-176) by
// returning the most recent observations for this tenant.
// returning the most recent observations for this tenant. Chroma is
// never consulted here, so `chroma: false` (it is NOT degraded — this
// is the intended filter-only behavior, not a Chroma failure).
if (query.trim().length === 0) {
const observations = await repos.observations.listByProject({
projectId,
teamId,
limit,
});
return { observations, chroma: true, degraded: false };
return { observations, chroma: false, degraded: false };
}
// Default path — Chroma semantic. Per plan §6 line 240:

View File

@@ -21,6 +21,8 @@ export default defineConfig({
// against the installed `claude-mem` package's prod deps.
external: [
'pg',
'zod',
'@modelcontextprotocol/sdk',
'@anthropic-ai/sdk',
/^node:/,
],