From 84e1732d28e2d6439b2ef3d3cc21bfcb7041b3db Mon Sep 17 00:00:00 2001 From: fullex <106392080+0xfullex@users.noreply.github.com> Date: Thu, 21 May 2026 10:54:59 +0800 Subject: [PATCH] refactor(knowledge): migrate to JobManager (#15213) --- .changeset/libsql-replace-by-external-id.md | 5 + .../libsql/src/LibSQLVectorStore.ts | 56 +- .../libsql/tests/LibSQLVectorStore.test.ts | 160 ++ .../data/services/KnowledgeItemService.ts | 59 + .../__tests__/KnowledgeItemService.test.ts | 51 + .../KnowledgeOrchestrationService.ts | 47 +- .../KnowledgeOrchestrationService.test.ts | 78 +- .../knowledge/queue/KnowledgeQueueManager.ts | 349 --- .../__tests__/KnowledgeQueueManager.test.ts | 704 ------ .../__tests__/KnowledgeQueueManager.types.ts | 163 -- src/main/services/knowledge/queue/types.ts | 51 - .../runtime/KnowledgeRuntimeService.ts | 676 ++---- .../__tests__/KnowledgeRuntimeService.test.ts | 1893 +++-------------- .../knowledge/runtime/utils/prepare.ts | 4 +- .../__tests__/indexLeafJobHandler.test.ts | 223 ++ .../__tests__/prepareRootJobHandler.test.ts | 226 ++ .../knowledge/tasks/indexLeafJobHandler.ts | 147 ++ src/main/services/knowledge/tasks/jobTypes.ts | 28 + .../knowledge/tasks/prepareRootJobHandler.ts | 178 ++ src/main/services/knowledge/types/items.ts | 3 + src/main/services/knowledge/utils/items.ts | 6 +- .../services/knowledge/vectorstore/types.ts | 10 +- .../2026-05-20-knowledge-job-auto-recovery.md | 30 + .../knowledge/knowledge-backend-decisions.md | 161 +- .../docs/knowledge/knowledge-todo.md | 22 +- 25 files changed, 1761 insertions(+), 3569 deletions(-) create mode 100644 .changeset/libsql-replace-by-external-id.md delete mode 100644 src/main/services/knowledge/queue/KnowledgeQueueManager.ts delete mode 100644 src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.test.ts delete mode 100644 src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.types.ts delete mode 100644 src/main/services/knowledge/queue/types.ts create mode 100644 
src/main/services/knowledge/tasks/__tests__/indexLeafJobHandler.test.ts create mode 100644 src/main/services/knowledge/tasks/__tests__/prepareRootJobHandler.test.ts create mode 100644 src/main/services/knowledge/tasks/indexLeafJobHandler.ts create mode 100644 src/main/services/knowledge/tasks/jobTypes.ts create mode 100644 src/main/services/knowledge/tasks/prepareRootJobHandler.ts create mode 100644 v2-refactor-temp/docs/breaking-changes/2026-05-20-knowledge-job-auto-recovery.md diff --git a/.changeset/libsql-replace-by-external-id.md b/.changeset/libsql-replace-by-external-id.md new file mode 100644 index 0000000000..0995d04ce1 --- /dev/null +++ b/.changeset/libsql-replace-by-external-id.md @@ -0,0 +1,5 @@ +--- +'@vectorstores/libsql': patch +--- + +Add `LibSQLVectorStore.replaceByExternalId(externalId, nodes)` — an atomic DELETE + INSERT inside a single libSQL `client.batch(..., 'write')` transaction. Crash-retrying a caller that previously wrote chunks for the same `external_id` no longer leaves orphan chunks (the transaction wipes the prior set atomically), and never destroys pre-existing chunks on insert failure (the transaction rolls back). 
diff --git a/packages/vectorstores/libsql/src/LibSQLVectorStore.ts b/packages/vectorstores/libsql/src/LibSQLVectorStore.ts index 0605021879..0fb75ae148 100644 --- a/packages/vectorstores/libsql/src/LibSQLVectorStore.ts +++ b/packages/vectorstores/libsql/src/LibSQLVectorStore.ts @@ -297,13 +297,10 @@ export class LibSQLVectorStore extends BaseVectorStore { } } - async add(embeddingResults: BaseNode[]): Promise { - if (embeddingResults.length === 0) { - console.warn('Empty list sent to LibSQLVectorStore::add') - return [] - } - - await this.ensureInitialized() + private buildInsertStatement(embeddingResults: BaseNode[]): { + statement: InStatement + insertedIds: string[] + } { const data = this.getDataToInsert(embeddingResults) const placeholders = data @@ -327,9 +324,50 @@ export class LibSQLVectorStore extends BaseVectorStore { const flattenedParams = data.flat() const validParams = toInArgs(flattenedParams) - const statement: InStatement = { sql, args: validParams } + return { + statement: { sql, args: validParams }, + insertedIds: data.map((row) => String(row[0])) + } + } + + async add(embeddingResults: BaseNode[]): Promise { + if (embeddingResults.length === 0) { + console.warn('Empty list sent to LibSQLVectorStore::add') + return [] + } + + await this.ensureInitialized() + const { statement, insertedIds } = this.buildInsertStatement(embeddingResults) await this.clientInstance.execute(statement) - return data.map((row) => String(row[0])) + return insertedIds + } + + /** + * Atomically replace all chunks bound to a given `external_id` (i.e. an + * item/document) with a new set of chunks. DELETE + INSERT execute inside a + * single libSQL transaction (`client.batch(..., 'write')`): if INSERT fails + * the DELETE is rolled back, so existing chunks are never lost on partial + * failure. Crash-retrying a handler that calls this method is therefore + * idempotent — chunks always reflect the latest successful embedding. 
+ */ + async replaceByExternalId(externalId: string, embeddingResults: BaseNode[]): Promise { + await this.ensureInitialized() + + const collectionCriteria = this.collection.length ? 'AND collection = ?' : '' + const deleteArgs = this.collection.length ? [externalId, this.collection] : [externalId] + const deleteStatement: InStatement = { + sql: `DELETE FROM ${this.tableName} WHERE external_id = ? ${collectionCriteria}`, + args: toInArgs(deleteArgs) + } + + if (embeddingResults.length === 0) { + await this.clientInstance.batch([deleteStatement], 'write') + return [] + } + + const { statement: insertStatement, insertedIds } = this.buildInsertStatement(embeddingResults) + await this.clientInstance.batch([deleteStatement, insertStatement], 'write') + return insertedIds } async delete(refDocId: string, _deleteKwargs?: object): Promise { diff --git a/packages/vectorstores/libsql/tests/LibSQLVectorStore.test.ts b/packages/vectorstores/libsql/tests/LibSQLVectorStore.test.ts index 2c95d18d3f..68c8c1852c 100644 --- a/packages/vectorstores/libsql/tests/LibSQLVectorStore.test.ts +++ b/packages/vectorstores/libsql/tests/LibSQLVectorStore.test.ts @@ -1286,6 +1286,166 @@ describe('LibSQLVectorStore', () => { }) }) + describe('replaceByExternalId', () => { + it('should atomically replace chunks bound to an external_id', async () => { + await store.add([ + new TextNode({ + id_: 'old-chunk-1', + text: 'old chunk 1 content', + embedding: [0.1, 0.2], + metadata: { itemId: 'item-1', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }), + new TextNode({ + id_: 'old-chunk-2', + text: 'old chunk 2 content', + embedding: [0.3, 0.4], + metadata: { itemId: 'item-1', chunkIndex: 1 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }) + ]) + + const newIds = await store.replaceByExternalId('item-1', [ + new TextNode({ + id_: 'new-chunk-1', + text: 'replacement chunk', + embedding: [0.9, 0.1], + metadata: 
{ itemId: 'item-1', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }) + ]) + + expect(newIds).toEqual(['new-chunk-1']) + + const rows = await client.execute( + "SELECT id, external_id FROM test_embeddings WHERE external_id = 'item-1' ORDER BY id" + ) + expect(rows.rows).toHaveLength(1) + expect(rows.rows[0]).toMatchObject({ id: 'new-chunk-1', external_id: 'item-1' }) + }) + + it('should preserve old chunks when insert phase fails (transaction rollback)', async () => { + await store.add([ + new TextNode({ + id_: 'preserved-chunk-1', + text: 'must survive rollback', + embedding: [0.1, 0.2], + metadata: { itemId: 'item-1', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }), + new TextNode({ + id_: 'preserved-chunk-2', + text: 'also must survive', + embedding: [0.3, 0.4], + metadata: { itemId: 'item-1', chunkIndex: 1 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }) + ]) + + const batchSpy = vi.spyOn(client, 'batch').mockImplementation(async () => { + throw new Error('simulated batch failure') + }) + + await expect( + store.replaceByExternalId('item-1', [ + new TextNode({ + id_: 'should-not-appear', + text: 'this should never be persisted', + embedding: [0.5, 0.6], + metadata: { itemId: 'item-1', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }) + ]) + ).rejects.toThrow('simulated batch failure') + + batchSpy.mockRestore() + + const rows = await client.execute("SELECT id FROM test_embeddings WHERE external_id = 'item-1' ORDER BY id") + expect(rows.rows.map((row) => row.id)).toEqual(['preserved-chunk-1', 'preserved-chunk-2']) + }) + + it('should issue batch with transactionMode "write"', async () => { + const batchSpy = vi.spyOn(client, 'batch') + + await store.replaceByExternalId('item-1', [ + new TextNode({ + id_: 'tx-mode-chunk', + text: 'verify tx mode', + 
embedding: [0.1, 0.2], + metadata: { itemId: 'item-1', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }) + ]) + + expect(batchSpy).toHaveBeenCalledTimes(1) + expect(batchSpy.mock.calls[0]?.[1]).toBe('write') + batchSpy.mockRestore() + }) + + it('should treat empty node list as delete-by-external_id', async () => { + await store.add([ + new TextNode({ + id_: 'doomed-chunk', + text: 'will be cleared', + embedding: [0.1, 0.2], + metadata: { itemId: 'item-1', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } } + }) + ]) + + const ids = await store.replaceByExternalId('item-1', []) + expect(ids).toEqual([]) + + const rows = await client.execute("SELECT id FROM test_embeddings WHERE external_id = 'item-1'") + expect(rows.rows).toHaveLength(0) + }) + + it('should scope replace by collection', async () => { + const otherCollectionStore = new LibSQLVectorStore({ + client, + tableName: 'test_embeddings', + dimensions: 2, + collection: 'other' + }) + await otherCollectionStore.add([ + new TextNode({ + id_: 'other-collection-chunk', + text: 'do not touch', + embedding: [0.1, 0.2], + metadata: { itemId: 'item-shared', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-shared', metadata: {} } } + }) + ]) + + await store.add([ + new TextNode({ + id_: 'default-chunk-original', + text: 'default collection original', + embedding: [0.3, 0.4], + metadata: { itemId: 'item-shared', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-shared', metadata: {} } } + }) + ]) + + await store.replaceByExternalId('item-shared', [ + new TextNode({ + id_: 'default-chunk-replaced', + text: 'default collection replaced', + embedding: [0.5, 0.6], + metadata: { itemId: 'item-shared', chunkIndex: 0 }, + relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-shared', metadata: {} } } + }) + ]) + + const rows = await client.execute( + "SELECT 
id, collection FROM test_embeddings WHERE external_id = 'item-shared' ORDER BY collection, id" + ) + expect(rows.rows).toHaveLength(2) + expect(rows.rows[0]).toMatchObject({ id: 'default-chunk-replaced', collection: store.getCollection() }) + expect(rows.rows[1]).toMatchObject({ id: 'other-collection-chunk', collection: 'other' }) + }) + }) + describe('listByExternalId', () => { it('should list documents by external_id in chunk order without embeddings', async () => { await store.add([ diff --git a/src/main/data/services/KnowledgeItemService.ts b/src/main/data/services/KnowledgeItemService.ts index c588d06832..f86a255d81 100644 --- a/src/main/data/services/KnowledgeItemService.ts +++ b/src/main/data/services/KnowledgeItemService.ts @@ -249,6 +249,65 @@ export class KnowledgeItemService { }) } + // TODO: wrap the id collection and row fetch in a single db.transaction so a + // concurrent delete between the two queries cannot surface as dataInconsistent. + // Sibling methods getDescendantItems / getLeafDescendantItems share the same + // two-query shape and the same race; fix all three together. 
+ async getDescendantAndSelfItems(baseId: string, rootIds: string[]): Promise { + const subtreeIds = await this.getDescendantAndSelfIds(baseId, rootIds) + + if (subtreeIds.length === 0) { + return [] + } + + const rows = await this.db + .select() + .from(knowledgeItemTable) + .where(and(eq(knowledgeItemTable.baseId, baseId), inArray(knowledgeItemTable.id, subtreeIds))) + const rowsById = new Map(rows.map((row) => [row.id, row])) + + return subtreeIds.map((id) => { + const row = rowsById.get(id) + + if (!row) { + throw DataApiErrorFactory.dataInconsistent('KnowledgeItem', `Subtree row missing for id '${id}'`) + } + + return rowToKnowledgeItem(row) + }) + } + + private async getDescendantAndSelfIds(baseId: string, rootIds: string[]): Promise { + const uniqueRootIds = [...new Set(rootIds)] + + if (uniqueRootIds.length === 0) { + return [] + } + + const rows = await this.db.all<{ id: string }>(sql` + WITH RECURSIVE subtree AS ( + SELECT id + FROM knowledge_item + WHERE base_id = ${baseId} + AND id IN (${sql.join( + uniqueRootIds.map((id) => sql`${id}`), + sql`, ` + )}) + + UNION ALL + + SELECT child.id + FROM knowledge_item child + INNER JOIN subtree parent ON child.group_id = parent.id + WHERE child.base_id = ${baseId} + ) + SELECT DISTINCT id + FROM subtree + `) + + return rows.map((row) => row.id) + } + private async getDescendantIds(baseId: string, rootIds: string[]): Promise { const uniqueRootIds = [...new Set(rootIds)] diff --git a/src/main/data/services/__tests__/KnowledgeItemService.test.ts b/src/main/data/services/__tests__/KnowledgeItemService.test.ts index 3feaeb23a6..771f87ff68 100644 --- a/src/main/data/services/__tests__/KnowledgeItemService.test.ts +++ b/src/main/data/services/__tests__/KnowledgeItemService.test.ts @@ -479,6 +479,57 @@ describe('KnowledgeItemService', () => { }) }) + describe('getDescendantAndSelfItems', () => { + it('returns every descendant in the requested subtrees plus the roots themselves', async () => { + await seedItem({ id: 
'dir-root', type: 'directory', data: { source: '/root', path: '/root' } }) + await seedItem({ + id: 'dir-child', + groupId: 'dir-root', + type: 'directory', + data: { source: '/root/child', path: '/root/child' } + }) + await seedItem({ + id: 'file-child', + groupId: 'dir-child', + type: 'file', + data: createFileItemData('file-child') + }) + await seedItem({ + id: 'note-root', + type: 'note', + data: { source: 'root note', content: 'root note' } + }) + + const result = await service.getDescendantAndSelfItems('kb-1', ['dir-root', 'note-root', 'missing']) + + expect(result.map((item) => item.id).sort()).toEqual(['dir-child', 'dir-root', 'file-child', 'note-root']) + }) + + it('deduplicates when an ancestor and its descendant are both passed as roots', async () => { + await seedItem({ id: 'dir-root', type: 'directory', data: { source: '/root', path: '/root' } }) + await seedItem({ + id: 'dir-child', + groupId: 'dir-root', + type: 'directory', + data: { source: '/root/child', path: '/root/child' } + }) + await seedItem({ + id: 'file-child', + groupId: 'dir-child', + type: 'file', + data: createFileItemData('file-child') + }) + + const result = await service.getDescendantAndSelfItems('kb-1', ['dir-root', 'dir-child']) + + expect(result.map((item) => item.id).sort()).toEqual(['dir-child', 'dir-root', 'file-child']) + }) + + it('returns an empty list when no roots are provided', async () => { + await expect(service.getDescendantAndSelfItems('kb-1', [])).resolves.toEqual([]) + }) + }) + describe('updateStatus', () => { async function getItemRow(id: string) { const [row] = await dbh.db.select().from(knowledgeItemTable).where(eq(knowledgeItemTable.id, id)).limit(1) diff --git a/src/main/services/knowledge/KnowledgeOrchestrationService.ts b/src/main/services/knowledge/KnowledgeOrchestrationService.ts index 43d3240240..5df3d2566b 100644 --- a/src/main/services/knowledge/KnowledgeOrchestrationService.ts +++ b/src/main/services/knowledge/KnowledgeOrchestrationService.ts @@ -16,7 
+16,6 @@ import { } from '@shared/data/types/knowledge' import { IpcChannel } from '@shared/IpcChannel' -import { failItems } from './runtime/utils/cleanup' import { KnowledgeRuntimeAddItemsPayloadSchema, KnowledgeRuntimeBasePayloadSchema, @@ -117,39 +116,33 @@ export class KnowledgeOrchestrationService extends BaseService { async deleteBase(baseId: string): Promise { const runtime = application.get('KnowledgeRuntimeService') - const interruptedItemIds = await runtime.deleteBase(baseId) + + // Cancel everything queued for this base, then wait up to 35s for Layer 3 + // locks to drain. If the wait times out a wedged handler can still write + // to the libSQL file via replaceByExternalId — but the artifact delete + // below removes the whole file, so any such orphan rows go with it. + await runtime.cancelAllJobsForBase(baseId) + await runtime.waitForBaseWriteLocks(baseId, 35_000) + + // Artifact delete first so a failure here leaves the SQLite row in place + // and the user can retry deletion from the UI. The reverse order would + // strand orphan vector files on disk with no UI affordance to clean up. + try { + await runtime.deleteBaseArtifacts(baseId) + } catch (error) { + const normalizedError = error instanceof Error ? error : new Error(String(error)) + logger.error('Failed to delete knowledge base vector artifacts', normalizedError, { baseId }) + throw error + } try { await knowledgeBaseService.delete(baseId) } catch (error) { const normalizedError = error instanceof Error ? error : new Error(String(error)) - try { - await failItems(interruptedItemIds, normalizedError.message) - } catch (failureStateError) { - logger.error( - 'Failed to persist runtime item failure state after knowledge base deletion failed', - failureStateError instanceof Error ? 
failureStateError : new Error(String(failureStateError)), - { - baseId, - interruptedItemIds, - deleteError: normalizedError.message - } - ) - } - throw error - } - - try { - await runtime.deleteBaseArtifacts(baseId) - } catch (error) { - const normalizedError = error instanceof Error ? error : new Error(String(error)) - logger.error('Failed to delete knowledge base vector artifacts after SQLite deletion', normalizedError, { - baseId, - interruptedItemIds - }) + logger.error('Failed to delete knowledge base SQLite row after artifact cleanup', normalizedError, { baseId }) throw DataApiErrorFactory.invalidOperation( 'deleteBase', - `SQLite knowledge base was deleted, but vector artifact cleanup failed: ${normalizedError.message}` + `Vector artifacts were deleted, but SQLite knowledge base cleanup failed: ${normalizedError.message}` ) } } diff --git a/src/main/services/knowledge/__tests__/KnowledgeOrchestrationService.test.ts b/src/main/services/knowledge/__tests__/KnowledgeOrchestrationService.test.ts index 50ad36a9ab..4ccef450b8 100644 --- a/src/main/services/knowledge/__tests__/KnowledgeOrchestrationService.test.ts +++ b/src/main/services/knowledge/__tests__/KnowledgeOrchestrationService.test.ts @@ -7,15 +7,15 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { runtimeAddItemsMock, + runtimeCancelAllJobsForBaseMock, runtimeCreateBaseMock, runtimeDeleteBaseArtifactsMock, - runtimeDeleteBaseMock, runtimeDeleteItemChunkMock, runtimeDeleteItemsMock, runtimeListItemChunksMock, runtimeReindexItemsMock, runtimeSearchMock, - failItemsMock, + runtimeWaitForBaseWriteLocksMock, knowledgeBaseCreateMock, knowledgeBaseDeleteMock, knowledgeBaseGetByIdMock, @@ -26,15 +26,15 @@ const { knowledgeItemGetLeafDescendantItemsMock } = vi.hoisted(() => ({ runtimeAddItemsMock: vi.fn(), + runtimeCancelAllJobsForBaseMock: vi.fn(), runtimeCreateBaseMock: vi.fn(), runtimeDeleteBaseArtifactsMock: vi.fn(), - runtimeDeleteBaseMock: vi.fn(), runtimeDeleteItemChunkMock: 
vi.fn(), runtimeDeleteItemsMock: vi.fn(), runtimeListItemChunksMock: vi.fn(), runtimeReindexItemsMock: vi.fn(), runtimeSearchMock: vi.fn(), - failItemsMock: vi.fn(), + runtimeWaitForBaseWriteLocksMock: vi.fn(), knowledgeBaseCreateMock: vi.fn(), knowledgeBaseDeleteMock: vi.fn(), knowledgeBaseGetByIdMock: vi.fn(), @@ -50,14 +50,15 @@ vi.mock('@application', async () => { return mockApplicationFactory({ KnowledgeRuntimeService: { addItems: runtimeAddItemsMock, + cancelAllJobsForBase: runtimeCancelAllJobsForBaseMock, createBase: runtimeCreateBaseMock, - deleteBase: runtimeDeleteBaseMock, deleteBaseArtifacts: runtimeDeleteBaseArtifactsMock, deleteItemChunk: runtimeDeleteItemChunkMock, deleteItems: runtimeDeleteItemsMock, listItemChunks: runtimeListItemChunksMock, reindexItems: runtimeReindexItemsMock, - search: runtimeSearchMock + search: runtimeSearchMock, + waitForBaseWriteLocks: runtimeWaitForBaseWriteLocksMock } } as Parameters[0]) }) @@ -103,10 +104,6 @@ vi.mock('@data/services/KnowledgeItemService', () => ({ } })) -vi.mock('../runtime/utils/cleanup', () => ({ - failItems: failItemsMock -})) - const { KnowledgeOrchestrationService, KnowledgeRuntimeAddItemsPartialError } = await import( '../KnowledgeOrchestrationService' ) @@ -233,15 +230,15 @@ describe('KnowledgeOrchestrationService', () => { knowledgeItemGetItemsByBaseIdMock.mockResolvedValue([]) knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([createNoteItem()]) runtimeAddItemsMock.mockResolvedValue(undefined) + runtimeCancelAllJobsForBaseMock.mockResolvedValue(undefined) runtimeCreateBaseMock.mockResolvedValue(undefined) - runtimeDeleteBaseMock.mockResolvedValue([]) runtimeDeleteBaseArtifactsMock.mockResolvedValue(undefined) runtimeDeleteItemChunkMock.mockResolvedValue(undefined) runtimeDeleteItemsMock.mockResolvedValue(undefined) - failItemsMock.mockResolvedValue(undefined) runtimeListItemChunksMock.mockResolvedValue([]) runtimeReindexItemsMock.mockResolvedValue(undefined) 
runtimeSearchMock.mockResolvedValue([]) + runtimeWaitForBaseWriteLocksMock.mockResolvedValue(undefined) }) it('uses WhenReady phase and depends on KnowledgeRuntimeService', () => { @@ -302,60 +299,65 @@ describe('KnowledgeOrchestrationService', () => { expect(knowledgeBaseDeleteMock).toHaveBeenCalledWith('kb-1') }) - it('deletes the SQLite base before deleting vector artifacts', async () => { + it('cancels active jobs, waits for locks, then deletes artifacts + SQLite in order', async () => { const service = new KnowledgeOrchestrationService() await expect(service.deleteBase('kb-1')).resolves.toBeUndefined() - expect(runtimeDeleteBaseMock).toHaveBeenCalledWith('kb-1') - expect(knowledgeBaseDeleteMock).toHaveBeenCalledWith('kb-1') + expect(runtimeCancelAllJobsForBaseMock).toHaveBeenCalledWith('kb-1') + expect(runtimeWaitForBaseWriteLocksMock).toHaveBeenCalledWith('kb-1', 35_000) expect(runtimeDeleteBaseArtifactsMock).toHaveBeenCalledWith('kb-1') - expect(knowledgeBaseDeleteMock.mock.invocationCallOrder[0]).toBeLessThan( - runtimeDeleteBaseArtifactsMock.mock.invocationCallOrder[0] - ) + expect(knowledgeBaseDeleteMock).toHaveBeenCalledWith('kb-1') + + const orders = { + cancel: runtimeCancelAllJobsForBaseMock.mock.invocationCallOrder[0], + wait: runtimeWaitForBaseWriteLocksMock.mock.invocationCallOrder[0], + artifacts: runtimeDeleteBaseArtifactsMock.mock.invocationCallOrder[0], + dbDelete: knowledgeBaseDeleteMock.mock.invocationCallOrder[0] + } + expect(orders.cancel).toBeLessThan(orders.wait) + expect(orders.wait).toBeLessThan(orders.artifacts) + expect(orders.artifacts).toBeLessThan(orders.dbDelete) }) - it('does not delete the SQLite base when runtime base interruption fails', async () => { + it('aborts before the artifact delete when cancellation fails', async () => { const service = new KnowledgeOrchestrationService() - const deleteError = new Error('base interruption failed') - runtimeDeleteBaseMock.mockRejectedValueOnce(deleteError) + const cancelError = new 
Error('cancel failed') + runtimeCancelAllJobsForBaseMock.mockRejectedValueOnce(cancelError) - await expect(service.deleteBase('kb-1')).rejects.toBe(deleteError) + await expect(service.deleteBase('kb-1')).rejects.toBe(cancelError) - expect(runtimeDeleteBaseMock).toHaveBeenCalledWith('kb-1') + expect(runtimeWaitForBaseWriteLocksMock).not.toHaveBeenCalled() + expect(runtimeDeleteBaseArtifactsMock).not.toHaveBeenCalled() expect(knowledgeBaseDeleteMock).not.toHaveBeenCalled() - expect(runtimeDeleteBaseArtifactsMock).not.toHaveBeenCalled() }) - it('marks interrupted runtime items failed when SQLite base deletion fails', async () => { + it('skips SQLite delete when artifact cleanup fails (so user can retry from UI)', async () => { const service = new KnowledgeOrchestrationService() - const deleteError = new Error('sqlite delete failed') - runtimeDeleteBaseMock.mockResolvedValueOnce(['item-1', 'item-2']) - knowledgeBaseDeleteMock.mockRejectedValueOnce(deleteError) + const artifactError = new Error('artifact delete failed') + runtimeDeleteBaseArtifactsMock.mockRejectedValueOnce(artifactError) - await expect(service.deleteBase('kb-1')).rejects.toBe(deleteError) + await expect(service.deleteBase('kb-1')).rejects.toBe(artifactError) - expect(failItemsMock).toHaveBeenCalledWith(['item-1', 'item-2'], 'sqlite delete failed') - expect(runtimeDeleteBaseArtifactsMock).not.toHaveBeenCalled() + expect(knowledgeBaseDeleteMock).not.toHaveBeenCalled() }) - it('reports partial deletion when post-SQLite artifact cleanup fails', async () => { + it('wraps post-artifact SQLite failure as a partial-cleanup invalid-operation error', async () => { const service = new KnowledgeOrchestrationService() - runtimeDeleteBaseArtifactsMock.mockRejectedValueOnce(new Error('artifact delete failed')) + knowledgeBaseDeleteMock.mockRejectedValueOnce(new Error('sqlite delete failed')) await expect(service.deleteBase('kb-1')).rejects.toMatchObject({ message: expect.stringContaining( - 'Invalid operation: 
deleteBase - SQLite knowledge base was deleted, but vector artifact cleanup failed: artifact delete failed' + 'Invalid operation: deleteBase - Vector artifacts were deleted, but SQLite knowledge base cleanup failed: sqlite delete failed' ), details: { operation: 'deleteBase', - reason: expect.stringContaining('SQLite knowledge base was deleted, but vector artifact cleanup failed') + reason: expect.stringContaining('Vector artifacts were deleted, but SQLite knowledge base cleanup failed') } }) - expect(knowledgeBaseDeleteMock).toHaveBeenCalledWith('kb-1') expect(runtimeDeleteBaseArtifactsMock).toHaveBeenCalledWith('kb-1') - expect(failItemsMock).not.toHaveBeenCalled() + expect(knowledgeBaseDeleteMock).toHaveBeenCalledWith('kb-1') }) it('restores a failed base by creating a new base from source config and adding root items', async () => { @@ -658,7 +660,7 @@ describe('KnowledgeOrchestrationService', () => { knowledgeBaseGetByIdMock.mockResolvedValueOnce(sourceBase) knowledgeItemGetItemsByBaseIdMock.mockResolvedValueOnce([root]) runtimeAddItemsMock.mockRejectedValueOnce(error) - runtimeDeleteBaseMock.mockRejectedValueOnce(new Error('cleanup failed')) + runtimeCancelAllJobsForBaseMock.mockRejectedValueOnce(new Error('cleanup failed')) await expect( service.restoreBase({ diff --git a/src/main/services/knowledge/queue/KnowledgeQueueManager.ts b/src/main/services/knowledge/queue/KnowledgeQueueManager.ts deleted file mode 100644 index 91c92c9464..0000000000 --- a/src/main/services/knowledge/queue/KnowledgeQueueManager.ts +++ /dev/null @@ -1,349 +0,0 @@ -import { loggerService } from '@logger' -import PQueue from 'p-queue' - -import type { - EnqueueKnowledgeTaskOptions, - IndexLeafTaskEntry, - KnowledgeQueueSnapshot, - KnowledgeQueueTaskContext, - KnowledgeQueueTaskDescriptor, - PrepareRootTaskEntry -} from './types' - -const logger = loggerService.withContext('KnowledgeQueueManager') -const DEFAULT_CONCURRENCY = 5 - -class KnowledgeQueueInterruptedError extends Error { - 
constructor(message: string) { - super(message) - this.name = 'KnowledgeQueueInterruptedError' - } -} - -type KnowledgeQueueTaskStatus = 'pending' | 'running' - -type QueueEntry = EnqueueKnowledgeTaskOptions & { - controller: AbortController - interruptError?: KnowledgeQueueInterruptedError - reject: (error: Error) => void - resolve: () => void - runPromise?: Promise - promise: Promise - status: KnowledgeQueueTaskStatus - settled: boolean -} - -export class KnowledgeQueueManager { - private queue: PQueue - private isResetting = false - private resetReason: string | null = null - private readonly entries = new Map() - // Per-base serialization protects vector-store writes and status completion ordering. - private readonly baseWriteLocks = new Map>() - - constructor() { - this.queue = this.createQueue() - } - - async reset(reason: string): Promise { - if (this.isResetting) { - throw this.createResetError() - } - - this.resetReason = reason - this.isResetting = true - - try { - const interruptedEntries = this.interruptAll(reason) - this.queue.clear() - await this.waitForRunning(interruptedEntries.map((entry) => entry.itemId)) - await this.waitForBaseWriteLocks() - this.queue = this.createQueue() - - return interruptedEntries - } finally { - this.isResetting = false - this.resetReason = null - } - } - - enqueue(options: EnqueueKnowledgeTaskOptions): Promise { - if (this.isResetting) { - return Promise.reject(this.createResetError()) - } - - const existingEntry = this.entries.get(options.item.id) - if (existingEntry) { - return existingEntry.promise - } - - const entry = this.createEntry(options) - this.entries.set(entry.item.id, entry) - this.schedule(entry) - - return entry.promise - } - - interruptItems(itemIds: string[], reason: string): KnowledgeQueueTaskDescriptor[] { - const interruptedEntries = this.getEntriesByIds(itemIds) - - for (const entry of interruptedEntries) { - entry.interruptError ??= new KnowledgeQueueInterruptedError(reason) - - if 
(!entry.controller.signal.aborted) { - entry.controller.abort(entry.interruptError) - } - - if (entry.status === 'pending') { - this.rejectEntry(entry, this.createInterruptError(entry)) - } - } - - return interruptedEntries.map((entry) => this.createDescriptor(entry)) - } - - interruptBase(baseId: string, reason: string): KnowledgeQueueTaskDescriptor[] { - const itemIds = [...this.entries.values()].filter((entry) => entry.base.id === baseId).map((entry) => entry.item.id) - - return this.interruptItems(itemIds, reason) - } - - interruptAll(reason: string): KnowledgeQueueTaskDescriptor[] { - return this.interruptItems([...this.entries.keys()], reason) - } - - async waitForRunning(itemIds: string[]): Promise { - const runningPromises = this.getEntriesByIds(itemIds) - .filter((entry) => !entry.settled && entry.status === 'running') - .map((entry) => entry.runPromise ?? entry.promise) - - if (runningPromises.length === 0) { - return - } - - await Promise.allSettled(runningPromises) - } - - getSnapshot(): KnowledgeQueueSnapshot { - const snapshot: KnowledgeQueueSnapshot = { - pending: [], - running: [] - } - - for (const entry of this.entries.values()) { - if (entry.settled) { - continue - } - - snapshot[entry.status].push({ - ...this.createDescriptor(entry) - }) - } - - return snapshot - } - - async runWithBaseWriteLockForBase(baseId: string, task: () => Promise): Promise { - if (this.isResetting) { - throw this.createResetError() - } - - const previousLock = this.baseWriteLocks.get(baseId) ?? 
Promise.resolve() - let releaseCurrentLock!: () => void - const currentLock = new Promise((resolve) => { - releaseCurrentLock = resolve - }) - const nextLock = previousLock.catch(() => undefined).then(() => currentLock) - - this.baseWriteLocks.set(baseId, nextLock) - - try { - await previousLock.catch(() => undefined) - return await task() - } finally { - releaseCurrentLock() - - if (this.baseWriteLocks.get(baseId) === nextLock) { - this.baseWriteLocks.delete(baseId) - } - } - } - - private async waitForBaseWriteLocks(): Promise { - const activeLocks = [...this.baseWriteLocks.values()] - - if (activeLocks.length === 0) { - return - } - - await Promise.allSettled(activeLocks) - } - - private createQueue(): PQueue { - return new PQueue({ concurrency: DEFAULT_CONCURRENCY }) - } - - private createEntry(options: EnqueueKnowledgeTaskOptions): QueueEntry { - const controller = new AbortController() - let resolve!: () => void - let reject!: (error: Error) => void - const promise = new Promise((res, rej) => { - resolve = res - reject = rej - }) - - return { - ...options, - controller, - promise, - reject, - resolve, - settled: false, - status: 'pending' - } - } - - private schedule(entry: QueueEntry): void { - void this.queue.add(async () => { - if (this.entries.get(entry.item.id) !== entry || entry.settled || entry.status !== 'pending') { - return - } - - entry.status = 'running' - entry.runPromise = this.executeEntry(entry) - await entry.runPromise - }) - } - - private async executeEntry(entry: QueueEntry): Promise { - try { - this.throwIfInterrupted(entry) - await this.executeQueueEntry(entry) - - this.throwIfInterrupted(entry) - this.resolveEntry(entry) - } catch (error) { - const taskError = error instanceof Error ? 
error : new Error(String(error)) - - if (taskError !== entry.interruptError) { - logger.error('Knowledge queue task failed unexpectedly', taskError, { - baseId: entry.base.id, - itemId: entry.item.id, - kind: entry.kind - }) - } - - this.rejectEntry(entry, taskError) - } - } - - private async executeQueueEntry(entry: QueueEntry): Promise { - if (entry.kind === 'index-leaf') { - const context: KnowledgeQueueTaskContext = { - base: entry.base, - baseId: entry.base.id, - item: entry.item, - itemId: entry.item.id, - itemType: entry.item.type, - kind: entry.kind, - signal: entry.controller.signal, - runWithBaseWriteLock: (task) => this.runWithBaseWriteLock(entry, task) - } - - await entry.execute(context) - return - } - - const context: KnowledgeQueueTaskContext = { - base: entry.base, - baseId: entry.base.id, - item: entry.item, - itemId: entry.item.id, - itemType: entry.item.type, - kind: entry.kind, - signal: entry.controller.signal, - runWithBaseWriteLock: (task) => this.runWithBaseWriteLock(entry, task) - } - - await entry.execute(context) - } - - private async runWithBaseWriteLock(entry: QueueEntry, task: () => Promise): Promise { - this.throwIfInterrupted(entry) - - return await this.runWithBaseWriteLockForBase(entry.base.id, async () => { - this.throwIfInterrupted(entry) - - const result = await task() - this.throwIfInterrupted(entry) - return result - }) - } - - private getEntriesByIds(itemIds: string[]): QueueEntry[] { - const entries: QueueEntry[] = [] - - for (const itemId of new Set(itemIds)) { - const entry = this.entries.get(itemId) - if (entry) { - entries.push(entry) - } - } - - return entries - } - - private deleteEntry(entry: QueueEntry): void { - if (this.entries.get(entry.item.id) === entry) { - this.entries.delete(entry.item.id) - } - } - - private createDescriptor(entry: QueueEntry): KnowledgeQueueTaskDescriptor { - return { - base: entry.base, - baseId: entry.base.id, - itemId: entry.item.id, - itemType: entry.item.type, - kind: entry.kind - } - 
} - - private resolveEntry(entry: QueueEntry): void { - if (entry.settled) { - return - } - - entry.settled = true - entry.resolve() - this.deleteEntry(entry) - } - - private rejectEntry(entry: QueueEntry, error: Error): void { - if (entry.settled) { - return - } - - entry.settled = true - entry.reject(error) - this.deleteEntry(entry) - } - - private throwIfInterrupted(entry: QueueEntry): void { - if (entry.controller.signal.aborted) { - throw this.createInterruptError(entry) - } - } - - private createInterruptError(entry: QueueEntry): Error { - if (!entry.interruptError) { - throw new Error('Knowledge queue entry was aborted without an interrupt error') - } - - return entry.interruptError - } - - private createResetError(): Error { - return new KnowledgeQueueInterruptedError(this.resetReason!) - } -} diff --git a/src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.test.ts b/src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.test.ts deleted file mode 100644 index 06a14587d6..0000000000 --- a/src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.test.ts +++ /dev/null @@ -1,704 +0,0 @@ -import type { KnowledgeBase, KnowledgeItem, KnowledgeItemOf } from '@shared/data/types/knowledge' -import { beforeEach, describe, expect, it, vi } from 'vitest' - -import { KnowledgeQueueManager } from '../KnowledgeQueueManager' -import type { - EnqueueKnowledgeTaskOptions, - IndexLeafTaskEntry, - KnowledgeQueueTaskDescriptor, - PrepareRootTaskEntry -} from '../types' - -const { loggerErrorMock, loggerWarnMock } = vi.hoisted(() => ({ - loggerErrorMock: vi.fn(), - loggerWarnMock: vi.fn() -})) - -vi.mock('@logger', () => ({ - loggerService: { - withContext: () => ({ - debug: vi.fn(), - error: loggerErrorMock, - info: vi.fn(), - warn: loggerWarnMock - }) - } -})) - -const BASE_ID = 'base-1' -const BASE: KnowledgeBase = { - id: BASE_ID, - name: 'Base', - groupId: null, - emoji: '📁', - dimensions: 1024, - embeddingModelId: 
'ollama::nomic-embed-text', - status: 'completed', - error: null, - chunkSize: 1024, - chunkOverlap: 200, - searchMode: 'hybrid', - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -function createDeferred() { - let resolve!: (value: T | PromiseLike) => void - let reject!: (reason?: unknown) => void - const promise = new Promise((res, rej) => { - resolve = res - reject = rej - }) - - return { promise, reject, resolve } -} - -function createNoteItem( - id = 'note-1', - status: KnowledgeItem['status'] = 'processing', - baseId = BASE_ID -): KnowledgeItemOf<'note'> { - const lifecycle = - status === 'failed' - ? ({ status, phase: null, error: `failed ${id}` } as const) - : ({ status, phase: null, error: null } as const) - - return { - id, - baseId, - groupId: null, - type: 'note', - data: { source: id, content: `hello ${id}` }, - ...lifecycle, - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' - } -} - -function createDirectoryItem( - id = 'dir-1', - status: KnowledgeItem['status'] = 'processing', - baseId = BASE_ID -): KnowledgeItemOf<'directory'> { - const lifecycle = - status === 'failed' - ? 
({ status, phase: null, error: `failed ${id}` } as const) - : ({ status, phase: null, error: null } as const) - - return { - id, - baseId, - groupId: null, - type: 'directory', - data: { source: `/docs/${id}`, path: `/docs/${id}` }, - ...lifecycle, - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' - } -} - -function createIndexTask( - itemId: string, - execute: EnqueueKnowledgeTaskOptions['execute'], - baseId = BASE_ID -): EnqueueKnowledgeTaskOptions { - return { - base: { ...BASE, id: baseId }, - kind: 'index-leaf', - item: createNoteItem(itemId, 'processing', baseId), - execute - } -} - -function createPrepareTask( - itemId: string, - execute: EnqueueKnowledgeTaskOptions['execute'], - baseId = BASE_ID -): EnqueueKnowledgeTaskOptions { - return { - base: { ...BASE, id: baseId }, - kind: 'prepare-root', - item: createDirectoryItem(itemId, 'processing', baseId), - execute - } -} - -function createTaskDescriptor( - itemId: string, - kind: KnowledgeQueueTaskDescriptor['kind'] = 'index-leaf', - baseId = BASE_ID -): KnowledgeQueueTaskDescriptor { - return { - base: { ...BASE, id: baseId }, - baseId, - itemId, - itemType: kind === 'index-leaf' ? 'note' : 'directory', - kind - } -} - -function captureError(promise: Promise): Promise { - return promise.then( - () => new Error('Expected promise to reject'), - (error) => (error instanceof Error ? 
error : new Error(String(error))) - ) -} - -async function flushPromises(): Promise { - await Promise.resolve() - await Promise.resolve() -} - -describe('KnowledgeQueueManager', () => { - beforeEach(() => { - vi.clearAllMocks() - }) - - it('deduplicates queued work for the same item', async () => { - const manager = new KnowledgeQueueManager() - const execute = vi.fn(async () => undefined) - - const firstPromise = manager.enqueue(createIndexTask('item-1', execute)) - const secondPromise = manager.enqueue(createIndexTask('item-1', execute)) - - expect(secondPromise).toBe(firstPromise) - await expect(firstPromise).resolves.toBeUndefined() - expect(execute).toHaveBeenCalledTimes(1) - expect(manager.getSnapshot()).toEqual({ pending: [], running: [] }) - }) - - it('preserves task kind in snapshots and interrupted entries', async () => { - const manager = new KnowledgeQueueManager() - const blocker = createDeferred() - const started = createDeferred() - - const taskPromise = manager.enqueue( - createPrepareTask('dir-1', async () => { - started.resolve() - await blocker.promise - }) - ) - const taskError = captureError(taskPromise) - - await started.promise - - expect(manager.getSnapshot().running).toEqual([createTaskDescriptor('dir-1', 'prepare-root')]) - expect(manager.interruptItems(['dir-1'], 'deleted')).toEqual([createTaskDescriptor('dir-1', 'prepare-root')]) - - blocker.resolve() - await expect(taskError).resolves.toMatchObject({ message: 'deleted' }) - }) - - it('rejects pending tasks on interrupt and does not execute them later', async () => { - const manager = new KnowledgeQueueManager() - const blockers = Array.from({ length: 5 }, () => createDeferred()) - const executedItemIds: string[] = [] - - const runningPromises = blockers.map((deferred, index) => - manager.enqueue( - createIndexTask(`running-${index}`, async (context) => { - executedItemIds.push(context.itemId) - await deferred.promise - }) - ) - ) - - await vi.waitFor(() => { - 
expect(executedItemIds).toHaveLength(5) - }) - - const pendingPromise = manager.enqueue( - createIndexTask('pending', async (context) => { - executedItemIds.push(context.itemId) - }) - ) - const pendingError = captureError(pendingPromise) - - expect(manager.getSnapshot().pending).toEqual([createTaskDescriptor('pending')]) - - const interruptedEntries = manager.interruptItems(['pending'], 'deleted') - - expect(interruptedEntries).toEqual([createTaskDescriptor('pending')]) - await expect(pendingError).resolves.toMatchObject({ message: 'deleted' }) - expect(manager.getSnapshot().pending).toEqual([]) - - for (const blocker of blockers) { - blocker.resolve() - } - - await expect(Promise.all(runningPromises)).resolves.toEqual([undefined, undefined, undefined, undefined, undefined]) - await flushPromises() - expect(executedItemIds).not.toContain('pending') - }) - - it('waits for interrupted running tasks to really finish before waitForRunning resolves', async () => { - const manager = new KnowledgeQueueManager() - const started = createDeferred() - const finish = createDeferred() - let waitResolved = false - let signalAbortedAfterFinish = false - - const taskPromise = manager.enqueue( - createIndexTask('running', async (context) => { - started.resolve() - await finish.promise - signalAbortedAfterFinish = context.signal.aborted - }) - ) - const taskError = captureError(taskPromise) - - await started.promise - manager.interruptItems(['running'], 'deleted') - - const waitPromise = manager.waitForRunning(['running']).then(() => { - waitResolved = true - }) - await flushPromises() - - expect(waitResolved).toBe(false) - - finish.resolve() - await waitPromise - - expect(signalAbortedAfterFinish).toBe(true) - await expect(taskError).resolves.toMatchObject({ message: 'deleted' }) - expect(loggerErrorMock).not.toHaveBeenCalled() - }) - - it('treats signal throwIfAborted as a normal running task interruption', async () => { - const manager = new KnowledgeQueueManager() - const 
started = createDeferred() - const finish = createDeferred() - - const taskPromise = manager.enqueue( - createIndexTask('running', async (context) => { - started.resolve() - await finish.promise - context.signal.throwIfAborted() - }) - ) - const taskError = captureError(taskPromise) - - await started.promise - manager.interruptItems(['running'], 'deleted') - finish.resolve() - - await expect(taskError).resolves.toMatchObject({ message: 'deleted' }) - expect(loggerErrorMock).not.toHaveBeenCalled() - }) - - it('resets pending work and waits for running work to settle', async () => { - const manager = new KnowledgeQueueManager() - const blockers = Array.from({ length: 5 }, () => createDeferred()) - const executedItemIds: string[] = [] - - const runningPromises = blockers.map((deferred, index) => - manager.enqueue( - createIndexTask(`running-${index}`, async (context) => { - executedItemIds.push(context.itemId) - await deferred.promise - }) - ) - ) - const runningErrors = runningPromises.map(captureError) - - await vi.waitFor(() => { - expect(manager.getSnapshot().running).toHaveLength(5) - }) - - const pendingPromise = manager.enqueue( - createIndexTask('pending', async (context) => { - executedItemIds.push(context.itemId) - }) - ) - const pendingError = captureError(pendingPromise) - let resetResolved = false - - const resetPromise = manager.reset('reset').then((entries) => { - resetResolved = true - return entries - }) - - await expect(pendingError).resolves.toMatchObject({ message: 'reset' }) - await flushPromises() - expect(resetResolved).toBe(false) - - for (const blocker of blockers) { - blocker.resolve() - } - - await expect(resetPromise).resolves.toEqual([ - ...Array.from({ length: 5 }, (_, index) => createTaskDescriptor(`running-${index}`)), - createTaskDescriptor('pending') - ]) - await expect(Promise.all(runningErrors)).resolves.toEqual( - Array.from({ length: 5 }, () => expect.objectContaining({ message: 'reset' })) - ) - 
expect(manager.getSnapshot()).toEqual({ pending: [], running: [] }) - expect(executedItemIds).toEqual(['running-0', 'running-1', 'running-2', 'running-3', 'running-4']) - }) - - it('rejects new work while reset is waiting for running work', async () => { - const manager = new KnowledgeQueueManager() - const started = createDeferred() - const finish = createDeferred() - const executeAfterReset = vi.fn(async () => undefined) - - const runningPromise = manager.enqueue( - createIndexTask('running', async () => { - started.resolve() - await finish.promise - }) - ) - const runningError = captureError(runningPromise) - - await started.promise - - const resetPromise = manager.reset('reset') - const rejectedDuringReset = captureError(manager.enqueue(createIndexTask('during-reset', executeAfterReset))) - - await expect(rejectedDuringReset).resolves.toMatchObject({ message: 'reset' }) - expect(executeAfterReset).not.toHaveBeenCalled() - - finish.resolve() - - await expect(resetPromise).resolves.toEqual([createTaskDescriptor('running')]) - await expect(runningError).resolves.toMatchObject({ message: 'reset' }) - - await expect(manager.enqueue(createIndexTask('after-reset', executeAfterReset))).resolves.toBeUndefined() - expect(executeAfterReset).toHaveBeenCalledOnce() - }) - - it('rejects new external writes while reset is waiting for running work', async () => { - const manager = new KnowledgeQueueManager() - const started = createDeferred() - const finish = createDeferred() - const executeAfterReset = vi.fn(async () => undefined) - - const runningPromise = manager.enqueue( - createIndexTask('running', async () => { - started.resolve() - await finish.promise - }) - ) - const runningError = captureError(runningPromise) - - await started.promise - - const resetPromise = manager.reset('reset') - const rejectedDuringReset = captureError( - manager.runWithBaseWriteLockForBase(BASE_ID, async () => { - await executeAfterReset() - }) - ) - - await 
expect(rejectedDuringReset).resolves.toMatchObject({ message: 'reset' }) - expect(executeAfterReset).not.toHaveBeenCalled() - - finish.resolve() - - await expect(resetPromise).resolves.toEqual([createTaskDescriptor('running')]) - await expect(runningError).resolves.toMatchObject({ message: 'reset' }) - - await expect(manager.runWithBaseWriteLockForBase(BASE_ID, executeAfterReset)).resolves.toBeUndefined() - expect(executeAfterReset).toHaveBeenCalledOnce() - }) - - it('waits for active external writes before reset resolves', async () => { - const manager = new KnowledgeQueueManager() - const activeWriteStarted = createDeferred() - const releaseActiveWrite = createDeferred() - let resetResolved = false - const events: string[] = [] - - const externalPromise = manager.runWithBaseWriteLockForBase(BASE_ID, async () => { - events.push('lock:external') - activeWriteStarted.resolve() - await releaseActiveWrite.promise - events.push('unlock:external') - }) - - await activeWriteStarted.promise - - const resetPromise = manager.reset('reset').then((entries) => { - resetResolved = true - return entries - }) - await flushPromises() - - expect(resetResolved).toBe(false) - expect(events).toEqual(['lock:external']) - - releaseActiveWrite.resolve() - - await expect(resetPromise).resolves.toEqual([]) - await expect(externalPromise).resolves.toBeUndefined() - expect(events).toEqual(['lock:external', 'unlock:external']) - }) - - it('waits for external writes already chained behind queued writes before reset resolves', async () => { - const manager = new KnowledgeQueueManager() - const queuedWriteStarted = createDeferred() - const releaseQueuedWrite = createDeferred() - const releaseExternalWrite = createDeferred() - let resetResolved = false - const events: string[] = [] - - const queuedPromise = manager.enqueue( - createIndexTask('queued', async (context) => { - await context.runWithBaseWriteLock(async () => { - events.push('lock:queued') - queuedWriteStarted.resolve() - await 
releaseQueuedWrite.promise - events.push('unlock:queued') - }) - }) - ) - const queuedError = captureError(queuedPromise) - - await queuedWriteStarted.promise - - const externalPromise = manager.runWithBaseWriteLockForBase(BASE_ID, async () => { - events.push('lock:external') - await releaseExternalWrite.promise - events.push('unlock:external') - }) - const resetPromise = manager.reset('reset').then((entries) => { - resetResolved = true - return entries - }) - - await flushPromises() - expect(resetResolved).toBe(false) - expect(events).toEqual(['lock:queued']) - - releaseQueuedWrite.resolve() - - await vi.waitFor(() => { - expect(events).toEqual(['lock:queued', 'unlock:queued', 'lock:external']) - }) - expect(resetResolved).toBe(false) - - releaseExternalWrite.resolve() - - await expect(resetPromise).resolves.toEqual([createTaskDescriptor('queued')]) - await expect(queuedError).resolves.toMatchObject({ message: 'reset' }) - await expect(externalPromise).resolves.toBeUndefined() - expect(events).toEqual(['lock:queued', 'unlock:queued', 'lock:external', 'unlock:external']) - }) - - it('rejects a second reset with the current reset reason while reset is running', async () => { - const manager = new KnowledgeQueueManager() - const started = createDeferred() - const finish = createDeferred() - - const runningPromise = manager.enqueue( - createIndexTask('running', async () => { - started.resolve() - await finish.promise - }) - ) - const runningError = captureError(runningPromise) - - await started.promise - - const resetPromise = manager.reset('first-reset') - const secondResetError = captureError(manager.reset('second-reset')) - - await expect(secondResetError).resolves.toMatchObject({ message: 'first-reset' }) - - finish.resolve() - - await expect(resetPromise).resolves.toEqual([createTaskDescriptor('running')]) - await expect(runningError).resolves.toMatchObject({ message: 'first-reset' }) - expect(loggerErrorMock).not.toHaveBeenCalled() - }) - - it('serializes writes 
for the same base', async () => { - const manager = new KnowledgeQueueManager() - const releaseFirstWrite = createDeferred() - const firstInWriteLock = createDeferred() - const secondStarted = createDeferred() - const events: string[] = [] - - const firstPromise = manager.enqueue( - createIndexTask('first', async (context) => { - await context.runWithBaseWriteLock(async () => { - events.push('lock:first') - firstInWriteLock.resolve() - await releaseFirstWrite.promise - events.push('unlock:first') - }) - }) - ) - const secondPromise = manager.enqueue( - createIndexTask('second', async (context) => { - secondStarted.resolve() - await context.runWithBaseWriteLock(async () => { - events.push('lock:second') - }) - }) - ) - - await firstInWriteLock.promise - await secondStarted.promise - await flushPromises() - - expect(events).toEqual(['lock:first']) - - releaseFirstWrite.resolve() - await expect(Promise.all([firstPromise, secondPromise])).resolves.toEqual([undefined, undefined]) - expect(events).toEqual(['lock:first', 'unlock:first', 'lock:second']) - }) - - it('serializes external writes with queued writes for the same base', async () => { - const manager = new KnowledgeQueueManager() - const releaseQueuedWrite = createDeferred() - const queuedInWriteLock = createDeferred() - const events: string[] = [] - - const queuedPromise = manager.enqueue( - createIndexTask('queued', async (context) => { - await context.runWithBaseWriteLock(async () => { - events.push('lock:queued') - queuedInWriteLock.resolve() - await releaseQueuedWrite.promise - events.push('unlock:queued') - }) - }) - ) - - await queuedInWriteLock.promise - const externalPromise = manager.runWithBaseWriteLockForBase(BASE_ID, async () => { - events.push('lock:external') - }) - await flushPromises() - - expect(events).toEqual(['lock:queued']) - - releaseQueuedWrite.resolve() - await expect(Promise.all([queuedPromise, externalPromise])).resolves.toEqual([undefined, undefined]) - 
expect(events).toEqual(['lock:queued', 'unlock:queued', 'lock:external']) - }) - - it('does not block different-base external writes', async () => { - const manager = new KnowledgeQueueManager() - const releaseFirstBaseWrite = createDeferred() - const firstBaseInWriteLock = createDeferred() - const events: string[] = [] - - const queuedPromise = manager.enqueue( - createIndexTask('queued', async (context) => { - await context.runWithBaseWriteLock(async () => { - events.push('lock:base-1') - firstBaseInWriteLock.resolve() - await releaseFirstBaseWrite.promise - events.push('unlock:base-1') - }) - }) - ) - - await firstBaseInWriteLock.promise - await manager.runWithBaseWriteLockForBase('base-2', async () => { - events.push('lock:base-2') - }) - - expect(events).toEqual(['lock:base-1', 'lock:base-2']) - - releaseFirstBaseWrite.resolve() - await expect(queuedPromise).resolves.toBeUndefined() - expect(events).toEqual(['lock:base-1', 'lock:base-2', 'unlock:base-1']) - }) - - it('does not enter the base write lock body after being interrupted while waiting', async () => { - const manager = new KnowledgeQueueManager() - const releaseFirstWrite = createDeferred() - const firstInWriteLock = createDeferred() - const secondStarted = createDeferred() - const events: string[] = [] - - const firstPromise = manager.enqueue( - createIndexTask('first', async (context) => { - await context.runWithBaseWriteLock(async () => { - events.push('lock:first') - firstInWriteLock.resolve() - await releaseFirstWrite.promise - events.push('unlock:first') - }) - }) - ) - const secondPromise = manager.enqueue( - createIndexTask('second', async (context) => { - secondStarted.resolve() - await context.runWithBaseWriteLock(async () => { - events.push('lock:second') - }) - }) - ) - const secondError = captureError(secondPromise) - - await firstInWriteLock.promise - await secondStarted.promise - - manager.interruptItems(['second'], 'deleted') - releaseFirstWrite.resolve() - - await 
expect(firstPromise).resolves.toBeUndefined() - await expect(secondError).resolves.toMatchObject({ message: 'deleted' }) - expect(events).toEqual(['lock:first', 'unlock:first']) - }) - - it('rejects failed tasks, logs unexpected errors, and continues later work', async () => { - const manager = new KnowledgeQueueManager() - const executeNext = vi.fn(async () => undefined) - const failure = new Error('execute failed') - - const failedPromise = manager.enqueue( - createIndexTask('failed', async () => { - throw failure - }) - ) - const failedError = captureError(failedPromise) - const nextPromise = manager.enqueue(createIndexTask('next', executeNext)) - - await expect(failedError).resolves.toBe(failure) - await expect(nextPromise).resolves.toBeUndefined() - expect(executeNext).toHaveBeenCalledOnce() - expect(manager.getSnapshot()).toEqual({ pending: [], running: [] }) - expect(loggerErrorMock).toHaveBeenCalledWith('Knowledge queue task failed unexpectedly', failure, { - baseId: BASE_ID, - itemId: 'failed', - kind: 'index-leaf' - }) - }) - - it('logs non-interruption errors even after a task has been aborted', async () => { - const manager = new KnowledgeQueueManager() - const started = createDeferred() - const finish = createDeferred() - const failure = new Error('failed after abort') - - const taskPromise = manager.enqueue( - createIndexTask('running', async (context) => { - started.resolve() - await finish.promise - - if (context.signal.aborted) { - throw failure - } - }) - ) - const taskError = captureError(taskPromise) - - await started.promise - manager.interruptItems(['running'], 'deleted') - finish.resolve() - - await expect(taskError).resolves.toBe(failure) - expect(loggerErrorMock).toHaveBeenCalledWith('Knowledge queue task failed unexpectedly', failure, { - baseId: BASE_ID, - itemId: 'running', - kind: 'index-leaf' - }) - }) -}) diff --git a/src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.types.ts 
b/src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.types.ts deleted file mode 100644 index debbe6103e..0000000000 --- a/src/main/services/knowledge/queue/__tests__/KnowledgeQueueManager.types.ts +++ /dev/null @@ -1,163 +0,0 @@ -/** - * Type-safety regression tests for knowledge queue task entries. - * - * This file is typechecked by `pnpm typecheck:node`; every `@ts-expect-error` - * directive asserts an invalid queue task shape that must stay rejected. - */ - -import type { KnowledgeBase, KnowledgeItem, KnowledgeItemOf } from '@shared/data/types/knowledge' - -import type { EnqueueKnowledgeTaskOptions } from '../types' - -const base: KnowledgeBase = { - id: 'base-1', - name: 'Base', - groupId: null, - emoji: '📁', - dimensions: 1024, - embeddingModelId: 'ollama::nomic-embed-text', - status: 'completed', - error: null, - chunkSize: 1024, - chunkOverlap: 200, - searchMode: 'hybrid', - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -const lifecycle = { - status: 'processing', - phase: null, - error: null -} as const satisfies Pick - -const noteItem: KnowledgeItemOf<'note'> = { - id: 'note-1', - baseId: base.id, - groupId: null, - type: 'note', - data: { source: 'note-1', content: 'hello note-1' }, - ...lifecycle, - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -const fileItem: KnowledgeItemOf<'file'> = { - id: 'file-1', - baseId: base.id, - groupId: null, - type: 'file', - data: { - source: 'file-1', - file: { - id: 'file-1', - name: 'file.md', - origin_name: 'file.md', - path: '/tmp/file.md', - size: 1, - ext: '.md', - type: 'text', - created_at: '2026-04-08T00:00:00.000Z', - count: 1 - } - }, - ...lifecycle, - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -const urlItem: KnowledgeItemOf<'url'> = { - id: 'url-1', - baseId: base.id, - groupId: null, - type: 'url', - data: { source: 'url-1', url: 'https://example.com' }, - ...lifecycle, - 
createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -const directoryItem: KnowledgeItemOf<'directory'> = { - id: 'dir-1', - baseId: base.id, - groupId: null, - type: 'directory', - data: { source: '/tmp/docs', path: '/tmp/docs' }, - ...lifecycle, - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -const sitemapItem: KnowledgeItemOf<'sitemap'> = { - id: 'sitemap-1', - baseId: base.id, - groupId: null, - type: 'sitemap', - data: { source: 'https://example.com/sitemap.xml', url: 'https://example.com/sitemap.xml' }, - ...lifecycle, - createdAt: '2026-04-08T00:00:00.000Z', - updatedAt: '2026-04-08T00:00:00.000Z' -} - -const ok = async (): Promise => undefined - -const validTasks = [ - { - base, - item: noteItem, - kind: 'index-leaf', - execute: ok - }, - { - base, - item: fileItem, - kind: 'index-leaf', - execute: ok - }, - { - base, - item: urlItem, - kind: 'index-leaf', - execute: ok - }, - { - base, - item: directoryItem, - kind: 'prepare-root', - execute: ok - }, - { - base, - item: sitemapItem, - kind: 'prepare-root', - execute: ok - } -] satisfies EnqueueKnowledgeTaskOptions[] -void validTasks - -// @ts-expect-error - sitemap roots must be prepared before leaf indexing. -const _indexSitemap: EnqueueKnowledgeTaskOptions = { - base, - item: sitemapItem, - kind: 'index-leaf', - execute: ok -} -void _indexSitemap - -// @ts-expect-error - note leaf items cannot be prepared as roots. -const _prepareNote: EnqueueKnowledgeTaskOptions = { - base, - item: noteItem, - kind: 'prepare-root', - execute: ok -} -void _prepareNote - -const _rawItemId: EnqueueKnowledgeTaskOptions = { - base, - // @ts-expect-error - public enqueue entries must carry the typed item, not a raw id. 
- itemId: sitemapItem.id, - kind: 'index-leaf', - execute: ok -} -void _rawItemId diff --git a/src/main/services/knowledge/queue/types.ts b/src/main/services/knowledge/queue/types.ts deleted file mode 100644 index a009e4c090..0000000000 --- a/src/main/services/knowledge/queue/types.ts +++ /dev/null @@ -1,51 +0,0 @@ -import type { KnowledgeBase, KnowledgeItemOf, KnowledgeItemType } from '@shared/data/types/knowledge' - -import type { IndexableKnowledgeItem } from '../types/items' - -interface KnowledgeQueueBaseTaskEntry { - base: KnowledgeBase - item: TItem -} - -export interface IndexLeafTaskEntry extends KnowledgeQueueBaseTaskEntry { - kind: 'index-leaf' -} - -export interface PrepareRootTaskEntry - extends KnowledgeQueueBaseTaskEntry | KnowledgeItemOf<'sitemap'>> { - kind: 'prepare-root' -} - -export type KnowledgeQueueTaskEntry = IndexLeafTaskEntry | PrepareRootTaskEntry - -export type KnowledgeQueueTaskContext = - TEntry extends KnowledgeQueueTaskEntry - ? TEntry & { - baseId: string - itemId: string - itemType: TEntry['item']['type'] - /** Interruption waits for running work to observe this signal and settle. */ - signal: AbortSignal - runWithBaseWriteLock(task: () => Promise): Promise - } - : never - -export type EnqueueKnowledgeTaskOptions = - TEntry extends KnowledgeQueueTaskEntry - ? 
TEntry & { - execute: (context: KnowledgeQueueTaskContext) => Promise - } - : never - -export interface KnowledgeQueueTaskDescriptor { - base: KnowledgeBase - baseId: string - itemId: string - itemType: KnowledgeItemType - kind: KnowledgeQueueTaskEntry['kind'] -} - -export interface KnowledgeQueueSnapshot { - pending: KnowledgeQueueTaskDescriptor[] - running: KnowledgeQueueTaskDescriptor[] -} diff --git a/src/main/services/knowledge/runtime/KnowledgeRuntimeService.ts b/src/main/services/knowledge/runtime/KnowledgeRuntimeService.ts index e95a538216..cd69575dd3 100644 --- a/src/main/services/knowledge/runtime/KnowledgeRuntimeService.ts +++ b/src/main/services/knowledge/runtime/KnowledgeRuntimeService.ts @@ -3,71 +3,70 @@ import { knowledgeBaseService } from '@data/services/KnowledgeBaseService' import { knowledgeItemService } from '@data/services/KnowledgeItemService' import { loggerService } from '@logger' import { BaseService, DependsOn, Injectable, Phase, ServicePhase } from '@main/core/lifecycle' -import { DataApiErrorFactory, ErrorCode, isDataApiError } from '@shared/data/api' +import { DataApiErrorFactory } from '@shared/data/api' import { - type KnowledgeBase, KnowledgeChunkMetadataSchema, type KnowledgeItem, type KnowledgeItemChunk, - type KnowledgeItemOf, type KnowledgeRuntimeAddItemInput, type KnowledgeSearchResult } from '@shared/data/types/knowledge' import { MetadataMode } from '@vectorstores/core' import { embedMany } from 'ai' -import { KnowledgeQueueManager } from '../queue/KnowledgeQueueManager' -import type { - IndexLeafTaskEntry, - KnowledgeQueueTaskContext, - KnowledgeQueueTaskDescriptor, - KnowledgeQueueTaskEntry, - PrepareRootTaskEntry -} from '../queue/types' -import { loadKnowledgeItemDocuments } from '../readers/KnowledgeReader' import { rerankKnowledgeSearchResults } from '../rerank/rerank' -import type { IndexableKnowledgeItem } from '../types/items' -import { chunkDocuments } from '../utils/chunk' -import { embedDocuments } from 
'../utils/embed' -import { filterIndexableKnowledgeItems, isIndexableKnowledgeItem } from '../utils/items' +import { indexLeafJobHandler } from '../tasks/indexLeafJobHandler' +import { prepareRootJobHandler } from '../tasks/prepareRootJobHandler' +import { filterIndexableKnowledgeItems, isContainerKnowledgeItem } from '../utils/items' import { getEmbedModel } from '../utils/model' import { mapChunkDocument } from './utils/chunks' -import { deleteItemVectors, deleteVectorsForEntries, failItems } from './utils/cleanup' -import { prepareKnowledgeItem } from './utils/prepare' +import { deleteItemVectors } from './utils/cleanup' import { applyRelevanceThreshold, getInitialSearchScoreKind, withSearchRanks } from './utils/search' const logger = loggerService.withContext('KnowledgeRuntimeService') -const SHUTDOWN_INTERRUPTED_REASON = 'Knowledge task interrupted by service shutdown' -const DELETE_INTERRUPTED_REASON = 'Knowledge task interrupted by item deletion' -const REINDEX_INTERRUPTED_REASON = 'Knowledge task interrupted by reindex' -const KNOWLEDGE_EMPTY_CONTENT_REASON = 'KNOWLEDGE_EMPTY_CONTENT' +const ACTIVE_STATUSES = ['pending', 'delayed', 'running'] as const +const ACTIVE_JOB_LIMIT = 5000 +const DEFAULT_LOCK_WAIT_TIMEOUT_MS = 35_000 const SEARCH_TOKEN_PATTERN = /[\p{L}\p{N}_]+/u -type QueueTaskLogContext = { - baseId: string - itemId: string - kind: KnowledgeQueueTaskEntry['kind'] -} - -const assertNeverKnowledgeItem = (item: never): never => { - throw new Error(`Unsupported knowledge item type: ${String((item as { type?: unknown }).type)}`) -} +type JobInputWithItem = { itemId?: string } | null @Injectable('KnowledgeRuntimeService') @ServicePhase(Phase.WhenReady) @DependsOn(['KnowledgeVectorStoreService']) export class KnowledgeRuntimeService extends BaseService { - private queue = new KnowledgeQueueManager() + /** + * Layer 3 business mutex per knowledge base. 
Promise-chain serialization + * keeps vector-store writes and DB status flips together across all handler + * instances, including instances created after crash + retry. + * + * @internal Handlers reach this only via runWithBaseWriteLockForBase(). + */ + private readonly baseWriteLocks = new Map>() protected onInit(): void { - this.queue = new KnowledgeQueueManager() + const jobManager = application.get('JobManager') + jobManager.registerHandler('knowledge.prepare-root', prepareRootJobHandler) + jobManager.registerHandler('knowledge.index-leaf', indexLeafJobHandler) } protected async onStop(): Promise { - const interruptedEntries = this.queue.interruptAll(SHUTDOWN_INTERRUPTED_REASON) - await this.queue.waitForRunning(interruptedEntries.map((entry) => entry.itemId)) - await this.cleanupInterruptedEntries(interruptedEntries, SHUTDOWN_INTERRUPTED_REASON) + const jobManager = application.get('JobManager') + await Promise.allSettled([ + jobManager.cancelMany({ type: 'knowledge.prepare-root' }, 'service-shutdown'), + jobManager.cancelMany({ type: 'knowledge.index-leaf' }, 'service-shutdown') + ]) + // Cap the drain wait so a wedged handler cannot block process exit beyond + // the outer Application shutdown timeout (5s). Stragglers past this point + // are recovered on next startup. + await this.waitForBaseWriteLocks(undefined, DEFAULT_LOCK_WAIT_TIMEOUT_MS) + // Intentionally no item.status rollback. Items left in 'processing' here + // are recovered after restart: JobManager.onAllReady's startup-recovery + // flips their jobs back to 'pending' and the handler re-runs. The handler + // early-returns when item.status is already 'completed', so the only + // observable cost is the brief window where item.status lingers as + // 'processing' between shutdown and next startup recovery. 
} async createBase(baseId: string): Promise { @@ -76,34 +75,16 @@ export class KnowledgeRuntimeService extends BaseService { await vectorStoreService.createStore(base) } - async deleteBase(baseId: string): Promise { - const interruptedEntries = this.queue.interruptBase(baseId, DELETE_INTERRUPTED_REASON) - await this.queue.waitForRunning(interruptedEntries.map((entry) => entry.itemId)) - - let cleanupEntries: Array<{ base: KnowledgeBase; baseId: string; itemIds: string[] }> - try { - cleanupEntries = await this.expandInterruptedEntries(interruptedEntries) - } catch (error) { - const normalizedError = error instanceof Error ? error : new Error(String(error)) - await this.persistFailureStateBestEffort( - interruptedEntries.map((entry) => entry.itemId), - normalizedError.message, - { - baseId, - operation: 'deleteBase' - } - ) - throw error - } - - return cleanupEntries.flatMap((entry) => entry.itemIds) - } - async deleteBaseArtifacts(baseId: string): Promise { const vectorStoreService = application.get('KnowledgeVectorStoreService') await vectorStoreService.deleteStore(baseId) } + async cancelAllJobsForBase(baseId: string): Promise { + const jobManager = application.get('JobManager') + await jobManager.cancelMany({ queue: `base.${baseId}` }, 'delete-base') + } + async addItems(baseId: string, inputs: KnowledgeRuntimeAddItemInput[]): Promise { if (inputs.length === 0) { return @@ -112,15 +93,19 @@ export class KnowledgeRuntimeService extends BaseService { const base = await knowledgeBaseService.getById(baseId) const acceptedItems: KnowledgeItem[] = [] - await this.queue.runWithBaseWriteLockForBase(base.id, async () => { + // Hold the Layer 3 lock across create + status + enqueue so a concurrent + // reindexItems cannot interleave its list-and-cancel pass partway through. 
+ await this.runWithBaseWriteLockForBase(base.id, async () => { try { for (const input of inputs) { const createdItem = await knowledgeItemService.create(base.id, input) acceptedItems.push(createdItem) - acceptedItems[acceptedItems.length - 1] = - createdItem.type === 'directory' || createdItem.type === 'sitemap' - ? await knowledgeItemService.updateStatus(createdItem.id, 'processing', { phase: 'preparing' }) - : await knowledgeItemService.updateStatus(createdItem.id, 'processing') + acceptedItems[acceptedItems.length - 1] = isContainerKnowledgeItem(createdItem) + ? await knowledgeItemService.updateStatus(createdItem.id, 'processing', { phase: 'preparing' }) + : await knowledgeItemService.updateStatus(createdItem.id, 'processing') + } + for (const item of acceptedItems) { + await this.enqueueRootItem(item) } } catch (error) { const normalizedError = error instanceof Error ? error : new Error(String(error)) @@ -133,83 +118,126 @@ export class KnowledgeRuntimeService extends BaseService { throw error } }) - - for (const item of acceptedItems) { - await this.submitRuntimeItem(base, item) - } } async reindexItems(baseId: string, rootItems: KnowledgeItem[]): Promise { + const jobManager = application.get('JobManager') const base = await knowledgeBaseService.getById(baseId) const rootIds = [...new Set(rootItems.map((item) => item.id))] - let interruptIds = rootIds - try { - const interrupted = await this.interruptRootsAndDescendants(base.id, rootIds, REINDEX_INTERRUPTED_REASON) - interruptIds = interrupted.interruptIds + // Phase 1 (locked): identify active jobs whose itemId falls inside our + // subtree. Without the lock a concurrent addItems could land between list + // and cancel, smuggling new jobs past our cleanup. 
+ let jobIdsToCancel: string[] = [] + await this.runWithBaseWriteLockForBase(baseId, async () => { + const allItems = await knowledgeItemService.getDescendantAndSelfItems(baseId, rootIds) + const allItemIds = new Set(allItems.map((item) => item.id)) + const activeJobs = await jobManager.list({ + queue: `base.${baseId}`, + status: [...ACTIVE_STATUSES], + limit: ACTIVE_JOB_LIMIT + }) + jobIdsToCancel = activeJobs + .filter((job) => allItemIds.has((job.input as JobInputWithItem)?.itemId ?? '')) + .map((job) => job.id) + }) + // Phase 2 (unlocked): JobManager.cancel waits up to cancelTimeoutMs per + // in-flight job for the handler to react. Cancelling in parallel bounds + // total wait by the slowest single handler, not the sum across all. + await Promise.all( + jobIdsToCancel.map((jobId) => + jobManager.cancel(jobId, 'reindex').catch((error) => { + logger.warn('reindex cancel failed (job may already be terminal)', { + jobId, + error: error instanceof Error ? error.message : String(error) + }) + }) + ) + ) + + // Phase 3: wait for any straggler Layer 3 locks to drain. + // 35s = JobManager.cancelTimeoutMs (30s) + 5s buffer. + await this.waitForBaseWriteLocks(baseId, DEFAULT_LOCK_WAIT_TIMEOUT_MS) + + // Phase 4 (locked): clean stale vectors + stale leaf DB rows for any + // container roots, then re-enqueue. 
+ await this.runWithBaseWriteLockForBase(baseId, async () => { const leafItems = filterIndexableKnowledgeItems( - await knowledgeItemService.getLeafDescendantItems(base.id, rootIds) + await knowledgeItemService.getLeafDescendantItems(baseId, rootIds) ) - await this.deleteItemVectorsOrFailItems( - base, - leafItems.map((item) => item.id), - interruptIds, - { baseId: base.id, operation: 'reindexItems', rootIds } - ) - - const containerItems = rootItems.filter( - (item): item is KnowledgeItemOf<'directory'> | KnowledgeItemOf<'sitemap'> => - item.type === 'directory' || item.type === 'sitemap' - ) - if (containerItems.length > 0) { - // Reindexing directory/sitemap roots rebuilds their leaf children from the source: - // old leaf items are deleted here, then preparation creates fresh leaf items to index. - await knowledgeItemService.deleteLeafDescendantItems( - base.id, - containerItems.map((item) => item.id) + if (leafItems.length > 0) { + await deleteItemVectors( + base, + leafItems.map((item) => item.id) ) } - for (const containerItem of containerItems) { - const preparedRoot = await knowledgeItemService.updateStatus(containerItem.id, 'processing', { - phase: 'preparing' - }) - await this.submitRuntimeItem(base, preparedRoot) + const containers = rootItems.filter(isContainerKnowledgeItem) + if (containers.length > 0) { + // Drop the previous expansion so prepare-root can recreate fresh leaves. + await knowledgeItemService.deleteLeafDescendantItems( + baseId, + containers.map((item) => item.id) + ) } - for (const leafItem of rootItems.filter(isIndexableKnowledgeItem)) { - const processingItem = await knowledgeItemService.updateStatus(leafItem.id, 'processing') - if (isIndexableKnowledgeItem(processingItem)) { - this.enqueueIndexItem(base, processingItem) - } + for (const item of rootItems) { + await knowledgeItemService.updateStatus( + item.id, + 'processing', + isContainerKnowledgeItem(item) ? 
{ phase: 'preparing' } : undefined + ) + await this.enqueueRootItem(item) } - } catch (error) { - await this.failItemsAndRethrow(interruptIds, error, { baseId: base.id, operation: 'reindexItems', rootIds }) - } + }) } async deleteItems(baseId: string, rootItems: KnowledgeItem[]): Promise { + const jobManager = application.get('JobManager') const base = await knowledgeBaseService.getById(baseId) const rootIds = [...new Set(rootItems.map((item) => item.id))] - let interruptIds = rootIds - try { - const interrupted = await this.interruptRootsAndDescendants(base.id, rootIds, DELETE_INTERRUPTED_REASON) - interruptIds = interrupted.interruptIds + let jobIdsToCancel: string[] = [] + await this.runWithBaseWriteLockForBase(baseId, async () => { + const allItems = await knowledgeItemService.getDescendantAndSelfItems(baseId, rootIds) + const allItemIds = new Set(allItems.map((item) => item.id)) + const activeJobs = await jobManager.list({ + queue: `base.${baseId}`, + status: [...ACTIVE_STATUSES], + limit: ACTIVE_JOB_LIMIT + }) + jobIdsToCancel = activeJobs + .filter((job) => allItemIds.has((job.input as JobInputWithItem)?.itemId ?? '')) + .map((job) => job.id) + }) + await Promise.all( + jobIdsToCancel.map((jobId) => + jobManager.cancel(jobId, 'delete-items').catch((error) => { + logger.warn('delete-items cancel failed (job may already be terminal)', { + jobId, + error: error instanceof Error ? error.message : String(error) + }) + }) + ) + ) + + await this.waitForBaseWriteLocks(baseId, DEFAULT_LOCK_WAIT_TIMEOUT_MS) + + // Cleanup vectors for leaf items in the subtree. The orchestration layer + // deletes the knowledge_item DB rows after this returns. 
+ await this.runWithBaseWriteLockForBase(baseId, async () => { const leafItems = filterIndexableKnowledgeItems( - await knowledgeItemService.getLeafDescendantItems(base.id, rootIds) + await knowledgeItemService.getLeafDescendantItems(baseId, rootIds) ) - await this.deleteItemVectorsOrFailItems( - base, - leafItems.map((item) => item.id), - interruptIds, - { baseId: base.id, operation: 'deleteItems', rootIds } - ) - } catch (error) { - await this.failItemsAndRethrow(interruptIds, error, { baseId: base.id, operation: 'deleteItems', rootIds }) - } + if (leafItems.length > 0) { + await deleteItemVectors( + base, + leafItems.map((item) => item.id) + ) + } + }) } async search(baseId: string, query: string): Promise { @@ -284,221 +312,97 @@ export class KnowledgeRuntimeService extends BaseService { await vectorStore.deleteByIdAndExternalId(chunkId, itemId) } - private async submitRuntimeItem(base: KnowledgeBase, item: KnowledgeItem): Promise { - switch (item.type) { - case 'file': - case 'url': - case 'note': - this.enqueueIndexItem(base, item) - return - case 'directory': - case 'sitemap': - this.enqueuePrepareRoot(base, item) - return - default: - assertNeverKnowledgeItem(item) + /** + * Acquire the Layer 3 mutex for `baseId`, run `task`, release the mutex. + * Promise-chain serialization composes naturally across handler instances + * (including those re-instantiated after crash + retry). + * + * @internal Knowledge job handlers call this through + * `application.get('KnowledgeRuntimeService').runWithBaseWriteLockForBase(...)`. + * Do not call from outside the knowledge module — the lock is a private + * invariant of the indexing pipeline. + */ + async runWithBaseWriteLockForBase(baseId: string, task: () => Promise): Promise { + const previousLock = this.baseWriteLocks.get(baseId) ?? 
Promise.resolve() + let releaseCurrentLock!: () => void + const currentLock = new Promise((resolve) => { + releaseCurrentLock = resolve + }) + const nextLock = previousLock.catch(() => undefined).then(() => currentLock) + + this.baseWriteLocks.set(baseId, nextLock) + + try { + await previousLock.catch(() => undefined) + return await task() + } finally { + releaseCurrentLock() + if (this.baseWriteLocks.get(baseId) === nextLock) { + this.baseWriteLocks.delete(baseId) + } } } - private enqueueIndexItem(base: KnowledgeBase, item: IndexableKnowledgeItem): void { - const wasAlreadyQueued = this.hasQueuedItem(item.id) - let didStart = false - const promise = this.queue.enqueue({ - base, - item, - kind: 'index-leaf', - execute: (context) => { - didStart = true - return this.executeIndexTask(context) - } + /** + * Wait for Layer 3 locks to drain. When `baseId` is given waits only for + * that base; otherwise waits for every active lock. `timeoutMs` caps the + * wait; on timeout this logs a warning and returns so the caller (e.g. + * deleteBase) can proceed past a wedged handler. + */ + async waitForBaseWriteLocks(baseId?: string, timeoutMs?: number): Promise { + const locks = + baseId === undefined + ? 
[...this.baseWriteLocks.values()] + : [this.baseWriteLocks.get(baseId)].filter((l): l is Promise => l !== undefined) + + if (locks.length === 0) { + return + } + + const allSettled = Promise.allSettled(locks).then(() => undefined) + if (timeoutMs === undefined) { + await allSettled + return + } + + let timeoutHandle: NodeJS.Timeout | undefined + const timeout = new Promise<'timeout'>((resolve) => { + timeoutHandle = setTimeout(() => resolve('timeout'), timeoutMs) }) - void promise.catch((error) => { - if (wasAlreadyQueued || didStart) { - return - } - - void this.failItemsAfterEnqueueRejection([item.id], error, { - baseId: base.id, - itemId: item.id, - kind: 'index-leaf' - }) - }) - } - - private enqueuePrepareRoot( - base: KnowledgeBase, - item: KnowledgeItemOf<'directory'> | KnowledgeItemOf<'sitemap'> - ): void { - const wasAlreadyQueued = this.hasQueuedItem(item.id) - let didStart = false - const promise = this.queue.enqueue({ - base, - item, - kind: 'prepare-root', - execute: (context) => { - didStart = true - return this.executePrepareTask(context) - } - }) - - void promise.catch((error) => { - if (wasAlreadyQueued || didStart) { - return - } - - void this.failItemsAfterEnqueueRejection([item.id], error, { - baseId: base.id, - itemId: item.id, - kind: 'prepare-root' - }) - }) - } - - private async executePrepareTask(context: KnowledgeQueueTaskContext): Promise { - const { base, item } = context - const createdItemIds = new Set([item.id]) - try { - const leafItems = await prepareKnowledgeItem({ - baseId: base.id, - item, - onCreatedItem: (createdItem) => createdItemIds.add(createdItem.id), - runMutation: (task) => context.runWithBaseWriteLock(task), - signal: context.signal - }) - - for (const leafItem of leafItems) { - if (await this.shouldEnqueueLeaf(leafItem.id)) { - context.signal.throwIfAborted() - this.enqueueIndexItem(base, leafItem) - } + const winner = await Promise.race([allSettled.then(() => 'done' as const), timeout]) + if (winner === 'timeout') { + 
logger.warn('waitForBaseWriteLocks timed out', { + baseId: baseId ?? null, + timeoutMs, + lockCount: locks.length + }) } - - await context.runWithBaseWriteLock(async () => { - await knowledgeItemService.updateStatus(item.id, 'processing') - context.signal.throwIfAborted() - }) - } catch (error) { - if (context.signal.aborted) { - context.signal.throwIfAborted() - throw error + } finally { + if (timeoutHandle) { + clearTimeout(timeoutHandle) } - - const normalizedError = error instanceof Error ? error : new Error(String(error)) - await this.cleanupFailedItems(base, [...createdItemIds], item, normalizedError) - throw normalizedError } } - private async executeIndexTask(context: KnowledgeQueueTaskContext): Promise { - const { base, item } = context + private async enqueueRootItem(item: KnowledgeItem): Promise { + const jobManager = application.get('JobManager') - try { - await this.indexLeafItem(base, item, context) - } catch (error) { - if (context.signal.aborted) { - context.signal.throwIfAborted() - throw error - } - - const normalizedError = error instanceof Error ? 
error : new Error(String(error)) - await this.cleanupFailedItems(base, [item.id], item, normalizedError) - throw normalizedError - } - } - - private async indexLeafItem( - base: KnowledgeBase, - item: IndexableKnowledgeItem, - context: KnowledgeQueueTaskContext - ): Promise { - context.signal.throwIfAborted() - await context.runWithBaseWriteLock(() => - knowledgeItemService.updateStatus(item.id, 'processing', { phase: 'reading' }) - ) - const documents = await this.runTaskStep(context, () => loadKnowledgeItemDocuments(item, context.signal)) - this.assertHasIndexableContent(documents) - const chunks = await this.runTaskStep(context, () => chunkDocuments(base, item, documents)) - this.assertHasIndexableContent(chunks) - await context.runWithBaseWriteLock(() => - knowledgeItemService.updateStatus(item.id, 'processing', { phase: 'embedding' }) - ) - const nodes = await this.runTaskStep(context, () => embedDocuments(getEmbedModel(base), chunks, context.signal)) - - await context.runWithBaseWriteLock(async () => { - const vectorStoreService = application.get('KnowledgeVectorStoreService') - const activeVectorStore = await this.runTaskStep(context, () => vectorStoreService.createStore(base)) - - await this.runTaskStep(context, () => activeVectorStore.add(nodes)) - await knowledgeItemService.updateStatus(item.id, 'completed') - }) - } - - private async cleanupFailedItems( - base: KnowledgeBase, - itemIds: string[], - logItem: KnowledgeItem, - error: Error - ): Promise { - logger.error('Failed to process knowledge item runtime task', error, { - baseId: base.id, - itemId: logItem.id, - itemType: logItem.type - }) - - try { - await deleteItemVectors(base, itemIds) - } catch (cleanupError) { - logger.error( - 'Failed to cleanup knowledge item vectors after runtime failure', - cleanupError instanceof Error ? 
cleanupError : new Error(String(cleanupError)), - { - baseId: base.id, - itemIds - } + if (isContainerKnowledgeItem(item)) { + await jobManager.enqueue( + 'knowledge.prepare-root', + { baseId: item.baseId, itemId: item.id }, + { idempotencyKey: `knowledge:${item.baseId}:${item.id}` } ) + return } - await this.persistFailureStateBestEffort(itemIds, error.message, { - baseId: base.id, - itemId: logItem.id, - itemType: logItem.type, - operation: 'runtimeTaskFailure' - }) - } - - private async persistFailureStateBestEffort( - itemIds: string[], - reason: string, - context: Record - ): Promise { - try { - await failItems(itemIds, reason) - } catch (error) { - logger.error( - 'Failed to persist knowledge item failure state during runtime cleanup', - error instanceof Error ? error : new Error(String(error)), - { - ...context, - itemIds, - reason - } - ) - } - } - - private async deleteItemVectorsOrFailItems( - base: KnowledgeBase, - vectorItemIds: string[], - failureItemIds: string[], - context: Record - ): Promise { - try { - await deleteItemVectors(base, vectorItemIds) - } catch (error) { - await this.failItemsAndRethrow(failureItemIds, error, context) - } + await jobManager.enqueue( + 'knowledge.index-leaf', + { baseId: item.baseId, itemId: item.id, parentJobId: null }, + { idempotencyKey: `knowledge:${item.baseId}:${item.id}` } + ) } private async deleteAcceptedItemsBestEffort( @@ -524,142 +428,4 @@ export class KnowledgeRuntimeService extends BaseService { } } } - - private async failItemsAndRethrow( - itemIds: string[], - error: unknown, - context: Record - ): Promise { - const normalizedError = error instanceof Error ? error : new Error(String(error)) - await this.persistFailureStateBestEffort(itemIds, normalizedError.message, { - ...context, - operation: context.operation ?? 
'strictRuntimeCleanup' - }) - throw error - } - - private hasQueuedItem(itemId: string): boolean { - const snapshot = this.queue.getSnapshot() - return [...snapshot.pending, ...snapshot.running].some((entry) => entry.itemId === itemId) - } - - private async failItemsAfterEnqueueRejection( - itemIds: string[], - error: unknown, - context: QueueTaskLogContext - ): Promise { - const normalizedError = error instanceof Error ? error : new Error(String(error)) - logger.error('Knowledge queue rejected runtime task before execution', normalizedError, context) - - try { - await failItems(itemIds, normalizedError.message) - } catch (failureStateError) { - logger.error( - 'Failed to persist knowledge item failure state after queue enqueue rejection', - failureStateError instanceof Error ? failureStateError : new Error(String(failureStateError)), - { - ...context, - itemIds, - enqueueError: normalizedError.message - } - ) - } - } - - private async interruptRootsAndDescendants( - baseId: string, - rootIds: string[], - reason: string - ): Promise<{ descendantItems: KnowledgeItem[]; interruptIds: string[] }> { - // Stop roots before descendant lookup so an active expansion cannot enqueue fresh children during cleanup. 
- this.queue.interruptItems(rootIds, reason) - await this.queue.waitForRunning(rootIds) - - const descendantItems = await knowledgeItemService.getDescendantItems(baseId, rootIds) - const interruptIds = [...rootIds, ...descendantItems.map((item) => item.id)] - this.queue.interruptItems(interruptIds, reason) - await this.queue.waitForRunning(interruptIds) - - return { descendantItems, interruptIds } - } - - private async runTaskStep(context: KnowledgeQueueTaskContext, step: () => Promise | T): Promise { - context.signal.throwIfAborted() - const result = await step() - context.signal.throwIfAborted() - return result - } - - private assertHasIndexableContent(items: T[]): void { - if (items.length === 0) { - throw new Error(KNOWLEDGE_EMPTY_CONTENT_REASON) - } - } - - private async shouldEnqueueLeaf(itemId: string): Promise { - try { - const item = await knowledgeItemService.getById(itemId) - return isIndexableKnowledgeItem(item) && item.status === 'processing' - } catch (error) { - if (isDataApiError(error) && error.code === ErrorCode.NOT_FOUND) { - return false - } - - throw error - } - } - - private async cleanupInterruptedEntries(entries: KnowledgeQueueTaskDescriptor[], reason: string): Promise { - const cleanupEntries = await this.expandInterruptedEntries(entries) - await this.deleteVectorsForQueueEntries(cleanupEntries) - await this.persistFailureStateBestEffort( - cleanupEntries.flatMap((entry) => entry.itemIds), - reason, - { - operation: 'interruptedRuntimeCleanup' - } - ) - } - - private async expandInterruptedEntries( - entries: KnowledgeQueueTaskDescriptor[] - ): Promise> { - const expandedEntries: Array<{ base: KnowledgeBase; baseId: string; itemIds: string[] }> = [] - - for (const entry of entries) { - if (entry.kind === 'index-leaf') { - expandedEntries.push({ base: entry.base, baseId: entry.baseId, itemIds: [entry.itemId] }) - continue - } - - const descendantItems = await knowledgeItemService.getDescendantItems(entry.baseId, [entry.itemId]) - 
expandedEntries.push({ - base: entry.base, - baseId: entry.baseId, - itemIds: [entry.itemId, ...descendantItems.map((item) => item.id)] - }) - } - - return expandedEntries - } - - private async deleteVectorsForQueueEntries( - entries: Array<{ base: KnowledgeBase; baseId: string; itemIds: string[] }> - ): Promise { - const entriesByBase = new Map() - for (const entry of entries) { - const existing = entriesByBase.get(entry.baseId) - if (existing) { - existing.itemIds.push(...entry.itemIds) - continue - } - - entriesByBase.set(entry.baseId, { - base: entry.base, - itemIds: entry.itemIds - }) - } - - await deleteVectorsForEntries([...entriesByBase.values()]) - } } diff --git a/src/main/services/knowledge/runtime/__tests__/KnowledgeRuntimeService.test.ts b/src/main/services/knowledge/runtime/__tests__/KnowledgeRuntimeService.test.ts index f19e0eb389..101a3ceef1 100644 --- a/src/main/services/knowledge/runtime/__tests__/KnowledgeRuntimeService.test.ts +++ b/src/main/services/knowledge/runtime/__tests__/KnowledgeRuntimeService.test.ts @@ -1,7 +1,6 @@ import type * as LifecycleModule from '@main/core/lifecycle' import { getDependencies, getPhase } from '@main/core/lifecycle/decorators' import { Phase } from '@main/core/lifecycle/types' -import { DataApiErrorFactory } from '@shared/data/api' import { DEFAULT_KNOWLEDGE_BASE_CHUNK_OVERLAP, DEFAULT_KNOWLEDGE_BASE_CHUNK_SIZE, @@ -9,62 +8,56 @@ import { type KnowledgeItem, type KnowledgeItemOf } from '@shared/data/types/knowledge' -import { beforeEach, describe, expect, it, vi } from 'vitest' - -import type { KnowledgeQueueManager } from '../../queue/KnowledgeQueueManager' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' const { + cancelManyMock, + cancelMock, chunkDocumentsMock, createVectorStoreMock, deleteVectorStoreMock, embedDocumentsMock, embedManyMock, + enqueueMock, getEmbedModelMock, - getStoreIfExistsMock, knowledgeBaseGetByIdMock, knowledgeItemCreateMock, - knowledgeItemDeleteMock, 
knowledgeItemDeleteLeafDescendantItemsMock, - knowledgeItemGetDescendantItemsMock, + knowledgeItemDeleteMock, knowledgeItemGetByIdMock, + knowledgeItemGetDescendantAndSelfItemsMock, knowledgeItemGetLeafDescendantItemsMock, - knowledgeItemReconcileContainersMock, knowledgeItemUpdateStatusMock, + listMock, loadKnowledgeItemDocumentsMock, - loggerErrorMock, + loggerWarnMock, prepareKnowledgeItemMock, rerankKnowledgeSearchResultsMock, - vectorStoreAddMock, - vectorStoreDeleteMock, - vectorStoreDeleteByIdAndExternalIdMock, - vectorStoreListByExternalIdMock, - vectorStoreQueryMock + registerHandlerMock } = vi.hoisted(() => ({ + cancelManyMock: vi.fn(), + cancelMock: vi.fn(), chunkDocumentsMock: vi.fn(), createVectorStoreMock: vi.fn(), deleteVectorStoreMock: vi.fn(), embedDocumentsMock: vi.fn(), embedManyMock: vi.fn(), + enqueueMock: vi.fn(), getEmbedModelMock: vi.fn(), - getStoreIfExistsMock: vi.fn(), knowledgeBaseGetByIdMock: vi.fn(), knowledgeItemCreateMock: vi.fn(), - knowledgeItemDeleteMock: vi.fn(), knowledgeItemDeleteLeafDescendantItemsMock: vi.fn(), - knowledgeItemGetDescendantItemsMock: vi.fn(), + knowledgeItemDeleteMock: vi.fn(), knowledgeItemGetByIdMock: vi.fn(), + knowledgeItemGetDescendantAndSelfItemsMock: vi.fn(), knowledgeItemGetLeafDescendantItemsMock: vi.fn(), - knowledgeItemReconcileContainersMock: vi.fn(), knowledgeItemUpdateStatusMock: vi.fn(), + listMock: vi.fn(), loadKnowledgeItemDocumentsMock: vi.fn(), - loggerErrorMock: vi.fn(), + loggerWarnMock: vi.fn(), prepareKnowledgeItemMock: vi.fn(), rerankKnowledgeSearchResultsMock: vi.fn(), - vectorStoreAddMock: vi.fn(), - vectorStoreDeleteMock: vi.fn(), - vectorStoreDeleteByIdAndExternalIdMock: vi.fn(), - vectorStoreListByExternalIdMock: vi.fn(), - vectorStoreQueryMock: vi.fn() + registerHandlerMock: vi.fn() })) vi.mock('@application', async () => { @@ -72,8 +65,14 @@ vi.mock('@application', async () => { return mockApplicationFactory({ KnowledgeVectorStoreService: { createStore: createVectorStoreMock, - 
deleteStore: deleteVectorStoreMock, - getStoreIfExists: getStoreIfExistsMock + deleteStore: deleteVectorStoreMock + }, + JobManager: { + registerHandler: registerHandlerMock, + enqueue: enqueueMock, + cancel: cancelMock, + cancelMany: cancelManyMock, + list: listMock } } as Parameters[0]) }) @@ -82,9 +81,9 @@ vi.mock('@logger', () => ({ loggerService: { withContext: () => ({ debug: vi.fn(), - error: loggerErrorMock, + error: vi.fn(), info: vi.fn(), - warn: vi.fn() + warn: loggerWarnMock }) } })) @@ -107,10 +106,9 @@ vi.mock('@data/services/KnowledgeItemService', () => ({ create: knowledgeItemCreateMock, delete: knowledgeItemDeleteMock, deleteLeafDescendantItems: knowledgeItemDeleteLeafDescendantItemsMock, - getDescendantItems: knowledgeItemGetDescendantItemsMock, getById: knowledgeItemGetByIdMock, + getDescendantAndSelfItems: knowledgeItemGetDescendantAndSelfItemsMock, getLeafDescendantItems: knowledgeItemGetLeafDescendantItemsMock, - reconcileContainers: knowledgeItemReconcileContainersMock, updateStatus: knowledgeItemUpdateStatusMock } })) @@ -149,22 +147,15 @@ vi.mock('../utils/prepare', () => ({ prepareKnowledgeItem: prepareKnowledgeItemMock })) -const { KnowledgeRuntimeService } = await import('..') +vi.mock('../utils/cleanup', () => ({ + deleteItemVectors: vi.fn() +})) -function createSearchNode(id: string, score: number, chunkIndex = 0) { - return { - id_: id, - score, - metadata: { - itemId: `item-${id}`, - itemType: 'note', - source: `note-${id}`, - chunkIndex, - tokenCount: 2 - }, - getContent: vi.fn(() => `content ${id}`) - } -} +// Imported AFTER mocks so the side-effect declare-module merge for both +// handlers is in scope when the runtime references them at module load. 
+const { KnowledgeRuntimeService } = await import('..') +const { prepareRootJobHandler } = await import('../../tasks/prepareRootJobHandler') +const { indexLeafJobHandler } = await import('../../tasks/indexLeafJobHandler') function createBase(): KnowledgeBase { return { @@ -181,7 +172,7 @@ function createBase(): KnowledgeBase { searchMode: 'hybrid', createdAt: '2026-04-08T00:00:00.000Z', updatedAt: '2026-04-08T00:00:00.000Z' - } + } as KnowledgeBase } function createNoteItem(id = 'note-1', status: KnowledgeItem['status'] = 'idle'): KnowledgeItemOf<'note'> { @@ -199,7 +190,7 @@ function createNoteItem(id = 'note-1', status: KnowledgeItem['status'] = 'idle') ...lifecycle, createdAt: '2026-04-08T00:00:00.000Z', updatedAt: '2026-04-08T00:00:00.000Z' - } + } as KnowledgeItemOf<'note'> } function createDirectoryItem(id = 'dir-1', status: KnowledgeItem['status'] = 'idle'): KnowledgeItemOf<'directory'> { @@ -213,1657 +204,255 @@ function createDirectoryItem(id = 'dir-1', status: KnowledgeItem['status'] = 'id baseId: 'kb-1', groupId: null, type: 'directory', - data: { source: `/docs/${id}`, path: `/docs/${id}` }, + data: { source: id, path: `/docs/${id}` }, ...lifecycle, createdAt: '2026-04-08T00:00:00.000Z', updatedAt: '2026-04-08T00:00:00.000Z' - } + } as KnowledgeItemOf<'directory'> } -function createQueueDescriptor( - base: KnowledgeBase, - item: KnowledgeItem, - kind: 'index-leaf' | 'prepare-root' -): { - base: KnowledgeBase - baseId: string - itemId: string - itemType: KnowledgeItem['type'] - kind: 'index-leaf' | 'prepare-root' -} { - return { - base, - baseId: base.id, - itemId: item.id, - itemType: item.type, - kind - } -} - -function createDeferred() { - let resolve!: (value: T | PromiseLike) => void - let reject!: (reason?: unknown) => void - const promise = new Promise((res, rej) => { - resolve = res - reject = rej - }) - - return { promise, reject, resolve } -} - -async function flushPromises(): Promise { - await Promise.resolve() - await Promise.resolve() -} - 
-describe('KnowledgeRuntimeService', () => { +describe('KnowledgeRuntimeService (Phase 4 JobManager backbone)', () => { beforeEach(() => { vi.clearAllMocks() - - createVectorStoreMock.mockResolvedValue({ - add: vectorStoreAddMock, - delete: vectorStoreDeleteMock, - deleteByIdAndExternalId: vectorStoreDeleteByIdAndExternalIdMock, - listByExternalId: vectorStoreListByExternalIdMock, - query: vectorStoreQueryMock - }) - getStoreIfExistsMock.mockResolvedValue({ - add: vectorStoreAddMock, - delete: vectorStoreDeleteMock, - deleteByIdAndExternalId: vectorStoreDeleteByIdAndExternalIdMock, - listByExternalId: vectorStoreListByExternalIdMock, - query: vectorStoreQueryMock - }) knowledgeBaseGetByIdMock.mockResolvedValue(createBase()) - deleteVectorStoreMock.mockResolvedValue(undefined) - vectorStoreAddMock.mockResolvedValue(undefined) - vectorStoreDeleteMock.mockResolvedValue(undefined) - vectorStoreDeleteByIdAndExternalIdMock.mockResolvedValue(undefined) - vectorStoreListByExternalIdMock.mockResolvedValue([]) - vectorStoreQueryMock.mockResolvedValue({ nodes: [], similarities: [] }) - getEmbedModelMock.mockReturnValue({ modelId: 'embedding-model' }) - loadKnowledgeItemDocumentsMock.mockResolvedValue([{ text: 'document' }]) - chunkDocumentsMock.mockReturnValue([{ text: 'chunk' }]) - embedDocumentsMock.mockResolvedValue([{ id_: 'node-1' }]) - embedManyMock.mockResolvedValue({ embeddings: [[0.1, 0.2]] }) - knowledgeItemCreateMock.mockImplementation(async (_baseId: string, item: { type: KnowledgeItem['type'] }) => { - if (item.type === 'directory') { - return createDirectoryItem('dir-1', 'idle') - } - - return createNoteItem('note-1', 'idle') - }) - knowledgeItemDeleteMock.mockResolvedValue(undefined) - knowledgeItemDeleteLeafDescendantItemsMock.mockResolvedValue(undefined) - knowledgeItemReconcileContainersMock.mockResolvedValue(undefined) - knowledgeItemGetLeafDescendantItemsMock.mockImplementation(async (_baseId: string, itemIds: string[]) => - itemIds.map((itemId) => 
createNoteItem(itemId, 'processing')) - ) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([]) knowledgeItemGetByIdMock.mockImplementation(async (id: string) => createNoteItem(id, 'processing')) - knowledgeItemUpdateStatusMock.mockImplementation( - async ( - id: string, - status: KnowledgeItem['status'], - update: { phase?: KnowledgeItem['phase']; error?: string | null } = {} - ) => ({ - ...(id.startsWith('dir') ? createDirectoryItem(id, status) : createNoteItem(id, status)), - phase: update.phase ?? null, - error: update.error ?? null - }) - ) - prepareKnowledgeItemMock.mockImplementation(async ({ item }: { item: KnowledgeItem }) => [item]) + knowledgeItemUpdateStatusMock.mockImplementation(async (id: string) => createNoteItem(id, 'processing')) + knowledgeItemGetDescendantAndSelfItemsMock.mockResolvedValue([]) + knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([]) + listMock.mockResolvedValue([]) + enqueueMock.mockResolvedValue({ id: 'job-id', snapshot: {}, finished: Promise.resolve({}) }) + cancelMock.mockResolvedValue(undefined) + cancelManyMock.mockResolvedValue(undefined) }) - it('uses WhenReady phase and depends on KnowledgeVectorStoreService', () => { - expect(getPhase(KnowledgeRuntimeService)).toBe(Phase.WhenReady) - expect(getDependencies(KnowledgeRuntimeService)).toEqual(['KnowledgeVectorStoreService']) + afterEach(() => { + vi.useRealTimers() }) - it('returns from addItems after enqueueing and completes indexing in the background', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const item = createNoteItem('note-1', 'processing') - const readDeferred = createDeferred() - loadKnowledgeItemDocumentsMock.mockReturnValueOnce(readDeferred.promise) - - await service.addItems(base.id, [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - expect(knowledgeItemCreateMock).toHaveBeenCalledWith(base.id, { - groupId: undefined, - type: 'note', - data: { - source: 'note-1', - content: 
'hello note-1' - } + describe('lifecycle decorators', () => { + it('runs in WhenReady phase and depends on KnowledgeVectorStoreService', () => { + expect(getPhase(KnowledgeRuntimeService)).toBe(Phase.WhenReady) + expect(getDependencies(KnowledgeRuntimeService)).toEqual(['KnowledgeVectorStoreService']) }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'processing') - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith(item, expect.any(AbortSignal)) - }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'processing', { phase: 'reading' }) - expect(vectorStoreAddMock).not.toHaveBeenCalled() - - readDeferred.resolve([{ text: 'document' }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'completed') - }) - expect(knowledgeItemUpdateStatusMock.mock.calls.map((call) => call[1])).toEqual([ - 'processing', - 'processing', - 'processing', - 'completed' - ]) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'processing', { phase: 'reading' }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'processing', { phase: 'embedding' }) - expect(vectorStoreAddMock).toHaveBeenCalledWith([{ id_: 'node-1' }]) - expect( - knowledgeItemUpdateStatusMock.mock.invocationCallOrder[ - knowledgeItemUpdateStatusMock.mock.calls.findIndex( - (call) => call[0] === item.id && call[1] === 'processing' && call[2]?.phase === 'embedding' - ) - ] - ).toBeLessThan(vectorStoreAddMock.mock.invocationCallOrder[0]) }) - it('serializes same-base vector writes and completion status updates', async () => { - const service = new KnowledgeRuntimeService() - const releaseFirstVectorWrite = createDeferred() - const firstVectorWriteStarted = createDeferred() - const events: string[] = [] + describe('onInit', () => { + it('registers both knowledge JobHandlers', async () => { + const service = 
new KnowledgeRuntimeService() + ;(service as unknown as { onInit: () => void }).onInit() - knowledgeItemCreateMock - .mockResolvedValueOnce(createNoteItem('note-1', 'idle')) - .mockResolvedValueOnce(createNoteItem('note-2', 'idle')) - chunkDocumentsMock.mockImplementation((_base: KnowledgeBase, item: KnowledgeItem) => [{ text: item.id }]) - embedDocumentsMock.mockImplementation(async (_model: unknown, chunks: Array<{ text: string }>) => [ - { id_: `node-${chunks[0].text}` } - ]) - vectorStoreAddMock.mockImplementation(async (nodes: Array<{ id_: string }>) => { - const itemId = nodes[0].id_.replace('node-', '') - events.push(`vector:start:${itemId}`) - - if (itemId === 'note-1') { - firstVectorWriteStarted.resolve() - await releaseFirstVectorWrite.promise - } - - events.push(`vector:end:${itemId}`) + expect(registerHandlerMock).toHaveBeenCalledTimes(2) + expect(registerHandlerMock).toHaveBeenCalledWith('knowledge.prepare-root', prepareRootJobHandler) + expect(registerHandlerMock).toHaveBeenCalledWith('knowledge.index-leaf', indexLeafJobHandler) }) - knowledgeItemUpdateStatusMock.mockImplementation( - async ( - id: string, - status: KnowledgeItem['status'], - update: { phase?: KnowledgeItem['phase']; error?: string | null } = {} - ) => { - if (status === 'completed') { - events.push(`status:completed:${id}`) - } + }) - return { - ...createNoteItem(id, status), - phase: update.phase ?? null, - error: update.error ?? 
null - } - } - ) + describe('onStop', () => { + it('cancels both job types and waits for outstanding locks (no item.status rollback)', async () => { + const service = new KnowledgeRuntimeService() + await (service as unknown as { onStop: () => Promise }).onStop() - await service.addItems('kb-1', [ - { type: 'note', data: { source: 'note-1', content: 'hello note-1' } }, - { type: 'note', data: { source: 'note-2', content: 'hello note-2' } } - ]) - await firstVectorWriteStarted.promise - await flushPromises() + expect(cancelManyMock).toHaveBeenCalledTimes(2) + const calledTypes = cancelManyMock.mock.calls.map((c) => (c[0] as { type?: string }).type) + expect(calledTypes).toEqual(expect.arrayContaining(['knowledge.prepare-root', 'knowledge.index-leaf'])) + expect(knowledgeItemUpdateStatusMock).not.toHaveBeenCalled() + }) + }) - expect(events).toEqual(['vector:start:note-1']) + describe('cancelAllJobsForBase', () => { + it('delegates to JobManager.cancelMany with a per-base queue filter', async () => { + const service = new KnowledgeRuntimeService() + await service.cancelAllJobsForBase('kb-99') - releaseFirstVectorWrite.resolve() + expect(cancelManyMock).toHaveBeenCalledWith({ queue: 'base.kb-99' }, 'delete-base') + }) + }) - await vi.waitFor(() => { - expect(events).toEqual([ - 'vector:start:note-1', - 'vector:end:note-1', - 'status:completed:note-1', - 'vector:start:note-2', - 'vector:end:note-2', - 'status:completed:note-2' + describe('addItems', () => { + it('returns immediately for empty inputs without acquiring base or JobManager', async () => { + const service = new KnowledgeRuntimeService() + await service.addItems('kb-1', []) + + expect(knowledgeBaseGetByIdMock).not.toHaveBeenCalled() + expect(enqueueMock).not.toHaveBeenCalled() + }) + + it('enqueues a leaf item via knowledge.index-leaf with idempotency key', async () => { + const noteItem = createNoteItem('note-1', 'idle') + const processingNote = createNoteItem('note-1', 'processing') + 
knowledgeItemCreateMock.mockResolvedValueOnce(noteItem) + knowledgeItemUpdateStatusMock.mockResolvedValueOnce(processingNote) + + const service = new KnowledgeRuntimeService() + await service.addItems('kb-1', [{ type: 'note', data: noteItem.data }]) + + expect(enqueueMock).toHaveBeenCalledTimes(1) + const [type, payload, opts] = enqueueMock.mock.calls[0] + expect(type).toBe('knowledge.index-leaf') + expect(payload).toEqual({ baseId: 'kb-1', itemId: 'note-1', parentJobId: null }) + expect(opts).toEqual({ idempotencyKey: 'knowledge:kb-1:note-1' }) + }) + + it('enqueues a container as knowledge.prepare-root with the original item type', async () => { + const dirItem = createDirectoryItem('dir-1', 'idle') + const preparingDir = createDirectoryItem('dir-1', 'processing') + knowledgeItemCreateMock.mockResolvedValueOnce(dirItem) + knowledgeItemUpdateStatusMock.mockResolvedValueOnce(preparingDir) + + const service = new KnowledgeRuntimeService() + await service.addItems('kb-1', [{ type: 'directory', data: dirItem.data }]) + + expect(enqueueMock).toHaveBeenCalledTimes(1) + const [type, payload] = enqueueMock.mock.calls[0] + expect(type).toBe('knowledge.prepare-root') + expect(payload).toEqual({ baseId: 'kb-1', itemId: 'dir-1' }) + }) + + it('rolls back accepted items when one input fails partway through', async () => { + const noteItem = createNoteItem('note-1', 'idle') + knowledgeItemCreateMock.mockResolvedValueOnce(noteItem).mockRejectedValueOnce(new Error('create failed')) + + const service = new KnowledgeRuntimeService() + await expect( + service.addItems('kb-1', [ + { type: 'note', data: noteItem.data }, + { type: 'note', data: noteItem.data } + ]) + ).rejects.toThrow('create failed') + + expect(knowledgeItemDeleteMock).toHaveBeenCalledWith('note-1') + expect(enqueueMock).not.toHaveBeenCalled() + }) + }) + + describe('reindexItems', () => { + it('cancels active jobs whose itemId is in the targeted subtree, then re-enqueues roots', async () => { + const root = 
createNoteItem('note-1', 'processing') + const descendant = createNoteItem('descendant-1', 'processing') + knowledgeItemGetDescendantAndSelfItemsMock.mockResolvedValueOnce([root, descendant]) + knowledgeItemGetLeafDescendantItemsMock.mockResolvedValueOnce([root, descendant]) + listMock.mockResolvedValueOnce([ + { id: 'job-active', input: { itemId: 'descendant-1' } }, + { id: 'job-unrelated', input: { itemId: 'other-item' } } ]) - }) - }) - it('serializes addItems acceptance with same-base queue writes before enqueueing runtime work', async () => { - const service = new KnowledgeRuntimeService() - const releaseActiveWrite = createDeferred() - const activeWriteStarted = createDeferred() - const events: string[] = [] + const service = new KnowledgeRuntimeService() + await service.reindexItems('kb-1', [root]) - const queue = (service as unknown as { queue: KnowledgeQueueManager }).queue - const activeWritePromise = queue.runWithBaseWriteLockForBase('kb-1', async () => { - events.push('lock:active') - activeWriteStarted.resolve() - await releaseActiveWrite.promise - events.push('unlock:active') + expect(cancelMock).toHaveBeenCalledTimes(1) + expect(cancelMock).toHaveBeenCalledWith('job-active', 'reindex') + // Leaf re-enqueue uses the index-leaf path. 
+ expect(enqueueMock).toHaveBeenCalledTimes(1) + const [type] = enqueueMock.mock.calls[0] + expect(type).toBe('knowledge.index-leaf') }) - await activeWriteStarted.promise - const addPromise = service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - await flushPromises() + it('deletes prior leaf descendants for container roots before re-enqueueing', async () => { + const dir = createDirectoryItem('dir-1', 'processing') + knowledgeItemGetDescendantAndSelfItemsMock.mockResolvedValueOnce([dir]) + knowledgeItemGetLeafDescendantItemsMock.mockResolvedValueOnce([]) - expect(knowledgeItemCreateMock).not.toHaveBeenCalled() - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() - expect(events).toEqual(['lock:active']) + const service = new KnowledgeRuntimeService() + await service.reindexItems('kb-1', [dir]) - releaseActiveWrite.resolve() - await addPromise - await activeWritePromise - - expect(knowledgeItemCreateMock).toHaveBeenCalledWith('kb-1', { - groupId: undefined, - type: 'note', - data: { source: 'note-1', content: 'hello note-1' } - }) - expect(events).toEqual(['lock:active', 'unlock:active']) - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith( - createNoteItem('note-1', 'processing'), - expect.any(AbortSignal) + expect(knowledgeItemDeleteLeafDescendantItemsMock).toHaveBeenCalledWith('kb-1', ['dir-1']) + expect(enqueueMock).toHaveBeenCalledWith( + 'knowledge.prepare-root', + expect.objectContaining({ itemId: 'dir-1' }), + expect.objectContaining({ idempotencyKey: 'knowledge:kb-1:dir-1' }) ) }) - expect( - knowledgeItemUpdateStatusMock.mock.invocationCallOrder[ - knowledgeItemUpdateStatusMock.mock.calls.findIndex((call) => call[0] === 'note-1' && call[1] === 'processing') - ] - ).toBeLessThan(loadKnowledgeItemDocumentsMock.mock.invocationCallOrder[0]) - }) - it('cleans up accepted roots when batch acceptance fails', async () => { - const service = new KnowledgeRuntimeService() 
- const acceptError = new Error('create failed') - knowledgeItemCreateMock.mockResolvedValueOnce(createNoteItem('note-1', 'idle')).mockRejectedValueOnce(acceptError) + it('silently swallows cancel failures (job may already be terminal)', async () => { + const root = createNoteItem('note-1', 'processing') + knowledgeItemGetDescendantAndSelfItemsMock.mockResolvedValueOnce([root]) + knowledgeItemGetLeafDescendantItemsMock.mockResolvedValueOnce([root]) + listMock.mockResolvedValueOnce([{ id: 'job-stale', input: { itemId: 'note-1' } }]) + cancelMock.mockRejectedValueOnce(new Error('already terminal')) - await expect( - service.addItems('kb-1', [ - { type: 'note', data: { source: 'note-1', content: 'hello 1' } }, - { type: 'note', data: { source: 'note-2', content: 'hello 2' } } - ]) - ).rejects.toBe(acceptError) - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('note-1', 'processing') - expect(knowledgeItemDeleteMock).toHaveBeenCalledWith('note-1') - expect(knowledgeItemUpdateStatusMock).not.toHaveBeenCalledWith('note-1', 'failed', { error: 'create failed' }) - expect(loggerErrorMock).toHaveBeenCalledWith('Failed to add knowledge items', acceptError, { - baseId: 'kb-1', - accepted: 1, - total: 2 - }) - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - }) - - it('keeps the original addItems error when accepted item rollback fails', async () => { - const service = new KnowledgeRuntimeService() - const acceptError = new Error('create failed') - const cleanupError = new Error('delete failed') - knowledgeItemCreateMock.mockResolvedValueOnce(createNoteItem('note-1', 'idle')).mockRejectedValueOnce(acceptError) - knowledgeItemDeleteMock.mockRejectedValueOnce(cleanupError) - - await expect( - service.addItems('kb-1', [ - { type: 'note', data: { source: 'note-1', content: 'hello 1' } }, - { type: 'note', data: { source: 'note-2', content: 'hello 2' } } - ]) - ).rejects.toBe(acceptError) - - expect(loggerErrorMock).toHaveBeenCalledWith( - 'Failed to rollback 
accepted knowledge item after addItems failure', - cleanupError, - { - baseId: 'kb-1', - itemId: 'note-1', - addError: acceptError.message - } - ) - }) - - it('marks an indexable item failed when queue enqueue rejects before execution', async () => { - const service = new KnowledgeRuntimeService() - const enqueueError = new Error('queue resetting') - const enqueueMock = vi.fn().mockRejectedValue(enqueueError) - const getSnapshotMock = vi.fn().mockReturnValue({ pending: [], running: [] }) - const runWithBaseWriteLockForBaseMock = vi.fn(async (_baseId: string, task: () => Promise) => task()) - - ;( - service as unknown as { - queue: { - enqueue: typeof enqueueMock - getSnapshot: typeof getSnapshotMock - runWithBaseWriteLockForBase: typeof runWithBaseWriteLockForBaseMock - } - } - ).queue = { - enqueue: enqueueMock, - getSnapshot: getSnapshotMock, - runWithBaseWriteLockForBase: runWithBaseWriteLockForBaseMock - } - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('note-1', 'failed', { error: 'queue resetting' }) - }) - expect(loggerErrorMock).toHaveBeenCalledWith( - 'Knowledge queue rejected runtime task before execution', - enqueueError, - { - baseId: 'kb-1', - itemId: 'note-1', - kind: 'index-leaf' - } - ) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() - }) - - it('marks a preparation root failed when queue enqueue rejects before execution', async () => { - const service = new KnowledgeRuntimeService() - const enqueueError = new Error('queue resetting') - const enqueueMock = vi.fn().mockRejectedValue(enqueueError) - const getSnapshotMock = vi.fn().mockReturnValue({ pending: [], running: [] }) - const runWithBaseWriteLockForBaseMock = vi.fn(async (_baseId: string, task: () => Promise) => task()) - - ;( - service as unknown as { - queue: { - enqueue: typeof enqueueMock - getSnapshot: typeof getSnapshotMock 
- runWithBaseWriteLockForBase: typeof runWithBaseWriteLockForBaseMock - } - } - ).queue = { - enqueue: enqueueMock, - getSnapshot: getSnapshotMock, - runWithBaseWriteLockForBase: runWithBaseWriteLockForBaseMock - } - - await service.addItems('kb-1', [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('dir-1', 'failed', { error: 'queue resetting' }) - }) - expect(loggerErrorMock).toHaveBeenCalledWith( - 'Knowledge queue rejected runtime task before execution', - enqueueError, - { - baseId: 'kb-1', - itemId: 'dir-1', - kind: 'prepare-root' - } - ) - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - }) - - it('marks an indexable item failed when queue interrupt rejects before execution', async () => { - const service = new KnowledgeRuntimeService() - const enqueueError = new Error('Knowledge task interrupted by service shutdown') - enqueueError.name = 'KnowledgeQueueInterruptedError' - const enqueueMock = vi.fn().mockRejectedValue(enqueueError) - const getSnapshotMock = vi.fn().mockReturnValue({ pending: [], running: [] }) - const runWithBaseWriteLockForBaseMock = vi.fn(async (_baseId: string, task: () => Promise) => task()) - - ;( - service as unknown as { - queue: { - enqueue: typeof enqueueMock - getSnapshot: typeof getSnapshotMock - runWithBaseWriteLockForBase: typeof runWithBaseWriteLockForBaseMock - } - } - ).queue = { - enqueue: enqueueMock, - getSnapshot: getSnapshotMock, - runWithBaseWriteLockForBase: runWithBaseWriteLockForBaseMock - } - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('note-1', 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - }) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() - }) - - it('marks a pending indexable item failed when 
the real queue resets before execution', async () => { - const service = new KnowledgeRuntimeService() - const queue = (service as unknown as { queue: KnowledgeQueueManager }).queue - const runningBlockers = Array.from({ length: 5 }, () => createDeferred()) - const inputs = [ - ...runningBlockers.map((_, index) => ({ - type: 'note' as const, - data: { source: `note-${index + 1}`, content: `hello note-${index + 1}` } - })), - { type: 'note' as const, data: { source: 'note-pending', content: 'hello note-pending' } } - ] - - knowledgeItemCreateMock.mockImplementation(async () => - createNoteItem(`note-${knowledgeItemCreateMock.mock.calls.length}`, 'idle') - ) - loadKnowledgeItemDocumentsMock.mockImplementation((item: KnowledgeItem) => { - const runningIndex = Number(item.id.replace('note-', '')) - 1 - return runningBlockers[runningIndex]?.promise ?? Promise.resolve([{ text: 'unexpected pending execution' }]) - }) - - await service.addItems('kb-1', inputs) - - await vi.waitFor(() => { - expect(queue.getSnapshot().running).toHaveLength(5) - expect(queue.getSnapshot().pending).toHaveLength(1) - }) - - const resetPromise = queue.reset('queue resetting') - - for (const blocker of runningBlockers) { - blocker.resolve([{ text: 'document' }]) - } - - await resetPromise - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('note-6', 'failed', { error: 'queue resetting' }) - }) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith( - expect.objectContaining({ id: 'note-6' }), - expect.any(AbortSignal) - ) - }) - - it('marks a preparation root failed when queue interrupt rejects before execution', async () => { - const service = new KnowledgeRuntimeService() - const enqueueError = new Error('Knowledge task interrupted by service shutdown') - enqueueError.name = 'KnowledgeQueueInterruptedError' - const enqueueMock = vi.fn().mockRejectedValue(enqueueError) - const getSnapshotMock = vi.fn().mockReturnValue({ pending: [], running: [] 
}) - const runWithBaseWriteLockForBaseMock = vi.fn(async (_baseId: string, task: () => Promise) => task()) - - ;( - service as unknown as { - queue: { - enqueue: typeof enqueueMock - getSnapshot: typeof getSnapshotMock - runWithBaseWriteLockForBase: typeof runWithBaseWriteLockForBaseMock - } - } - ).queue = { - enqueue: enqueueMock, - getSnapshot: getSnapshotMock, - runWithBaseWriteLockForBase: runWithBaseWriteLockForBaseMock - } - - await service.addItems('kb-1', [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('dir-1', 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - }) - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - }) - - it('marks an item failed when indexing throws', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - loadKnowledgeItemDocumentsMock.mockRejectedValueOnce(new Error('read failed')) - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { error: 'read failed' }) - }) - expect(vectorStoreAddMock).not.toHaveBeenCalled() - }) - - it('marks an item failed when the reader returns no documents', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - loadKnowledgeItemDocumentsMock.mockResolvedValueOnce([]) - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'KNOWLEDGE_EMPTY_CONTENT' - }) - }) - expect(chunkDocumentsMock).not.toHaveBeenCalled() - expect(embedDocumentsMock).not.toHaveBeenCalled() - 
expect(vectorStoreAddMock).not.toHaveBeenCalled() - }) - - it('marks an item failed when chunking produces no chunks', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - chunkDocumentsMock.mockReturnValueOnce([]) - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'KNOWLEDGE_EMPTY_CONTENT' - }) - }) - expect(embedDocumentsMock).not.toHaveBeenCalled() - expect(vectorStoreAddMock).not.toHaveBeenCalled() - }) - - it('marks an item failed when chunk metadata validation throws before embedding', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - chunkDocumentsMock.mockImplementationOnce(() => { - throw new Error('Invalid chunk metadata') - }) - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'Invalid chunk metadata' - }) - }) - expect(embedDocumentsMock).not.toHaveBeenCalled() - expect(vectorStoreAddMock).not.toHaveBeenCalled() - }) - - it('deletes vectors when indexing fails after vector write starts', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - knowledgeItemUpdateStatusMock.mockImplementation( - async ( - id: string, - status: KnowledgeItem['status'], - update: { phase?: KnowledgeItem['phase']; error?: string | null } = {} - ) => { - if (status === 'completed') { - throw new Error('completed write failed') - } - - return { - ...createNoteItem(id, status), - phase: update.phase ?? null, - error: update.error ?? 
null - } - } - ) - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'completed write failed' - }) - }) - expect(vectorStoreAddMock).toHaveBeenCalledWith([{ id_: 'node-1' }]) - expect(getStoreIfExistsMock).toHaveBeenCalledWith(createBase()) - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(item.id) - }) - - it('marks root and created children failed when expansion fails', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const expansionError = new Error('child creation failed') - - knowledgeItemCreateMock.mockResolvedValueOnce(createDirectoryItem('dir-1', 'idle')) - knowledgeItemUpdateStatusMock.mockImplementation( - async ( - id: string, - status: KnowledgeItem['status'], - update: { phase?: KnowledgeItem['phase']; error?: string | null } = {} - ) => { - if (id === root.id) { - return { ...root, status, phase: update.phase ?? null, error: update.error ?? null } - } - - return { - ...createNoteItem(id, status), - phase: update.phase ?? null, - error: update.error ?? 
null - } - } - ) - prepareKnowledgeItemMock.mockImplementationOnce( - async ({ onCreatedItem }: { onCreatedItem: (item: KnowledgeItem) => void }) => { - onCreatedItem(child) - throw expansionError - } - ) - - await service.addItems('kb-1', [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: expansionError.message - }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(child.id, 'failed', { - error: expansionError.message - }) - }) - }) - - it('does not enqueue an expanded child that was deleted before expansion finished', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const childCreated = createDeferred() - const finishPreparation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - knowledgeItemGetLeafDescendantItemsMock.mockImplementation(async (_baseId: string, itemIds: string[]) => - itemIds.includes(child.id) ? 
[child] : [] - ) - prepareKnowledgeItemMock.mockImplementationOnce( - async ({ onCreatedItem }: { onCreatedItem: (item: KnowledgeItem) => void }) => { - onCreatedItem(child) - childCreated.resolve() - await finishPreparation.promise - return [child] - } - ) - knowledgeItemGetByIdMock.mockImplementation(async (id: string) => { - if (id === child.id) { - throw DataApiErrorFactory.notFound('KnowledgeItem', id) - } - - return createNoteItem(id, 'processing') - }) - - const addPromise = service.addItems(base.id, [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await childCreated.promise - - await service.deleteItems(base.id, [child]) - finishPreparation.resolve() - await addPromise - - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(child.id) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(child, expect.any(AbortSignal)) - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'processing') - }) - expect(knowledgeItemUpdateStatusMock).not.toHaveBeenCalledWith(root.id, 'failed', { - error: 'Knowledge task interrupted by item deletion' - }) - }) - - it('fails preparation when leaf enqueue lookup fails unexpectedly', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const lookupError = new Error('database unavailable') - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockResolvedValueOnce([child]) - knowledgeItemGetByIdMock.mockRejectedValueOnce(lookupError) - - await service.addItems(base.id, [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: lookupError.message - }) - }) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(child, 
expect.any(AbortSignal)) - }) - - it('finalizes a prepared container when every expanded leaf is gone before enqueue', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const firstChild = createNoteItem('child-1', 'processing') - const secondChild = createNoteItem('child-2', 'processing') - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockResolvedValueOnce([firstChild, secondChild]) - knowledgeItemGetByIdMock.mockImplementation(async (id: string) => - Promise.reject(DataApiErrorFactory.notFound('KnowledgeItem', id)) - ) - - await service.addItems(base.id, [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'processing') - }) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(firstChild, expect.any(AbortSignal)) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(secondChild, expect.any(AbortSignal)) - }) - - it('reconciles a prepared container after enqueueing leaves', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const firstChild = createNoteItem('child-1', 'processing') - const secondChild = createNoteItem('child-2', 'processing') - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockResolvedValueOnce([firstChild, secondChild]) - knowledgeItemGetByIdMock.mockImplementation(async (id: string) => { - if (id === firstChild.id) { - return firstChild - } - - throw DataApiErrorFactory.notFound('KnowledgeItem', id) - }) - - await service.addItems(base.id, [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith(firstChild, 
expect.any(AbortSignal)) - }) - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'processing') - }) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(secondChild, expect.any(AbortSignal)) - }) - - it('does not finalize a prepared container after preparation is interrupted', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-1', 'processing') - const preparationStarted = createDeferred() - const finishPreparation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockImplementationOnce(async ({ signal }: { signal: AbortSignal }) => { - preparationStarted.resolve() - await finishPreparation.promise - signal.throwIfAborted() - return [createNoteItem('child-1', 'processing')] - }) - - const addPromise = service.addItems('kb-1', [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await preparationStarted.promise - - const stopPromise = (service as unknown as { onStop: () => Promise }).onStop() - await flushPromises() - - finishPreparation.resolve() - await stopPromise - await addPromise - - expect(knowledgeItemReconcileContainersMock).not.toHaveBeenCalled() - }) - - it('interrupts parent preparation during delete so stale leaves are not enqueued', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const preparationStarted = createDeferred() - const finishPreparation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([child]) - prepareKnowledgeItemMock.mockImplementationOnce(async ({ signal }: { signal: AbortSignal }) => { - preparationStarted.resolve() - await 
finishPreparation.promise - signal.throwIfAborted() - return [child] - }) - - const addPromise = service.addItems(base.id, [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await preparationStarted.promise - - const deletePromise = service.deleteItems(base.id, [root]) - await flushPromises() - - finishPreparation.resolve() - await deletePromise - await addPromise - - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(child.id) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(child, expect.any(AbortSignal)) - expect(knowledgeItemGetDescendantItemsMock).toHaveBeenCalledWith(base.id, [root.id]) - }) - - it('interrupts a child task that preparation enqueued before delete descendant lookup', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const childReadStarted = createDeferred() - const finishChildRead = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockResolvedValueOnce([child]) - knowledgeItemGetByIdMock.mockResolvedValue(child) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([child]) - loadKnowledgeItemDocumentsMock.mockImplementationOnce(async () => { - childReadStarted.resolve() - return await finishChildRead.promise - }) - - await service.addItems(base.id, [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - await childReadStarted.promise - - const deletePromise = service.deleteItems(base.id, [root]) - await vi.waitFor(() => { - expect(knowledgeItemGetDescendantItemsMock).toHaveBeenCalledWith(base.id, [root.id]) - }) - await flushPromises() - - finishChildRead.resolve([{ text: 'document' }]) - await deletePromise - - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(child.id) - 
expect(chunkDocumentsMock).not.toHaveBeenCalled() - expect(vectorStoreAddMock).not.toHaveBeenCalled() - expect(knowledgeItemUpdateStatusMock).not.toHaveBeenCalledWith(child.id, 'completed') - }) - - it('marks interrupted delete items failed when strict vector cleanup fails', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([child]) - vectorStoreDeleteMock.mockRejectedValueOnce(new Error('delete failed')) - - await expect(service.deleteItems(base.id, [root])).rejects.toThrow( - 'Failed to delete vectors for knowledge items in base kb-1: child-1' - ) - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: 'Failed to delete vectors for knowledge items in base kb-1: child-1' - }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(child.id, 'failed', { - error: 'Failed to delete vectors for knowledge items in base kb-1: child-1' - }) - }) - - it('preserves the original delete cleanup error when failure-state persistence fails', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const failureStateError = new Error('database locked') - - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([child]) - vectorStoreDeleteMock.mockRejectedValueOnce(new Error('delete failed')) - knowledgeItemUpdateStatusMock.mockRejectedValue(failureStateError) - - await expect(service.deleteItems(base.id, [root])).rejects.toThrow( - 'Failed to delete vectors for knowledge items in base kb-1: child-1' - ) - - 
expect(loggerErrorMock).toHaveBeenCalledWith( - 'Failed to persist knowledge item failure state during runtime cleanup', - expect.objectContaining({ name: 'FailedToPersistFailureStateError' }), - { - baseId: base.id, - itemIds: [root.id, child.id], - operation: 'deleteItems', - reason: 'Failed to delete vectors for knowledge items in base kb-1: child-1', - rootIds: [root.id] - } - ) - }) - - it('marks delete roots failed when descendant lookup fails after interruption', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const lookupError = new Error('descendant lookup failed') - - knowledgeItemGetDescendantItemsMock.mockRejectedValueOnce(lookupError) - - await expect(service.deleteItems(base.id, [root])).rejects.toBe(lookupError) - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: 'descendant lookup failed' - }) - expect(knowledgeItemGetLeafDescendantItemsMock).not.toHaveBeenCalled() - expect(vectorStoreDeleteMock).not.toHaveBeenCalled() - }) - - it('reindexes a child during parent expansion without duplicating the parent-submitted leaf task', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const childCreated = createDeferred() - const finishPreparation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - knowledgeItemGetLeafDescendantItemsMock.mockImplementation(async (_baseId: string, itemIds: string[]) => - itemIds.includes(child.id) ? 
[child] : [] - ) - prepareKnowledgeItemMock.mockImplementationOnce( - async ({ onCreatedItem }: { onCreatedItem: (item: KnowledgeItem) => void }) => { - onCreatedItem(child) - childCreated.resolve() - await finishPreparation.promise - return [child] - } - ) - - const addPromise = service.addItems(base.id, [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await childCreated.promise - - await service.reindexItems(base.id, [child]) - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith(child, expect.any(AbortSignal)) - }) - - finishPreparation.resolve() - await addPromise - await vi.waitFor(() => { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(child.id, 'completed') - }) - - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledTimes(1) - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(child.id) - }) - - it('interrupts parent preparation during reindex before scheduling a fresh preparation task', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const oldChild = createNoteItem('old-child-1', 'processing') - const newChild = createNoteItem('new-child-1', 'processing') - const oldPreparationStarted = createDeferred() - const finishOldPreparation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - knowledgeItemGetDescendantItemsMock.mockImplementation(async (_baseId: string, itemIds: string[]) => - itemIds.includes(root.id) ? 
[oldChild] : [] - ) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([oldChild]) - prepareKnowledgeItemMock - .mockImplementationOnce(async ({ signal }: { signal: AbortSignal }) => { - oldPreparationStarted.resolve() - await finishOldPreparation.promise - signal.throwIfAborted() - return [oldChild] - }) - .mockResolvedValueOnce([newChild]) - - const addPromise = service.addItems(base.id, [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await oldPreparationStarted.promise - - const reindexPromise = service.reindexItems(base.id, [root]) - await flushPromises() - - finishOldPreparation.resolve() - await reindexPromise - await addPromise - - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith(newChild, expect.any(AbortSignal)) - }) - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(oldChild, expect.any(AbortSignal)) - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(oldChild.id) - expect(knowledgeItemDeleteLeafDescendantItemsMock).toHaveBeenCalledWith(base.id, [root.id]) - }) - - it('interrupts a child task that preparation enqueued before reindex descendant lookup', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const oldChild = createNoteItem('old-child-1', 'processing') - const newChild = createNoteItem('new-child-1', 'processing') - const childReadStarted = createDeferred() - const finishChildRead = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockResolvedValueOnce([oldChild]).mockResolvedValueOnce([newChild]) - knowledgeItemGetByIdMock.mockImplementation(async (id: string) => - id === newChild.id ? 
newChild : createNoteItem(id, 'processing') - ) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([oldChild]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([oldChild]) - loadKnowledgeItemDocumentsMock.mockImplementationOnce(async () => { - childReadStarted.resolve() - return await finishChildRead.promise - }) - - await service.addItems(base.id, [{ type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } }]) - await childReadStarted.promise - - const reindexPromise = service.reindexItems(base.id, [root]) - await flushPromises() - - finishChildRead.resolve([{ text: 'document' }]) - await reindexPromise - - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(oldChild.id) - expect(knowledgeItemDeleteLeafDescendantItemsMock).toHaveBeenCalledWith(base.id, [root.id]) - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith(newChild, expect.any(AbortSignal)) - }) - }) - - it('marks interrupted reindex items failed and does not rebuild when strict vector cleanup fails', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([child]) - vectorStoreDeleteMock.mockRejectedValueOnce(new Error('delete failed')) - - await expect(service.reindexItems(base.id, [root])).rejects.toThrow( - 'Failed to delete vectors for knowledge items in base kb-1: child-1' - ) - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: 'Failed to delete vectors for knowledge items in base kb-1: child-1' - }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(child.id, 'failed', { - error: 'Failed to delete vectors for knowledge items in base kb-1: child-1' - }) - 
expect(knowledgeItemDeleteLeafDescendantItemsMock).not.toHaveBeenCalled() - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() - }) - - it('marks interrupted reindex items failed and does not rebuild when descendant deletion fails', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const deletionError = new Error('delete descendants failed') - const failureStateError = new Error('database locked') - - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValue([child]) - knowledgeItemDeleteLeafDescendantItemsMock.mockRejectedValueOnce(deletionError) - knowledgeItemUpdateStatusMock.mockRejectedValue(failureStateError) - - await expect(service.reindexItems(base.id, [root])).rejects.toBe(deletionError) - - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(child.id) - expect(loggerErrorMock).toHaveBeenCalledWith( - 'Failed to persist knowledge item failure state during runtime cleanup', - expect.objectContaining({ name: 'FailedToPersistFailureStateError' }), - { - baseId: base.id, - itemIds: [root.id, child.id], - operation: 'reindexItems', - reason: 'delete descendants failed', - rootIds: [root.id] - } - ) - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() - }) - - it('marks reindex roots failed and does not rebuild when descendant lookup fails after interruption', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const root = createDirectoryItem('dir-1', 'processing') - const lookupError = new Error('descendant lookup failed') - - knowledgeItemGetDescendantItemsMock.mockRejectedValueOnce(lookupError) - - await expect(service.reindexItems(base.id, 
[root])).rejects.toBe(lookupError) - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: 'descendant lookup failed' - }) - expect(knowledgeItemGetLeafDescendantItemsMock).not.toHaveBeenCalled() - expect(knowledgeItemDeleteLeafDescendantItemsMock).not.toHaveBeenCalled() - expect(prepareKnowledgeItemMock).not.toHaveBeenCalled() - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() - }) - - it('uses the operation base snapshot for children created by expansion', async () => { - const service = new KnowledgeRuntimeService() - const originalBase = { ...createBase(), chunkSize: 512 } - const updatedBase = { ...createBase(), chunkSize: 2048 } - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - - knowledgeBaseGetByIdMock.mockResolvedValueOnce(originalBase).mockResolvedValue(updatedBase) - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockResolvedValueOnce([child]) - - await service.addItems(originalBase.id, [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - - await vi.waitFor(() => { - expect(chunkDocumentsMock).toHaveBeenCalledWith(originalBase, child, [{ text: 'document' }]) - }) - expect(knowledgeBaseGetByIdMock).toHaveBeenCalledOnce() - }) - - it('interrupts queued root preparation during stop and does not enqueue created leaves', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const expansionStarted = createDeferred() - const finishCreation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockImplementationOnce( - async ({ onCreatedItem, signal }: { onCreatedItem: (item: KnowledgeItem) => void; signal: AbortSignal }) => { - expansionStarted.resolve() - await finishCreation.promise - signal.throwIfAborted() - 
onCreatedItem(child) - return [child] - } - ) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - - const addPromise = service.addItems('kb-1', [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await expansionStarted.promise - - const stopPromise = (service as unknown as { onStop: () => Promise }).onStop() - await flushPromises() - - finishCreation.resolve() - await stopPromise - await addPromise - - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(child, expect.any(AbortSignal)) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(child.id, 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - }) - - it('returns interrupted base item ids without deleting vector artifacts', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-1', 'processing') - const child = createNoteItem('child-1', 'processing') - const expansionStarted = createDeferred() - const finishCreation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([child]) - prepareKnowledgeItemMock.mockImplementationOnce(async ({ signal }: { signal: AbortSignal }) => { - expansionStarted.resolve() - await finishCreation.promise - signal.throwIfAborted() - return [child] - }) - - const addPromise = service.addItems('kb-1', [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await expansionStarted.promise - - const deleteBasePromise = service.deleteBase('kb-1') - await flushPromises() - - finishCreation.resolve() - await expect(deleteBasePromise).resolves.toEqual([root.id, child.id]) - await addPromise - - expect(deleteVectorStoreMock).not.toHaveBeenCalled() - }) - - it('marks interrupted base roots failed when 
expanding interrupted base entries fails', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-1', 'processing') - const expansionStarted = createDeferred() - const finishCreation = createDeferred() - const expansionError = new Error('descendant lookup failed') - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockImplementationOnce(async ({ signal }: { signal: AbortSignal }) => { - expansionStarted.resolve() - await finishCreation.promise - signal.throwIfAborted() - return [] - }) - knowledgeItemGetDescendantItemsMock.mockRejectedValueOnce(expansionError) - - const addPromise = service.addItems('kb-1', [ - { type: 'directory', data: { source: '/docs/dir-1', path: '/docs/dir-1' } } - ]) - await expansionStarted.promise - - const deleteBasePromise = service.deleteBase('kb-1') - await flushPromises() - - finishCreation.resolve() - await expect(deleteBasePromise).rejects.toBe(expansionError) - await addPromise - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(root.id, 'failed', { - error: 'descendant lookup failed' - }) - expect(deleteVectorStoreMock).not.toHaveBeenCalled() - }) - - it('deletes base vector artifacts through the artifact cleanup method', async () => { - const service = new KnowledgeRuntimeService() - - await expect(service.deleteBaseArtifacts('kb-1')).resolves.toBeUndefined() - - expect(deleteVectorStoreMock).toHaveBeenCalledWith('kb-1') - }) - - it('marks nested preparation subtree failed on stop', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-root', 'processing') - const childDir = createDirectoryItem('dir-child', 'processing') - const child = createNoteItem('child-1', 'processing') - const expansionStarted = createDeferred() - const finishCreation = createDeferred() - - knowledgeItemCreateMock.mockResolvedValueOnce(root) - prepareKnowledgeItemMock.mockImplementationOnce( - async ({ onCreatedItem, 
signal }: { onCreatedItem: (item: KnowledgeItem) => void; signal: AbortSignal }) => { - expansionStarted.resolve() - await finishCreation.promise - signal.throwIfAborted() - onCreatedItem(childDir) - onCreatedItem(child) - return [child] - } - ) - knowledgeItemGetDescendantItemsMock.mockResolvedValue([childDir, child]) - - const addPromise = service.addItems('kb-1', [ - { type: 'directory', data: { source: '/docs/dir-root', path: '/docs/dir-root' } } - ]) - await expansionStarted.promise - - const stopPromise = (service as unknown as { onStop: () => Promise }).onStop() - await flushPromises() - - finishCreation.resolve() - await stopPromise - await addPromise - - expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalledWith(child, expect.any(AbortSignal)) - for (const item of [root, childDir, child]) { - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - } - }) - - it('merges interrupted same-base entries before deleting vectors on stop', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const firstItem = createNoteItem('note-1') - const secondItem = createNoteItem('note-2') - const waitForRunningMock = vi.fn().mockResolvedValue(undefined) - const interruptAllMock = vi - .fn() - .mockReturnValue([ - createQueueDescriptor(base, firstItem, 'index-leaf'), - createQueueDescriptor(base, secondItem, 'index-leaf') - ]) - - ;( - service as unknown as { - queue: { interruptAll: typeof interruptAllMock; waitForRunning: typeof waitForRunningMock } - } - ).queue = { - interruptAll: interruptAllMock, - waitForRunning: waitForRunningMock - } - - await (service as unknown as { onStop: () => Promise }).onStop() - - expect(waitForRunningMock).toHaveBeenCalledWith(['note-1', 'note-2']) - expect(getStoreIfExistsMock).toHaveBeenCalledOnce() - expect(getStoreIfExistsMock).toHaveBeenCalledWith(base) - 
expect(vectorStoreDeleteMock).toHaveBeenCalledWith('note-1') - expect(vectorStoreDeleteMock).toHaveBeenCalledWith('note-2') - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('note-1', 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith('note-2', 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - }) - - it('reindex deletes old vectors before returning and then schedules background indexing', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const item = createNoteItem('note-1', 'processing') - const deleteDeferred = createDeferred() - vectorStoreDeleteMock.mockReturnValueOnce(deleteDeferred.promise) - - let resolved = false - const reindexPromise = service.reindexItems(base.id, [item]).then(() => { - resolved = true - }) - - await vi.waitFor(() => { - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(item.id) - }) - await flushPromises() - expect(resolved).toBe(false) - - deleteDeferred.resolve() - await reindexPromise - - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledWith(item, expect.any(AbortSignal)) - }) - }) - - it('deletes item vectors synchronously for deleteItems', async () => { - const service = new KnowledgeRuntimeService() - const base = createBase() - const item = createNoteItem() - - await service.deleteItems(base.id, [item]) - - expect(getStoreIfExistsMock).toHaveBeenCalledWith(base) - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(item.id) - }) - - it('lists chunks for leaf descendants when the requested item is a container', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-1', 'completed') - const fileChild = createNoteItem('file-child', 'completed') - const urlChild = createNoteItem('url-child', 'completed') - const fileChunk = { - id_: 'chunk-file-1', - metadata: { - itemId: fileChild.id, - 
itemType: 'note', - source: 'file child', - chunkIndex: 0, - tokenCount: 3 - }, - getContent: vi.fn(() => 'file child chunk') - } - const urlChunk = { - id_: 'chunk-url-1', - metadata: { - itemId: urlChild.id, - itemType: 'note', - source: 'url child', - chunkIndex: 0, - tokenCount: 4 - }, - getContent: vi.fn(() => 'url child chunk') - } - - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValueOnce([fileChild, urlChild]) - vectorStoreListByExternalIdMock.mockImplementation(async (itemId: string) => { - if (itemId === fileChild.id) { - return [fileChunk] - } - if (itemId === urlChild.id) { - return [urlChunk] - } - - return [] - }) - - await expect(service.listItemChunks('kb-1', root.id)).resolves.toEqual([ - { - id: 'chunk-file-1', - itemId: fileChild.id, - content: 'file child chunk', - metadata: fileChunk.metadata - }, - { - id: 'chunk-url-1', - itemId: urlChild.id, - content: 'url child chunk', - metadata: urlChunk.metadata - } - ]) - - expect(knowledgeItemGetLeafDescendantItemsMock).toHaveBeenCalledWith('kb-1', [root.id]) - expect(vectorStoreListByExternalIdMock).toHaveBeenCalledWith(fileChild.id) - expect(vectorStoreListByExternalIdMock).toHaveBeenCalledWith(urlChild.id) - expect(vectorStoreListByExternalIdMock).not.toHaveBeenCalledWith(root.id) - }) - - it('returns no chunks without creating a vector store when a container has no leaf descendants', async () => { - const service = new KnowledgeRuntimeService() - const root = createDirectoryItem('dir-empty', 'completed') - - knowledgeItemGetLeafDescendantItemsMock.mockResolvedValueOnce([]) - - await expect(service.listItemChunks('kb-1', root.id)).resolves.toEqual([]) - - expect(knowledgeItemGetLeafDescendantItemsMock).toHaveBeenCalledWith('kb-1', [root.id]) - expect(createVectorStoreMock).not.toHaveBeenCalled() - expect(vectorStoreListByExternalIdMock).not.toHaveBeenCalled() - }) - - it('throws when search query embedding is empty', async () => { - const service = new KnowledgeRuntimeService() - 
embedManyMock.mockResolvedValueOnce({ embeddings: [] }) - - await expect(service.search('kb-1', 'hello')).rejects.toThrow('Failed to embed search query') - - expect(vectorStoreQueryMock).not.toHaveBeenCalled() - }) - - it('throws validation error for punctuation-only queries without embedding', async () => { - const service = new KnowledgeRuntimeService() - - await expect(service.search('kb-1', '...')).rejects.toMatchObject({ - message: 'Query has no searchable tokens', - details: { - fieldErrors: { - query: ['Query has no searchable tokens'] - } - } - }) - - expect(knowledgeBaseGetByIdMock).not.toHaveBeenCalled() - expect(embedManyMock).not.toHaveBeenCalled() - expect(vectorStoreQueryMock).not.toHaveBeenCalled() - }) - - it('embeds CJK search queries', async () => { - const service = new KnowledgeRuntimeService() - - await expect(service.search('kb-1', '你好')).resolves.toEqual([]) - - expect(knowledgeBaseGetByIdMock).toHaveBeenCalledWith('kb-1') - expect(embedManyMock).toHaveBeenCalledWith({ model: { modelId: 'embedding-model' }, values: ['你好'] }) - expect(vectorStoreQueryMock).toHaveBeenCalled() - }) - - it('marks vector search scores as relevance and filters them by threshold', async () => { - const service = new KnowledgeRuntimeService() - const base = { ...createBase(), searchMode: 'default' as const, threshold: 0.7 } - const lowNode = createSearchNode('chunk-low', 0.6, 0) - const highNode = createSearchNode('chunk-high', 0.8, 1) - - knowledgeBaseGetByIdMock.mockResolvedValueOnce(base) - vectorStoreQueryMock.mockResolvedValueOnce({ nodes: [lowNode, highNode], similarities: [0.6, 0.8] }) - - await expect(service.search('kb-1', 'hello')).resolves.toEqual([ - { - pageContent: 'content chunk-high', - score: 0.8, - scoreKind: 'relevance', - rank: 1, - metadata: highNode.metadata, - itemId: 'item-chunk-high', - chunkId: 'chunk-high' - } - ]) - }) - - it.each(['bm25', 'hybrid'] as const)( - 'marks %s search scores as ranking and does not filter by threshold', - async 
(searchMode) => { const service = new KnowledgeRuntimeService() - const base = { ...createBase(), searchMode, threshold: 0.7 } - const firstNode = createSearchNode(`${searchMode}-first`, 0.2, 0) - const secondNode = createSearchNode(`${searchMode}-second`, 0.1, 1) - - knowledgeBaseGetByIdMock.mockResolvedValueOnce(base) - vectorStoreQueryMock.mockResolvedValueOnce({ nodes: [firstNode, secondNode], similarities: [0.2, 0.1] }) - - await expect(service.search('kb-1', 'hello')).resolves.toEqual([ - { - pageContent: `content ${searchMode}-first`, - score: 0.2, - scoreKind: 'ranking', - rank: 1, - metadata: firstNode.metadata, - itemId: `item-${searchMode}-first`, - chunkId: `${searchMode}-first` - }, - { - pageContent: `content ${searchMode}-second`, - score: 0.1, - scoreKind: 'ranking', - rank: 2, - metadata: secondNode.metadata, - itemId: `item-${searchMode}-second`, - chunkId: `${searchMode}-second` - } - ]) - } - ) - - it('reranks search results when the base has a rerank model', async () => { - const service = new KnowledgeRuntimeService() - const base = { ...createBase(), rerankModelId: 'openai::rerank-model', threshold: 0.7 } - const node = { - id_: 'chunk-1', - metadata: { - itemId: 'note-1', - itemType: 'note', - source: 'note-1', - chunkIndex: 0, - tokenCount: 2 - }, - getContent: vi.fn(() => 'hello world') - } - const reranked = [ - { - pageContent: 'hello world', - score: 0.6, - scoreKind: 'relevance' as const, - rank: 1, - metadata: node.metadata, - itemId: 'note-1', - chunkId: 'chunk-1' - } - ] - - knowledgeBaseGetByIdMock.mockResolvedValueOnce(base) - vectorStoreQueryMock.mockResolvedValueOnce({ nodes: [node], similarities: [0.8] }) - rerankKnowledgeSearchResultsMock.mockResolvedValueOnce(reranked) - - await expect(service.search('kb-1', 'hello')).resolves.toEqual([]) - - expect(rerankKnowledgeSearchResultsMock).toHaveBeenCalledWith(base, 'hello', [ - { - pageContent: 'hello world', - score: 0.8, - scoreKind: 'ranking', - rank: 1, - metadata: 
node.metadata, - itemId: 'note-1', - chunkId: 'chunk-1' - } - ]) - }) - - it('marks queued work failed on stop', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - const readDeferred = createDeferred() - loadKnowledgeItemDocumentsMock.mockReturnValueOnce(readDeferred.promise) - - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalled() - }) - - const stopPromise = (service as unknown as { onStop: () => Promise }).onStop() - await flushPromises() - expect(knowledgeItemUpdateStatusMock).not.toHaveBeenCalledWith(item.id, 'failed', { - error: 'Knowledge task interrupted by service shutdown' - }) - - readDeferred.resolve([{ text: 'document' }]) - await stopPromise - - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'Knowledge task interrupted by service shutdown' + await expect(service.reindexItems('kb-1', [root])).resolves.toBeUndefined() + expect(loggerWarnMock).toHaveBeenCalledWith('reindex cancel failed (job may already be terminal)', { + jobId: 'job-stale', + error: 'already terminal' + }) }) }) - it('marks queued work failed on stop when vector cleanup fails', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - const readDeferred = createDeferred() - loadKnowledgeItemDocumentsMock.mockReturnValueOnce(readDeferred.promise) - vectorStoreDeleteMock.mockRejectedValueOnce(new Error('delete failed')) + describe('deleteItems', () => { + it('cancels in-flight jobs for the subtree and cleans leaf vectors (no DB row delete)', async () => { + const root = createNoteItem('note-1', 'processing') + knowledgeItemGetDescendantAndSelfItemsMock.mockResolvedValueOnce([root]) + knowledgeItemGetLeafDescendantItemsMock.mockResolvedValueOnce([root]) + listMock.mockResolvedValueOnce([{ 
id: 'job-x', input: { itemId: 'note-1' } }]) - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalled() - }) + const service = new KnowledgeRuntimeService() + await service.deleteItems('kb-1', [root]) - const stopPromise = (service as unknown as { onStop: () => Promise }).onStop() - readDeferred.resolve([{ text: 'document' }]) - - await expect(stopPromise).resolves.toBeUndefined() - expect(vectorStoreDeleteMock).toHaveBeenCalledWith(item.id) - expect(knowledgeItemUpdateStatusMock).toHaveBeenCalledWith(item.id, 'failed', { - error: 'Knowledge task interrupted by service shutdown' + expect(cancelMock).toHaveBeenCalledWith('job-x', 'delete-items') + expect(knowledgeItemDeleteMock).not.toHaveBeenCalled() }) }) - it('does not fail stop when failure-state persistence fails during interrupt cleanup', async () => { - const service = new KnowledgeRuntimeService() - const item = createNoteItem('note-1', 'processing') - const readDeferred = createDeferred() - const failureStateError = new Error('database locked') - loadKnowledgeItemDocumentsMock.mockReturnValueOnce(readDeferred.promise) + describe('runWithBaseWriteLockForBase', () => { + it('serializes overlapping tasks for the same base', async () => { + const service = new KnowledgeRuntimeService() + const order: string[] = [] - await service.addItems('kb-1', [{ type: 'note', data: { source: 'note-1', content: 'hello note-1' } }]) - await vi.waitFor(() => { - expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalled() + const first = service.runWithBaseWriteLockForBase('kb-1', async () => { + order.push('first-start') + await new Promise((r) => setTimeout(r, 10)) + order.push('first-end') + }) + const second = service.runWithBaseWriteLockForBase('kb-1', async () => { + order.push('second-start') + order.push('second-end') + }) + + await Promise.all([first, second]) + 
expect(order).toEqual(['first-start', 'first-end', 'second-start', 'second-end']) }) - const stopPromise = (service as unknown as { onStop: () => Promise }).onStop() - knowledgeItemUpdateStatusMock.mockRejectedValue(failureStateError) - readDeferred.resolve([{ text: 'document' }]) + it('releases the lock even when the task throws', async () => { + const service = new KnowledgeRuntimeService() - await expect(stopPromise).resolves.toBeUndefined() - expect(loggerErrorMock).toHaveBeenCalledWith( - 'Failed to persist knowledge item failure state during runtime cleanup', - expect.objectContaining({ name: 'FailedToPersistFailureStateError' }), - { - itemIds: [item.id], - operation: 'interruptedRuntimeCleanup', - reason: 'Knowledge task interrupted by service shutdown' - } - ) + await expect( + service.runWithBaseWriteLockForBase('kb-1', async () => { + throw new Error('boom') + }) + ).rejects.toThrow('boom') + + // Second call must not deadlock. + await expect(service.runWithBaseWriteLockForBase('kb-1', async () => 'ok')).resolves.toBe('ok') + }) + }) + + describe('waitForBaseWriteLocks', () => { + it('returns immediately when no locks are held', async () => { + const service = new KnowledgeRuntimeService() + await expect(service.waitForBaseWriteLocks('kb-1', 100)).resolves.toBeUndefined() + }) + + it('logs a warning and returns when timeout elapses while a lock is still held', async () => { + vi.useFakeTimers() + const service = new KnowledgeRuntimeService() + + const blocker = service.runWithBaseWriteLockForBase('kb-1', async () => { + await new Promise(() => { + /* never resolves */ + }) + }) + + const waitPromise = service.waitForBaseWriteLocks('kb-1', 50) + await vi.advanceTimersByTimeAsync(60) + await expect(waitPromise).resolves.toBeUndefined() + expect(loggerWarnMock).toHaveBeenCalledWith('waitForBaseWriteLocks timed out', { + baseId: 'kb-1', + timeoutMs: 50, + lockCount: 1 + }) + + void blocker.catch(() => undefined) + }) }) }) diff --git 
a/src/main/services/knowledge/runtime/utils/prepare.ts b/src/main/services/knowledge/runtime/utils/prepare.ts index 2b99c876aa..df29e8a766 100644 --- a/src/main/services/knowledge/runtime/utils/prepare.ts +++ b/src/main/services/knowledge/runtime/utils/prepare.ts @@ -9,7 +9,7 @@ import { import type { IndexableKnowledgeItem } from '../../types/items' import { expandDirectoryOwnerToTree, type ExpandedDirectoryNode } from '../../utils/directory' -import { isIndexableKnowledgeItem } from '../../utils/items' +import { isContainerKnowledgeItem, isIndexableKnowledgeItem } from '../../utils/items' import { expandSitemapOwnerToCreateItems } from '../../utils/sitemap' const logger = loggerService.withContext('KnowledgeRuntimePrepare') @@ -165,7 +165,7 @@ async function createRuntimeItem( onCreatedItem(createdItem) const processingItem = await runMutation(() => - createdItem.type === 'directory' || createdItem.type === 'sitemap' + isContainerKnowledgeItem(createdItem) ? knowledgeItemService.updateStatus(createdItem.id, 'processing', { phase: 'preparing' }) : knowledgeItemService.updateStatus(createdItem.id, 'processing') ) diff --git a/src/main/services/knowledge/tasks/__tests__/indexLeafJobHandler.test.ts b/src/main/services/knowledge/tasks/__tests__/indexLeafJobHandler.test.ts new file mode 100644 index 0000000000..546c8fefc4 --- /dev/null +++ b/src/main/services/knowledge/tasks/__tests__/indexLeafJobHandler.test.ts @@ -0,0 +1,223 @@ +import type { JobContext } from '@main/core/job/types' +import type { KnowledgeItem, KnowledgeItemOf } from '@shared/data/types/knowledge' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + chunkDocumentsMock, + createVectorStoreMock, + embedDocumentsMock, + getEmbedModelMock, + knowledgeBaseGetByIdMock, + knowledgeItemGetByIdMock, + knowledgeItemUpdateStatusMock, + loadKnowledgeItemDocumentsMock, + replaceByExternalIdMock, + runWithBaseWriteLockForBaseMock +} = vi.hoisted(() => ({ + chunkDocumentsMock: vi.fn(), + 
createVectorStoreMock: vi.fn(), + embedDocumentsMock: vi.fn(), + getEmbedModelMock: vi.fn(), + knowledgeBaseGetByIdMock: vi.fn(), + knowledgeItemGetByIdMock: vi.fn(), + knowledgeItemUpdateStatusMock: vi.fn(), + loadKnowledgeItemDocumentsMock: vi.fn(), + replaceByExternalIdMock: vi.fn(), + runWithBaseWriteLockForBaseMock: vi.fn() +})) + +vi.mock('@application', async () => { + const { mockApplicationFactory } = await import('@test-mocks/main/application') + return mockApplicationFactory({ + KnowledgeRuntimeService: { + runWithBaseWriteLockForBase: runWithBaseWriteLockForBaseMock + }, + KnowledgeVectorStoreService: { + createStore: createVectorStoreMock + } + } as Parameters[0]) +}) + +vi.mock('@logger', () => ({ + loggerService: { + withContext: () => ({ + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warn: vi.fn() + }) + } +})) + +vi.mock('@data/services/KnowledgeBaseService', () => ({ + knowledgeBaseService: { + getById: knowledgeBaseGetByIdMock + } +})) + +vi.mock('@data/services/KnowledgeItemService', () => ({ + knowledgeItemService: { + getById: knowledgeItemGetByIdMock, + updateStatus: knowledgeItemUpdateStatusMock + } +})) + +vi.mock('../../readers/KnowledgeReader', () => ({ + loadKnowledgeItemDocuments: loadKnowledgeItemDocumentsMock +})) + +vi.mock('../../utils/chunk', () => ({ + chunkDocuments: chunkDocumentsMock +})) + +vi.mock('../../utils/embed', () => ({ + embedDocuments: embedDocumentsMock +})) + +vi.mock('../../utils/model', () => ({ + getEmbedModel: getEmbedModelMock +})) + +const { indexLeafJobHandler } = await import('../indexLeafJobHandler') + +function createLeafItem(id = 'note-1', status: KnowledgeItem['status'] = 'processing'): KnowledgeItemOf<'note'> { + return { + id, + baseId: 'kb-1', + groupId: null, + type: 'note', + data: { source: id, content: `body of ${id}` }, + status, + phase: status === 'processing' ? 
'reading' : null, + error: null, + createdAt: '2026-04-08T00:00:00.000Z', + updatedAt: '2026-04-08T00:00:00.000Z' + } as KnowledgeItemOf<'note'> +} + +function createCtx( + overrides: Partial> = {} +): JobContext<{ baseId: string; itemId: string; parentJobId: string | null }> { + const controller = new AbortController() + return { + jobId: 'job-leaf-1', + input: { baseId: 'kb-1', itemId: 'note-1', parentJobId: null }, + attempt: 1, + signal: controller.signal, + metadata: {}, + patchMetadata: vi.fn().mockResolvedValue(undefined), + reportProgress: vi.fn(), + logger: { + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warn: vi.fn() + } as unknown as JobContext['logger'], + ...overrides + } as JobContext<{ baseId: string; itemId: string; parentJobId: string | null }> +} + +describe('indexLeafJobHandler', () => { + beforeEach(() => { + vi.clearAllMocks() + knowledgeBaseGetByIdMock.mockResolvedValue({ id: 'kb-1' }) + knowledgeItemGetByIdMock.mockResolvedValue(createLeafItem('note-1', 'processing')) + knowledgeItemUpdateStatusMock.mockImplementation(async (id: string) => createLeafItem(id, 'processing')) + loadKnowledgeItemDocumentsMock.mockResolvedValue([{ text: 'doc-1' }]) + chunkDocumentsMock.mockReturnValue([{ text: 'chunk-1' }]) + embedDocumentsMock.mockResolvedValue([{ id_: 'chunk-1', getContent: vi.fn() }]) + getEmbedModelMock.mockReturnValue({ id: 'embed-model' }) + createVectorStoreMock.mockResolvedValue({ + replaceByExternalId: replaceByExternalIdMock + }) + replaceByExternalIdMock.mockResolvedValue(['chunk-1']) + runWithBaseWriteLockForBaseMock.mockImplementation(async (_baseId: string, task: () => Promise) => task()) + }) + + it('exposes the documented handler configuration', () => { + expect(indexLeafJobHandler.recovery).toBe('retry') + expect(indexLeafJobHandler.defaultConcurrency).toBe(5) + expect(indexLeafJobHandler.defaultTimeoutMs).toBe(5 * 60 * 1000) + expect(indexLeafJobHandler.defaultRetryPolicy).toEqual({ + maxAttempts: 3, + backoff: 
'exponential', + baseDelayMs: 1000, + maxDelayMs: 30_000 + }) + expect(indexLeafJobHandler.defaultQueue?.({ baseId: 'kb-99', itemId: 'x', parentJobId: null })).toBe('base.kb-99') + }) + + it('runs read → chunk → embed → replaceByExternalId → completed in order', async () => { + await indexLeafJobHandler.execute(createCtx()) + + expect(loadKnowledgeItemDocumentsMock).toHaveBeenCalledOnce() + expect(chunkDocumentsMock).toHaveBeenCalledOnce() + expect(embedDocumentsMock).toHaveBeenCalledOnce() + expect(replaceByExternalIdMock).toHaveBeenCalledWith('note-1', [expect.objectContaining({ id_: 'chunk-1' })]) + expect(knowledgeItemUpdateStatusMock).toHaveBeenLastCalledWith('note-1', 'completed') + + const order = { + load: loadKnowledgeItemDocumentsMock.mock.invocationCallOrder[0], + chunk: chunkDocumentsMock.mock.invocationCallOrder[0], + embed: embedDocumentsMock.mock.invocationCallOrder[0], + replace: replaceByExternalIdMock.mock.invocationCallOrder[0] + } + expect(order.load).toBeLessThan(order.chunk) + expect(order.chunk).toBeLessThan(order.embed) + expect(order.embed).toBeLessThan(order.replace) + }) + + it('skips embedding when the item is already completed', async () => { + knowledgeItemGetByIdMock.mockResolvedValueOnce(createLeafItem('note-1', 'completed')) + const reportProgress = vi.fn() + + await indexLeafJobHandler.execute(createCtx({ reportProgress })) + + expect(loadKnowledgeItemDocumentsMock).not.toHaveBeenCalled() + expect(embedDocumentsMock).not.toHaveBeenCalled() + expect(replaceByExternalIdMock).not.toHaveBeenCalled() + expect(reportProgress).toHaveBeenCalledWith(100, { + stage: 'already-completed', + currentFile: 1, + totalFiles: 1 + }) + }) + + it('throws KNOWLEDGE_EMPTY_CONTENT when the reader returns nothing', async () => { + loadKnowledgeItemDocumentsMock.mockResolvedValueOnce([]) + + await expect(indexLeafJobHandler.execute(createCtx())).rejects.toThrow('KNOWLEDGE_EMPTY_CONTENT') + expect(embedDocumentsMock).not.toHaveBeenCalled() + 
expect(replaceByExternalIdMock).not.toHaveBeenCalled() + }) + + it('propagates replaceByExternalId errors so JobManager can retry', async () => { + const writeError = new Error('disk full') + replaceByExternalIdMock.mockRejectedValueOnce(writeError) + + await expect(indexLeafJobHandler.execute(createCtx())).rejects.toBe(writeError) + // status should never be flipped to completed when replace fails + expect(knowledgeItemUpdateStatusMock).not.toHaveBeenCalledWith('note-1', 'completed') + }) + + it('aborts mid-flight when the signal fires', async () => { + const controller = new AbortController() + controller.abort(new Error('user cancelled')) + + await expect(indexLeafJobHandler.execute(createCtx({ signal: controller.signal }))).rejects.toThrow( + 'user cancelled' + ) + expect(replaceByExternalIdMock).not.toHaveBeenCalled() + }) + + it('refuses to process a non-leaf item type', async () => { + knowledgeItemGetByIdMock.mockResolvedValueOnce({ + ...createLeafItem('note-1', 'processing'), + type: 'directory' + } as unknown as KnowledgeItem) + + await expect(indexLeafJobHandler.execute(createCtx())).rejects.toThrow( + /indexLeafJobHandler received non-leaf knowledge item/ + ) + }) +}) diff --git a/src/main/services/knowledge/tasks/__tests__/prepareRootJobHandler.test.ts b/src/main/services/knowledge/tasks/__tests__/prepareRootJobHandler.test.ts new file mode 100644 index 0000000000..740d50cc46 --- /dev/null +++ b/src/main/services/knowledge/tasks/__tests__/prepareRootJobHandler.test.ts @@ -0,0 +1,226 @@ +import type { JobContext } from '@main/core/job/types' +import type { KnowledgeItem, KnowledgeItemOf } from '@shared/data/types/knowledge' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + cancelMock, + deleteLeafDescendantItemsMock, + enqueueMock, + knowledgeBaseGetByIdMock, + knowledgeItemGetByIdMock, + knowledgeItemUpdateStatusMock, + listMock, + prepareKnowledgeItemMock, + runWithBaseWriteLockForBaseMock +} = vi.hoisted(() => ({ + 
cancelMock: vi.fn(), + deleteLeafDescendantItemsMock: vi.fn(), + enqueueMock: vi.fn(), + knowledgeBaseGetByIdMock: vi.fn(), + knowledgeItemGetByIdMock: vi.fn(), + knowledgeItemUpdateStatusMock: vi.fn(), + listMock: vi.fn(), + prepareKnowledgeItemMock: vi.fn(), + runWithBaseWriteLockForBaseMock: vi.fn() +})) + +vi.mock('@application', async () => { + const { mockApplicationFactory } = await import('@test-mocks/main/application') + return mockApplicationFactory({ + KnowledgeRuntimeService: { + runWithBaseWriteLockForBase: runWithBaseWriteLockForBaseMock + }, + JobManager: { + enqueue: enqueueMock, + cancel: cancelMock, + list: listMock + } + } as Parameters[0]) +}) + +vi.mock('@logger', () => ({ + loggerService: { + withContext: () => ({ + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warn: vi.fn() + }) + } +})) + +vi.mock('@data/services/KnowledgeBaseService', () => ({ + knowledgeBaseService: { + getById: knowledgeBaseGetByIdMock + } +})) + +vi.mock('@data/services/KnowledgeItemService', () => ({ + knowledgeItemService: { + getById: knowledgeItemGetByIdMock, + deleteLeafDescendantItems: deleteLeafDescendantItemsMock, + updateStatus: knowledgeItemUpdateStatusMock + } +})) + +vi.mock('../../runtime/utils/prepare', () => ({ + prepareKnowledgeItem: prepareKnowledgeItemMock +})) + +const { prepareRootJobHandler } = await import('../prepareRootJobHandler') + +function createDirectoryItem(id = 'dir-1'): KnowledgeItemOf<'directory'> { + return { + id, + baseId: 'kb-1', + groupId: null, + type: 'directory', + data: { source: id, path: `/docs/${id}` }, + status: 'processing', + phase: 'preparing', + error: null, + createdAt: '2026-04-08T00:00:00.000Z', + updatedAt: '2026-04-08T00:00:00.000Z' + } as KnowledgeItemOf<'directory'> +} + +function createLeafItem(id: string): KnowledgeItem { + return { + id, + baseId: 'kb-1', + groupId: 'dir-1', + type: 'note', + data: { source: id, content: `body of ${id}` }, + status: 'processing', + phase: null, + error: null, + createdAt: 
'2026-04-08T00:00:00.000Z', + updatedAt: '2026-04-08T00:00:00.000Z' + } as KnowledgeItem +} + +function createCtx(overrides: Partial> = {}): JobContext<{ + baseId: string + itemId: string +}> { + const controller = new AbortController() + return { + jobId: 'job-prepare-root-1', + input: { baseId: 'kb-1', itemId: 'dir-1' }, + attempt: 1, + signal: controller.signal, + metadata: {}, + patchMetadata: vi.fn().mockResolvedValue(undefined), + reportProgress: vi.fn(), + logger: { + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warn: vi.fn() + } as unknown as JobContext['logger'], + ...overrides + } as JobContext<{ baseId: string; itemId: string }> +} + +describe('prepareRootJobHandler', () => { + beforeEach(() => { + vi.clearAllMocks() + knowledgeBaseGetByIdMock.mockResolvedValue({ id: 'kb-1' }) + knowledgeItemGetByIdMock.mockResolvedValue(createDirectoryItem()) + knowledgeItemUpdateStatusMock.mockResolvedValue(createDirectoryItem()) + deleteLeafDescendantItemsMock.mockResolvedValue(undefined) + listMock.mockResolvedValue([]) + cancelMock.mockResolvedValue(undefined) + enqueueMock.mockResolvedValue({ id: 'leaf-job', snapshot: {}, finished: Promise.resolve({}) }) + runWithBaseWriteLockForBaseMock.mockImplementation(async (_baseId: string, task: () => Promise) => task()) + }) + + it('exposes the documented handler configuration', () => { + expect(prepareRootJobHandler.recovery).toBe('retry') + expect(prepareRootJobHandler.defaultConcurrency).toBe(5) + expect(prepareRootJobHandler.defaultTimeoutMs).toBe(10 * 60 * 1000) + expect(prepareRootJobHandler.defaultRetryPolicy).toEqual({ + maxAttempts: 3, + backoff: 'exponential', + baseDelayMs: 2000, + maxDelayMs: 60_000 + }) + expect(prepareRootJobHandler.defaultQueue?.({ baseId: 'kb-42', itemId: 'x' })).toBe('base.kb-42') + }) + + it('expands the container and enqueues one knowledge.index-leaf job per leaf', async () => { + const leaves = [createLeafItem('leaf-a'), createLeafItem('leaf-b')] + 
prepareKnowledgeItemMock.mockResolvedValueOnce(leaves) + + await prepareRootJobHandler.execute(createCtx()) + + expect(enqueueMock).toHaveBeenCalledTimes(2) + expect(enqueueMock).toHaveBeenNthCalledWith( + 1, + 'knowledge.index-leaf', + { baseId: 'kb-1', itemId: 'leaf-a', parentJobId: 'job-prepare-root-1' }, + { idempotencyKey: 'knowledge:kb-1:leaf-a', parentId: 'job-prepare-root-1' } + ) + expect(enqueueMock).toHaveBeenNthCalledWith( + 2, + 'knowledge.index-leaf', + { baseId: 'kb-1', itemId: 'leaf-b', parentJobId: 'job-prepare-root-1' }, + { idempotencyKey: 'knowledge:kb-1:leaf-b', parentId: 'job-prepare-root-1' } + ) + }) + + it('cancels only orphan child jobs that match parentJobId === ctx.jobId on retry', async () => { + prepareKnowledgeItemMock.mockResolvedValueOnce([]) + listMock.mockResolvedValueOnce([ + // Child of THIS prepare-root from a previous attempt — must be cancelled. + { id: 'orphan-of-mine', input: { parentJobId: 'job-prepare-root-1' } }, + // Child of a different prepare-root running on this same queue — leave alone. + { id: 'sibling-prepare-root-orphan', input: { parentJobId: 'job-other-prepare-root' } }, + // Directly-enqueued leaf (e.g. addItems on a file) — leave alone. + { id: 'unrelated-leaf', input: { parentJobId: null } }, + // The row representing this very prepare-root execution — must not self-cancel. 
+ { id: 'job-prepare-root-1', input: { parentJobId: null } } + ]) + + await prepareRootJobHandler.execute(createCtx()) + + const cancelledIds = cancelMock.mock.calls.map((call) => call[0]) + expect(cancelledIds).toEqual(['orphan-of-mine']) + expect(cancelMock).toHaveBeenCalledWith('orphan-of-mine', 'prepare-root-retry') + }) + + it('clears prior leaf rows via deleteLeafDescendantItems before re-expanding', async () => { + prepareKnowledgeItemMock.mockResolvedValueOnce([]) + + await prepareRootJobHandler.execute(createCtx()) + + expect(deleteLeafDescendantItemsMock).toHaveBeenCalledWith('kb-1', ['dir-1']) + expect(deleteLeafDescendantItemsMock.mock.invocationCallOrder[0]).toBeLessThan( + prepareKnowledgeItemMock.mock.invocationCallOrder[0] + ) + }) + + it('treats expansion that yields zero leaves as success', async () => { + prepareKnowledgeItemMock.mockResolvedValueOnce([]) + const reportProgress = vi.fn() + + await prepareRootJobHandler.execute(createCtx({ reportProgress })) + + expect(enqueueMock).not.toHaveBeenCalled() + expect(reportProgress).toHaveBeenLastCalledWith(100, { + stage: 'done', + currentFile: 0, + totalFiles: 0 + }) + }) + + it('propagates abort errors raised by signal.throwIfAborted', async () => { + const controller = new AbortController() + controller.abort(new Error('aborted by test')) + + await expect(prepareRootJobHandler.execute(createCtx({ signal: controller.signal }))).rejects.toThrow( + 'aborted by test' + ) + expect(enqueueMock).not.toHaveBeenCalled() + }) +}) diff --git a/src/main/services/knowledge/tasks/indexLeafJobHandler.ts b/src/main/services/knowledge/tasks/indexLeafJobHandler.ts new file mode 100644 index 0000000000..29e4fbd830 --- /dev/null +++ b/src/main/services/knowledge/tasks/indexLeafJobHandler.ts @@ -0,0 +1,147 @@ +// Side-effect import picks up the declare-module merges for the knowledge job +// registry so the JobHandler<…> generic below resolves the payload type. 
+import './jobTypes' + +import { application } from '@application' +import { knowledgeBaseService } from '@data/services/KnowledgeBaseService' +import { knowledgeItemService } from '@data/services/KnowledgeItemService' +import { loggerService } from '@logger' +import type { JobHandler } from '@main/core/job/types' +import { ErrorCode, isDataApiError } from '@shared/data/api' +import type { KnowledgeBase, KnowledgeItem } from '@shared/data/types/knowledge' + +import { loadKnowledgeItemDocuments } from '../readers/KnowledgeReader' +import { chunkDocuments } from '../utils/chunk' +import { embedDocuments } from '../utils/embed' +import { isIndexableKnowledgeItem } from '../utils/items' +import { getEmbedModel } from '../utils/model' +import type { KnowledgeIndexLeafPayload } from './jobTypes' + +const logger = loggerService.withContext('indexLeafJobHandler') +const KNOWLEDGE_EMPTY_CONTENT_REASON = 'KNOWLEDGE_EMPTY_CONTENT' + +function assertHasIndexableContent(items: T[]): void { + if (items.length === 0) { + throw new Error(KNOWLEDGE_EMPTY_CONTENT_REASON) + } +} + +export const indexLeafJobHandler: JobHandler = { + recovery: 'retry', + defaultQueue: (input) => `base.${input.baseId}`, + defaultConcurrency: 5, + defaultRetryPolicy: { + maxAttempts: 3, + backoff: 'exponential', + baseDelayMs: 1000, + maxDelayMs: 30_000 + }, + defaultTimeoutMs: 5 * 60 * 1000, + + async execute(ctx) { + const { baseId, itemId } = ctx.input + const runtime = application.get('KnowledgeRuntimeService') + const vectorStoreService = application.get('KnowledgeVectorStoreService') + + ctx.signal.throwIfAborted() + // Read base + item up front. If either is gone it was deleted + // concurrently (deleteBase / deleteItems race) — return cleanly so the job + // settles as 'completed' and does not burn retry attempts on a dead row.
+ let base: KnowledgeBase + let item: KnowledgeItem + try { + base = await knowledgeBaseService.getById(baseId) + item = await knowledgeItemService.getById(itemId) + } catch (error) { + if (isDataApiError(error) && error.code === ErrorCode.NOT_FOUND) { + logger.info('Skipping index-leaf for missing base or item (likely deleted concurrently)', { + baseId, + itemId, + jobId: ctx.jobId + }) + ctx.reportProgress(100, { stage: 'item-gone', currentFile: 1, totalFiles: 1 }) + return + } + throw error + } + + if (!isIndexableKnowledgeItem(item)) { + throw new Error(`indexLeafJobHandler received non-leaf knowledge item: id=${itemId} type=${item.type}`) + } + + // Idempotent crash-retry optimization: if a previous attempt successfully + // wrote vectors and marked the item completed but the jobTable row was not + // finalized before crash, skip the embed cycle. The atomic + // `replaceByExternalId` below would otherwise idempotently overwrite the + // same chunks — correct but wasteful (embedding tokens cost real money). 
+ if (item.status === 'completed') { + logger.info('Skipping index-leaf for already-completed item', { baseId, itemId, jobId: ctx.jobId }) + ctx.reportProgress(100, { stage: 'already-completed', currentFile: 1, totalFiles: 1 }) + return + } + + ctx.reportProgress(0, { stage: 'reading', currentFile: 0, totalFiles: 1 }) + await runtime.runWithBaseWriteLockForBase(baseId, () => + knowledgeItemService.updateStatus(itemId, 'processing', { phase: 'reading' }) + ) + + ctx.signal.throwIfAborted() + const documents = await loadKnowledgeItemDocuments(item, ctx.signal) + assertHasIndexableContent(documents) + + ctx.signal.throwIfAborted() + const chunks = chunkDocuments(base, item, documents) + assertHasIndexableContent(chunks) + + ctx.reportProgress(40, { stage: 'embedding', currentFile: 0, totalFiles: 1 }) + await runtime.runWithBaseWriteLockForBase(baseId, () => + knowledgeItemService.updateStatus(itemId, 'processing', { phase: 'embedding' }) + ) + + ctx.signal.throwIfAborted() + const embedModel = getEmbedModel(base) + const nodes = await embedDocuments(embedModel, chunks, ctx.signal) + + ctx.reportProgress(80, { stage: 'writing', currentFile: 0, totalFiles: 1 }) + + // Atomic delete-then-insert inside a single libSQL transaction. Crash-retry + // therefore never leaves orphan chunks (the prior chunk set is wiped in the + // same transaction that writes the new one) AND never loses chunks on + // insert failure (transaction rolls back, old chunks remain). + await runtime.runWithBaseWriteLockForBase(baseId, async () => { + ctx.signal.throwIfAborted() + const vectorStore = await vectorStoreService.createStore(base) + await vectorStore.replaceByExternalId(itemId, nodes) + await knowledgeItemService.updateStatus(itemId, 'completed') + }) + + ctx.reportProgress(100, { stage: 'done', currentFile: 1, totalFiles: 1 }) + }, + + // Flip knowledge_item.status to 'failed' once retries exhaust or the job is + // cancelled. 
Without this, the item lingers in 'processing' and + // reconcileContainers keeps the parent in 'processing' forever — UI shows a + // perpetual spinner because startup recovery does not resurrect terminal + // (failed/cancelled) job rows. + async onSettled(event) { + if (event.status === 'completed') return + + const jobManager = application.get('JobManager') + const snapshot = await jobManager.get(event.jobId) + const input = snapshot?.input as { itemId?: string } | undefined + if (!input?.itemId) return + + const reason = event.error?.message?.trim() || `Job ${event.status}` + try { + await knowledgeItemService.updateStatus(input.itemId, 'failed', { error: reason }) + } catch (error) { + // Item was deleted concurrently (deleteBase / deleteItems race) — nothing to flip. + if (isDataApiError(error) && error.code === ErrorCode.NOT_FOUND) return + logger.error( + 'Failed to flip knowledge item to failed in onSettled', + error instanceof Error ? error : new Error(String(error)), + { jobId: event.jobId, itemId: input.itemId } + ) + } + } +} diff --git a/src/main/services/knowledge/tasks/jobTypes.ts b/src/main/services/knowledge/tasks/jobTypes.ts new file mode 100644 index 0000000000..62ed55070d --- /dev/null +++ b/src/main/services/knowledge/tasks/jobTypes.ts @@ -0,0 +1,28 @@ +/** + * Knowledge module job type registrations. + * + * Co-locates the `declare module` extensions for every JobRegistry entry the + * knowledge module owns. Handler files import this module purely for the + * ambient type-merge side effect, which guarantees that any handler in this + * directory can safely call `jobManager.enqueue('knowledge.*')` even when it + * does not directly reference the sibling handler. 
+ */ + +import type { JobPayloadOf } from '@main/core/job/jobRegistry' + +declare module '@main/core/job/jobRegistry' { + interface JobRegistry { + 'knowledge.prepare-root': { + baseId: string + itemId: string + } + 'knowledge.index-leaf': { + baseId: string + itemId: string + parentJobId: string | null + } + } +} + +export type KnowledgePrepareRootPayload = JobPayloadOf<'knowledge.prepare-root'> +export type KnowledgeIndexLeafPayload = JobPayloadOf<'knowledge.index-leaf'> diff --git a/src/main/services/knowledge/tasks/prepareRootJobHandler.ts b/src/main/services/knowledge/tasks/prepareRootJobHandler.ts new file mode 100644 index 0000000000..a85bbffb33 --- /dev/null +++ b/src/main/services/knowledge/tasks/prepareRootJobHandler.ts @@ -0,0 +1,178 @@ +// Side-effect import — picks up declare-module merges for both knowledge job +// types, so the `jobManager.enqueue('knowledge.index-leaf', …)` call below +// type-checks without a direct dependency on indexLeafJobHandler.ts. +import './jobTypes' + +import { application } from '@application' +import { knowledgeBaseService } from '@data/services/KnowledgeBaseService' +import { knowledgeItemService } from '@data/services/KnowledgeItemService' +import { loggerService } from '@logger' +import type { JobHandler } from '@main/core/job/types' +import { ErrorCode, isDataApiError } from '@shared/data/api' +import type { KnowledgeItem } from '@shared/data/types/knowledge' + +import { prepareKnowledgeItem } from '../runtime/utils/prepare' +import type { KnowledgePrepareRootPayload } from './jobTypes' + +const logger = loggerService.withContext('prepareRootJobHandler') + +const ACTIVE_STATUSES = ['pending', 'delayed', 'running'] as const +const ACTIVE_JOB_LIMIT = 5000 + +export const prepareRootJobHandler: JobHandler = { + recovery: 'retry', + defaultQueue: (input) => `base.${input.baseId}`, + defaultConcurrency: 5, + defaultRetryPolicy: { + maxAttempts: 3, + backoff: 'exponential', + baseDelayMs: 2000, + maxDelayMs: 60_000 + }, + 
defaultTimeoutMs: 10 * 60 * 1000, + + async execute(ctx) { + const { baseId, itemId } = ctx.input + const runtime = application.get('KnowledgeRuntimeService') + const jobManager = application.get('JobManager') + + ctx.signal.throwIfAborted() + // Treat NOT_FOUND on either lookup as "base or item was deleted + // concurrently" — return cleanly so the job settles as 'completed' rather + // than burning retry attempts on dead rows. + let item: KnowledgeItem + try { + await knowledgeBaseService.getById(baseId) + item = await knowledgeItemService.getById(itemId) + } catch (error) { + if (isDataApiError(error) && error.code === ErrorCode.NOT_FOUND) { + logger.info('Skipping prepare-root for missing base or item (likely deleted concurrently)', { + baseId, + itemId, + jobId: ctx.jobId + }) + ctx.reportProgress(100, { stage: 'item-gone' }) + return + } + throw error + } + + // Idempotent retry preamble — safe to run on every attempt: + // + // (1) Cancel any orphan index-leaf jobs left over from a prior attempt of + // THIS prepare-root. We identify them by `parentJobId === ctx.jobId` + // (the jobId is stable across retries). Without this, recovered child + // jobs would dispatch and fail with NOT_FOUND once we delete their + // leaf rows below.
+ const activeJobs = await jobManager.list({ + queue: `base.${baseId}`, + status: [...ACTIVE_STATUSES], + limit: ACTIVE_JOB_LIMIT + }) + const ourOrphans = activeJobs.filter((job) => { + if (job.id === ctx.jobId) return false + const payload = job.input as { parentJobId?: string | null } | null + return payload?.parentJobId === ctx.jobId + }) + + if (ourOrphans.length > 0) { + logger.info('Cancelling orphan child jobs from previous attempt', { + baseId, + itemId, + jobId: ctx.jobId, + orphanCount: ourOrphans.length + }) + await Promise.all( + ourOrphans.map((job) => + jobManager.cancel(job.id, 'prepare-root-retry').catch((error) => { + logger.warn('Failed to cancel orphan index-leaf job (already terminal?)', { + orphanJobId: job.id, + error: error instanceof Error ? error.message : String(error) + }) + }) + ) + ) + } + + // (2) Delete leaf descendants created by previous attempts so a fresh scan + // does not collide with stale rows. Wrapped in Layer 3 lock to + // serialize against any concurrent indexer touching the base. + await runtime.runWithBaseWriteLockForBase(baseId, async () => { + await knowledgeItemService.deleteLeafDescendantItems(baseId, [itemId]) + }) + + ctx.signal.throwIfAborted() + ctx.reportProgress(0, { stage: 'scanning' }) + + // Expand the container into leaf items inside Layer 3 lock. The + // prepareKnowledgeItem helper expects a `runMutation` adapter so it can + // serialize DB writes; we are already holding the lock, so the adapter + // just invokes the task directly.
+ const leafItems = await runtime.runWithBaseWriteLockForBase(baseId, async () => { + ctx.signal.throwIfAborted() + const leaves = await prepareKnowledgeItem({ + baseId, + item, + onCreatedItem: () => {}, + runMutation: async (task) => await task(), + signal: ctx.signal + }) + await knowledgeItemService.updateStatus(itemId, 'processing') + return leaves + }) + + ctx.reportProgress(50, { + stage: 'enqueuing', + currentFile: 0, + totalFiles: leafItems.length + }) + + for (const [index, leaf] of leafItems.entries()) { + ctx.signal.throwIfAborted() + await jobManager.enqueue( + 'knowledge.index-leaf', + { baseId, itemId: leaf.id, parentJobId: ctx.jobId }, + { + idempotencyKey: `knowledge:${baseId}:${leaf.id}`, + parentId: ctx.jobId + } + ) + ctx.reportProgress(50 + Math.round(((index + 1) / Math.max(leafItems.length, 1)) * 50), { + stage: 'enqueuing', + currentFile: index + 1, + totalFiles: leafItems.length + }) + } + + ctx.reportProgress(100, { + stage: 'done', + currentFile: leafItems.length, + totalFiles: leafItems.length + }) + }, + + // Flip the container's status to 'failed' once retries exhaust or the job is + // cancelled. Without this the container stays 'processing' (its phase is + // 'preparing'); reconcileContainers' phase-non-null branch would also keep + // every ancestor stuck. + async onSettled(event) { + if (event.status === 'completed') return + + const jobManager = application.get('JobManager') + const snapshot = await jobManager.get(event.jobId) + const input = snapshot?.input as { itemId?: string } | undefined + if (!input?.itemId) return + + const reason = event.error?.message?.trim() || `Job ${event.status}` + try { + await knowledgeItemService.updateStatus(input.itemId, 'failed', { error: reason }) + } catch (error) { + if (isDataApiError(error) && error.code === ErrorCode.NOT_FOUND) return + logger.error( + 'Failed to flip knowledge container to failed in onSettled', + error instanceof Error ? 
error : new Error(String(error)), + { jobId: event.jobId, itemId: input.itemId } + ) + } + } +} diff --git a/src/main/services/knowledge/types/items.ts b/src/main/services/knowledge/types/items.ts index 976eb89535..5858ebda93 100644 --- a/src/main/services/knowledge/types/items.ts +++ b/src/main/services/knowledge/types/items.ts @@ -1,3 +1,6 @@ import type { KnowledgeItemOf } from '@shared/data/types/knowledge' export type IndexableKnowledgeItem = KnowledgeItemOf<'file' | 'url' | 'note'> + +export type ContainerKnowledgeItem = KnowledgeItemOf<'directory' | 'sitemap'> +export type ContainerKnowledgeItemType = ContainerKnowledgeItem['type'] diff --git a/src/main/services/knowledge/utils/items.ts b/src/main/services/knowledge/utils/items.ts index c90212322c..f4148dc0cc 100644 --- a/src/main/services/knowledge/utils/items.ts +++ b/src/main/services/knowledge/utils/items.ts @@ -1,6 +1,6 @@ import type { KnowledgeItem } from '@shared/data/types/knowledge' -import type { IndexableKnowledgeItem } from '../types/items' +import type { ContainerKnowledgeItem, IndexableKnowledgeItem } from '../types/items' export function isIndexableKnowledgeItem(item: KnowledgeItem): item is IndexableKnowledgeItem { return item.type === 'file' || item.type === 'url' || item.type === 'note' @@ -9,3 +9,7 @@ export function isIndexableKnowledgeItem(item: KnowledgeItem): item is Indexable export function filterIndexableKnowledgeItems(items: KnowledgeItem[]): IndexableKnowledgeItem[] { return items.filter(isIndexableKnowledgeItem) } + +export function isContainerKnowledgeItem(item: KnowledgeItem): item is ContainerKnowledgeItem { + return item.type === 'directory' || item.type === 'sitemap' +} diff --git a/src/main/services/knowledge/vectorstore/types.ts b/src/main/services/knowledge/vectorstore/types.ts index d29246fd13..a0046cc006 100644 --- a/src/main/services/knowledge/vectorstore/types.ts +++ b/src/main/services/knowledge/vectorstore/types.ts @@ -1,6 +1,14 @@ -import type { BaseVectorStore, 
Document, Metadata } from '@vectorstores/core' +import type { BaseNode, BaseVectorStore, Document, Metadata } from '@vectorstores/core' export interface KnowledgeVectorStore extends BaseVectorStore { listByExternalId(itemId: string): Promise[]> deleteByIdAndExternalId(chunkId: string, itemId: string): Promise + /** + * Atomically replace all chunks tied to `externalId` with the provided node + * set. DELETE + INSERT execute inside a single backing-store transaction so + * crash-retrying the caller cannot leave orphan chunks AND insert failure + * never destroys the pre-existing chunk set (transaction rolls back). + * Pass an empty `nodes` array to clear all chunks for the external id. + */ + replaceByExternalId(externalId: string, nodes: BaseNode[]): Promise } diff --git a/v2-refactor-temp/docs/breaking-changes/2026-05-20-knowledge-job-auto-recovery.md b/v2-refactor-temp/docs/breaking-changes/2026-05-20-knowledge-job-auto-recovery.md new file mode 100644 index 0000000000..03bc1318ed --- /dev/null +++ b/v2-refactor-temp/docs/breaking-changes/2026-05-20-knowledge-job-auto-recovery.md @@ -0,0 +1,30 @@ +--- +title: Unfinished knowledge indexing automatically resumes after restart +category: changed +severity: notice +introduced_in_pr: '#15213' +date: 2026-05-20 +--- + +## What changed + +Knowledge base indexing now persists every in-flight task to disk and resumes it automatically after a process restart or crash. Previously, indexing that was interrupted by quit/crash stayed stuck in "processing" forever and required the user to manually re-trigger reindex; that no longer happens. + +## Why this matters to the user + +- Indexing that was running when the app exited (graceful quit, crash, OS shutdown) is picked up again ~1 minute after the next launch — no user action needed.
+- A few side effects are visible: + - Concurrency moves from a single 5-slot pool shared by all bases to 5 slots **per base** with a 50-slot global cap, so importing into several bases in parallel finishes faster. + - Transient embedding failures are now retried up to 3 times with exponential backoff before the item shows as "failed". + - Each `prepare-root` (directory/sitemap scan) job has a 10-minute wall-clock cap, and each `index-leaf` (single file/url/note embed) job has a 5-minute wall-clock cap. Very large single files served by a slow embedding endpoint may now hit the leaf timeout where before they would run indefinitely. +- The `processing` status may briefly persist after force-quit; it self-heals once the recovery pass finishes after the next launch. + +## What the user should do + +Nothing — recovery is automatic. If an item is still stuck in "processing" more than about a minute after the next launch, run "Reindex" from the item's context menu as before. + +## Notes for release manager + +- Recovery is gated by `JobManager.onAllReady`, which fires 60 s after the lifecycle reaches `WhenReady`. On a cold start this means indexing resumes ~60 s into the session, not at process start. +- The 5-minute per-leaf timeout is a deliberate change from "no timeout". If telemetry shows large-document users hitting it, raise `defaultTimeoutMs` on `indexLeafJobHandler` rather than rolling this back. +- v1 → v2 migrated knowledge bases that had in-flight indexing at upgrade time are mapped to `idle` / `failed` by `KnowledgeMigrator` (unchanged from prior v2 behavior). Users must manually reindex those — auto-recovery covers v2 → v2 restarts only. diff --git a/v2-refactor-temp/docs/knowledge/knowledge-backend-decisions.md b/v2-refactor-temp/docs/knowledge/knowledge-backend-decisions.md index 57468ae03d..11534c9b87 100644 --- a/v2-refactor-temp/docs/knowledge/knowledge-backend-decisions.md +++ b/v2-refactor-temp/docs/knowledge/knowledge-backend-decisions.md @@ -133,22 +133,19 @@ 它负责: 1.
创建 runtime add 传入的 `knowledge_item` -2. `prepare-root` / `index-leaf` 任务入队与执行 -3. `knowledge_item.status` / `phase` 的有限状态推进 -4. 失败与中断原因写回数据库 -5. 向量库实例的获取、删除和清理 -6. 检索后的 rerank 串联 -7. stop / delete / reindex 时的 queue 中断与向量清理补偿 +2. 注册 `knowledge.prepare-root` / `knowledge.index-leaf` JobHandler,并把 root item 入队到 `JobManager` +3. `knowledge_item.status` / `phase` 的有限状态推进(含 handler `onSettled` 在 retry 耗尽 / cancel 时把 item 标记为 `failed`) +4. 向量库实例的获取、删除,以及 delete / reindex 时的向量清理 +5. 检索后的 rerank 串联 +6. delete / reindex 时通过 `jobManager.list + filter + cancel` 取消相关 job,并等待 Layer 3 base write lock 释放 它不负责: 1. `knowledge_base` 的主数据 CRUD 2. caller-facing IPC workflow 编排 3. `directory` / `sitemap` owner item 的对外展开入口 -4. 持久化任务队列 -5. 自动重试 -6. 恢复未完成索引任务继续执行 -7. 暴露调度器内部概念给调用方 +4. 任务队列的进程内实现(由 `JobManager` 提供持久化、调度、startup recovery、retry) +5. 向调用方暴露 `JobManager` / queue / job id 等调度内部概念 ## 3.1 `KnowledgeOrchestrationService` 的定位 @@ -286,15 +283,12 @@ item 删除时,调用方应理解为两件独立的事: 1. runtime IPC `delete-items` - 通过 orchestration 进入删除 workflow - 将传入 ids 归一化为 top-level roots - - 中断 root `prepare-root` / `index-leaf` - - fresh 查询 descendants - - 中断 descendants 的 `prepare-root` / `index-leaf` - - 删除 item 及其级联子项的向量 + - `jobManager.list({ queue: 'base.${baseId}', status: non-terminal }) + filter` 取出 subtree 内的 active job,并 `jobManager.cancel(...)` 取消 + - 在 Layer 3 base write lock 内删除 item 及其级联子项的向量 2. 
orchestration 在 runtime cleanup 后删除 SQLite root rows - 数据库 cascade 删除 grouped descendants -base 删除时会先中断并等待该 base 的 runtime work,然后删除 SQLite base 和关联 items。 -SQLite 删除成功后,再 best-effort 删除该 base 的 vector artifacts;artifact 清理失败只记录日志,不回滚已完成的 SQLite 删除。 +base 删除时先 `cancelAllJobsForBase(baseId)`,再 `waitForBaseWriteLocks(baseId, 35s)`,然后删除 vector artifacts,最后删除 SQLite base。artifact 清理失败时 SQLite 行保留,用户可从 UI 重试删除。 当前实现下,Data API 删除并不会替调用方清理向量库,也不会替调用方中断 runtime 任务。 @@ -322,53 +316,27 @@ IPC create-base(CreateKnowledgeBaseDto) ```text IPC delete-base(baseId) - -> KnowledgeRuntimeService.deleteBase(baseId) - -> KnowledgeBaseService.delete(baseId) + -> KnowledgeRuntimeService.cancelAllJobsForBase(baseId) + -> KnowledgeRuntimeService.waitForBaseWriteLocks(baseId, 35_000) -> KnowledgeRuntimeService.deleteBaseArtifacts(baseId) + -> KnowledgeBaseService.delete(baseId) ``` -runtime 删除阶段会先中断该 base 下 pending / running runtime task,等待 running task settle,并返回被中断 item ids。 -随后 data service 删除 SQLite base 和关联 items。 -SQLite 删除成功后,orchestration 再调用 artifact cleanup 删除该 base 对应的 vector store;该 cleanup 失败只记录日志。 -如果 SQLite 删除失败,orchestration 会把被中断 items 标记为 failed,然后把 SQLite 删除错误抛给调用方。 +orchestration 通过 `JobManager.cancelMany({ queue: 'base.${baseId}' })` 取消该 base 的全部 active job,然后等待 Layer 3 base write lock 在 35s 内 drain(超时只记录 warn)。先删 vector artifacts、再删 SQLite base:artifact 删除失败时 SQLite 行保留,用户可从 UI 重试;SQLite 删除失败时已删 artifacts 不会恢复,orchestration 抛出 `invalidOperation`。job 状态由 `JobManager` 自行 finalize,handler `onSettled` 把对应 `knowledge_item.status` 翻为 `failed`。 ## 6. 当前 Queue 模型 -当前实现使用一个进程内 runtime queue: +队列实现完全收敛到 `JobManager`: -1. queue 持有者是 `KnowledgeRuntimeService` -2. queue 为单实例 in-memory queue -3. 默认 `concurrency = 5` -4. 所有 base 的 runtime task 共用这一条 queue -5. queue task 分为 `prepare-root` 和 `index-leaf` -6. delete / reindex 不进入 queue,而是先中断相关 runtime task,再直接删除向量 +1. 每个 base 一条独立队列 `base.${baseId}` +2. 任务类型:`knowledge.prepare-root` 与 `knowledge.index-leaf` +3. 
持久化:每个 job 落 `jobTable`;进程崩溃后由 `JobManager.onAllReady` 在 60s 后跑 startup recovery,把残留的 `running` 行翻回 `pending` 并重新 dispatch +4. 并发:默认 per-base 并发 5,全局 cap 50(由 `JobManager` 控制) +5. retry:`recovery: 'retry'`,最多 3 次,指数退避(leaf 1s→30s,prepare-root 2s→60s) +6. 同一 base 的写串行化由 `KnowledgeRuntimeService.runWithBaseWriteLockForBase` 在 handler 内部承担(Layer 3 mutex,跨 handler 实例共享) +7. delete / reindex 不再入队,而是 `jobManager.list + filter + jobManager.cancel`,再走 `runWithBaseWriteLockForBase` 直接清向量 -当前实现没有落地以下旧设计假设: - -1. 不是“每个 knowledge base 一条串行 queue” -2. 不是 round-robin scheduler -3. 没有全局持久化任务表 - -queue 内部维护 `entries` map,entry 上记录: - -1. `base` -2. `baseId` -3. `itemId` -4. `kind = prepare-root | index-leaf` -5. `status = pending | running | settled` -6. `controller` -7. `promise` -8. `runPromise` -9. `interruptError` - -这些状态只用于: - -1. 跟踪哪些 runtime task 仍在等待执行 -2. 跟踪哪些 runtime task 正在运行 -3. 在 delete / reindex / shutdown 时中断对应任务 -4. 在 shutdown 时识别哪些 item 被中断并做失败补偿 - -它们不是对外数据模型的一部分。 +进程内不再有 `entries` map / `controller` / `runPromise` / `interruptError` 等内存队列状态——这些概念已下沉到 `JobManager`。 ## 7. 
当前索引执行链路 @@ -378,29 +346,21 @@ queue 内部维护 `entries` map,entry 上记录: addItems -> create leaf item -> status = processing, phase = null - -> queue task index-leaf - -> phase = reading - -> loadKnowledgeItemDocuments(item) - -> chunkDocuments(base, item, documents) - -> phase = embedding - -> getEmbedModel(base) - -> embedDocuments(model, chunks) - -> runWithBaseWriteLock - -> KnowledgeVectorStoreService.createStore(base) - -> vectorStore.add(nodes) - -> status = completed, phase = null + -> enqueue knowledge.index-leaf + -> handler.execute: + -> phase = reading + -> loadKnowledgeItemDocuments(item) + -> chunkDocuments(base, item, documents) + -> phase = embedding + -> getEmbedModel(base) + -> embedDocuments(model, chunks) + -> runWithBaseWriteLockForBase + -> KnowledgeVectorStoreService.createStore(base) + -> vectorStore.replaceByExternalId(itemId, nodes) // 单事务 DELETE + INSERT + -> status = completed, phase = null ``` -任意非中断错误抛出时: - -```text -catch error - -> logger.error(...) - -> best-effort cleanup vectors - -> status = failed, phase = null - -> error = normalizedError.message - -> 向上抛出异常 -``` +非中断错误抛出时,由 `JobManager` 调度 retry(最多 3 次)。Retry 耗尽或 job cancel 时 handler `onSettled` 把 `knowledge_item.status` 翻为 `failed`,error message 写入行;旧 chunks 由 `replaceByExternalId` 的事务保留(未发生过的 INSERT 不会改动 DB)。 `directory` / `sitemap` 的一次 preparation 流程,当前是: @@ -449,7 +409,7 @@ preparation 被 interrupt 时: 2. `processing, phase = reading`:leaf 正在读取 source documents 3. `processing, phase = embedding`:leaf 正在 embedding / 写入 vector store 4. `completed, phase = null`:leaf indexing 完成,或 container 没有 active children -5. `failed, phase = null`:runtime task 失败、interrupt cleanup 失败,或 shutdown 中断补偿 +5. `failed, phase = null`:handler `onSettled` 在 retry 耗尽或 cancel 时写入,error 字段保留原因 也就是说: @@ -467,26 +427,22 @@ preparation 被 interrupt 时: 当前做一件事: -1. 重新创建进程内 `KnowledgeQueueManager` +1. 
向 `JobManager` 注册 `knowledge.prepare-root` 与 `knowledge.index-leaf` 两个 `JobHandler`。 -当前没有启动时“扫描中间状态并补偿失败”或“自动恢复索引任务”的逻辑。 +启动时的「自动恢复」由 `JobManager.onAllReady` 统一负责:在 60s 延迟后跑 startup recovery,把 `jobTable.status='running'` 的行翻回 `'pending'`,handler 被重新 dispatch。`KnowledgeRuntimeService` 不再独立扫描中间状态。 ### 9.2 `KnowledgeRuntimeService.onStop` 当前 stop 流程是: -1. 调用 `queue.interruptAll(SHUTDOWN_INTERRUPTED_REASON)` -2. 收集中断的 `prepare-root` / `index-leaf` entries -3. 等待相关 running task settle -4. 对 `index-leaf` 清理对应 leaf vectors -5. 对 `prepare-root` fresh 查询 descendants,并清理 root / descendants vectors -6. 将这些 item 批量写为 `failed` +1. `jobManager.cancelMany({ type: 'knowledge.prepare-root' })` 与 `jobManager.cancelMany({ type: 'knowledge.index-leaf' })` 取消两类 active job +2. `waitForBaseWriteLocks()` 等待全部 Layer 3 base write lock 释放 这意味着: -1. 当前做了停止时的失败补偿 -2. 当前会在 stop 时清理被中断 item 的向量残留 -3. 但没有做重启后的自动恢复 +1. 不再在 stop 时把 item.status 从 `processing` 回滚到 `idle`/`failed`;item 短暂停留在 `processing` 是预期。 +2. 重启后由 `JobManager.onAllReady` startup recovery 自动重新 dispatch;handler 入口的 `item.status === 'completed'` 早退分支保证不会浪费 embedding 调用。 +3. 不再在 stop 时清理被中断 item 的向量残留;vector 一致性由 handler 内 `LibSQLVectorStore.replaceByExternalId`(DELETE + INSERT 单事务)保证。 ### 9.3 `KnowledgeVectorStoreService.onStop` @@ -626,27 +582,22 @@ getEmbedModel(base) 当前实现没有做: -1. 每个 base 一条串行 queue -2. round-robin scheduler -3. 独立的 `KnowledgeTaskService` -4. 独立的 `KnowledgeExecutionService` -5. 持久化任务队列 -6. 自动恢复索引继续执行 -7. 自动重试 -8. chunk 级 queue -9. 用户添加 nested `directory` / `sitemap` -10. 真正可用的 rerank runtime 配置接入 -11. 非 `ollama` embedding provider 支持 -12. `fileProcessorId` 驱动的文件处理链路 +1. round-robin scheduler +2. 独立的 `KnowledgeTaskService` +3. 独立的 `KnowledgeExecutionService` +4. chunk 级 queue +5. 用户添加 nested `directory` / `sitemap` +6. 真正可用的 rerank runtime 配置接入 +7. 非 `ollama` embedding provider 支持 +8. `fileProcessorId` 驱动的文件处理链路 ## 13. 后续更新本文档时的原则 后续只有在以下行为真正落地之后,才应更新本文档: -1. runtime queue 从单队列改成 per-base queue -2. 
rerank runtime 配置真正接通 -3. `fileProcessorId` 开始参与 runtime 执行链路 -4. 用户添加 nested `directory` / `sitemap` -5. queue interrupt 从当前 root + fresh descendants 模型改成 stable-loop 或 generation/runId 模型 +1. rerank runtime 配置真正接通 +2. `fileProcessorId` 开始参与 runtime 执行链路 +3. 用户添加 nested `directory` / `sitemap` +4. queue interrupt 从当前 list+filter+cancel 模型改成 stable-loop 或 generation/runId 模型 在这些行为落地之前,文档应继续以“当前已实现”为准,不提前写成目标设计。 diff --git a/v2-refactor-temp/docs/knowledge/knowledge-todo.md b/v2-refactor-temp/docs/knowledge/knowledge-todo.md index eb5108d73a..66470d4c1e 100644 --- a/v2-refactor-temp/docs/knowledge/knowledge-todo.md +++ b/v2-refactor-temp/docs/knowledge/knowledge-todo.md @@ -63,17 +63,16 @@ ## 4. Runtime 与任务队列 -- 明确 in-memory queue 的产品边界。 - - 当前队列是单进程内存队列,默认并发 5。 - - 同一 base 的写入通过 per-base write lock 串行化。 - - 当前没有持久化任务表、自动重试、重启后自动恢复。 - - 后续如果需要可靠任务恢复,应增加持久化任务模型和恢复策略。 - - 参考:`src/main/services/knowledge/queue/KnowledgeQueueManager.ts` +- ✅ 持久化任务表 + 自动恢复(Phase 4 已落地)。 + - knowledge.prepare-root / knowledge.index-leaf 走 `JobManager`,`jobTable` 持久化。 + - 默认 per-base 并发 5、全局 cap 50;同一 base 的写入仍通过 `KnowledgeRuntimeService.runWithBaseWriteLockForBase` 串行。 + - `recovery: 'retry'` + `JobManager.onAllReady` 启动后 60s 跑 startup recovery,自动重新 dispatch 未完成 job。 + - 参考:`src/main/services/knowledge/tasks/prepareRootJobHandler.ts`、`src/main/services/knowledge/tasks/indexLeafJobHandler.ts`。 - 收敛失败清理与恢复体验。 - - 运行中任务在 shutdown / delete / reindex 中断后会尝试清理向量并标记 item failed。 - - 部分失败状态持久化和清理属于 best-effort。 - - 后续需要决定是否提供更明确的用户可见恢复入口或后台修复任务。 + - shutdown 不再 fail items;重启后由 startup recovery 自动重新 dispatch(handler 入口对 `item.status === 'completed'` 早退)。 + - delete / reindex 走业务层 list + filter + `jobManager.cancel`,残留 vectors 由 `LibSQLVectorStore.replaceByExternalId` 的单事务原子性保证不出现双倍 chunk。 + - 后续若需要更明确的用户可见恢复入口或后台修复任务,可在此基础上构建。 - 参考:`src/main/services/knowledge/runtime/KnowledgeRuntimeService.ts` - 处理 base 删除后的 artifact 清理风险。 @@ -103,9 +102,8 @@ ## 6. 
发布与文档收尾 -- 补 Knowledge V2 的 breaking changes 记录。 - - 用户可感知的 v2 变更应写入 `v2-refactor-temp/docs/breaking-changes/`。 - - 当前尚未看到 Knowledge V2 专项条目。 +- ✅ Knowledge V2 首条 breaking changes 已落地(`2026-05-20-knowledge-job-auto-recovery.md`)。 + - 后续若再有用户可感知的 v2 变更,继续写入 `v2-refactor-temp/docs/breaking-changes/`。 - 参考:`v2-refactor-temp/docs/breaking-changes/README.md` - 更新后端决策文档中已落地的变化。