Files
CherryHQ-cherry-studio/packages/vectorstores/libsql/tests/LibSQLVectorStore.test.ts
2026-05-01 19:24:48 +08:00

1437 lines
44 KiB
TypeScript

import { createClient } from '@libsql/client'
import type { BaseNode, MetadataFilters, VectorStoreQuery } from '@vectorstores/core'
import {
FilterCondition,
FilterOperator,
type Metadata,
MetadataMode,
NodeRelationship,
TextNode,
VectorStoreQueryMode
} from '@vectorstores/core'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { LibSQLVectorStore } from '../src/LibSQLVectorStore.js'
describe('LibSQLVectorStore', () => {
let store: LibSQLVectorStore
let client: ReturnType<typeof createClient>
beforeEach(() => {
// Use in-memory database for testing
client = createClient({
url: ':memory:'
})
store = new LibSQLVectorStore({
client,
tableName: 'test_embeddings',
dimensions: 2
})
})
describe('Basic Operations', () => {
it('should initialize with default configuration', () => {
const defaultStore = new LibSQLVectorStore({
clientConfig: { url: ':memory:' }
})
expect(defaultStore).toBeDefined()
expect(defaultStore.storesText).toBe(true)
})
it('should default to in-memory client when no clientConfig or client provided', () => {
const previousUrl = process.env.LIBSQL_URL
const previousAuth = process.env.LIBSQL_AUTH_TOKEN
delete process.env.LIBSQL_URL
delete process.env.LIBSQL_AUTH_TOKEN
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})
const fallbackStore = new LibSQLVectorStore({})
warnSpy.mockRestore()
if (previousUrl) process.env.LIBSQL_URL = previousUrl
else delete process.env.LIBSQL_URL
if (previousAuth) process.env.LIBSQL_AUTH_TOKEN = previousAuth
else delete process.env.LIBSQL_AUTH_TOKEN
expect(fallbackStore.client()).toBeDefined()
})
it('should set and get collection', () => {
store.setCollection('test-collection')
expect(store.getCollection()).toBe('test-collection')
})
it('should get client connection', () => {
const db = store.client()
expect(db).toBeDefined()
})
})
describe('Vector Operations', () => {
beforeEach(async () => {
// Ensure the database schema is set up
// The schema is created lazily on first operation
})
it('should add nodes to vector store', async () => {
const nodes: BaseNode<Metadata>[] = [
new TextNode({
embedding: [0.1, 0.2],
metadata: { category: 'test', score: 1.0 }
}),
new TextNode({
embedding: [0.3, 0.4],
metadata: { category: 'example', score: 0.5 }
})
]
const ids = await store.add(nodes)
expect(ids).toHaveLength(2)
expect(ids[0]).toBeDefined()
expect(ids[1]).toBeDefined()
})
it('should preserve caller metadata without injecting create_date', async () => {
const metadata: Metadata = { category: 'test', score: 1.0 }
const node = new TextNode({
id_: 'chunk-metadata-preserved',
text: 'Document chunk',
embedding: [0.1, 0.2],
metadata
})
await store.add([node])
expect(metadata).toEqual({ category: 'test', score: 1.0 })
const rows = await client.execute("SELECT metadata FROM test_embeddings WHERE id = 'chunk-metadata-preserved'")
expect(rows.rows).toHaveLength(1)
expect(JSON.parse(String(rows.rows[0]?.metadata))).toEqual({
category: 'test',
score: 1.0
})
})
it('should reject nodes with missing embeddings instead of writing zero vectors', async () => {
const node = new TextNode({
id_: 'chunk-missing-embedding',
text: 'Document chunk without embedding',
metadata: { category: 'invalid' }
})
await expect(store.add([node])).rejects.toThrow('Missing embedding for node chunk-missing-embedding')
const rows = await client.execute(
"SELECT COUNT(*) as count FROM test_embeddings WHERE id = 'chunk-missing-embedding'"
)
expect(Number(rows.rows[0]?.count ?? 0)).toBe(0)
})
it('should reject nodes with mismatched embedding dimensions', async () => {
const node = new TextNode({
id_: 'chunk-bad-dimensions',
text: 'Document chunk with mismatched embedding dimensions',
embedding: [0.1, 0.2, 0.3],
metadata: { category: 'invalid' }
})
await expect(store.add([node])).rejects.toThrow(
'Embedding dimension mismatch for node chunk-bad-dimensions: expected 2, got 3'
)
const rows = await client.execute(
"SELECT COUNT(*) as count FROM test_embeddings WHERE id = 'chunk-bad-dimensions'"
)
expect(Number(rows.rows[0]?.count ?? 0)).toBe(0)
})
it('should persist external_id from sourceNode.nodeId', async () => {
const node = new TextNode({
id_: 'chunk-1',
text: 'Document chunk',
embedding: [0.1, 0.2],
metadata: { category: 'test' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
await store.add([node])
const rows = await client.execute('SELECT id, external_id, collection FROM test_embeddings')
expect(rows.rows).toHaveLength(1)
expect(rows.rows[0]).toMatchObject({
id: 'chunk-1',
external_id: 'item-1',
collection: store.getCollection()
})
})
it('should fall back to node.id_ when sourceNode.nodeId is missing', async () => {
const node = new TextNode({
id_: 'chunk-2',
text: 'Document chunk without source node',
embedding: [0.3, 0.4],
metadata: { category: 'fallback' }
})
await store.add([node])
const rows = await client.execute("SELECT id, external_id FROM test_embeddings WHERE id = 'chunk-2'")
expect(rows.rows).toHaveLength(1)
expect(rows.rows[0]).toMatchObject({
id: 'chunk-2',
external_id: 'chunk-2'
})
})
it('should query vectors by similarity', async () => {
// Add test data
const nodes: BaseNode<Metadata>[] = [
new TextNode({
text: 'First document',
embedding: [1.0, 0.0],
metadata: { category: 'doc1' }
}),
new TextNode({
text: 'Second document',
embedding: [0.0, 1.0],
metadata: { category: 'doc2' }
})
]
await store.add(nodes)
// Query for similar vectors
const query: VectorStoreQuery = {
queryEmbedding: [0.9, 0.1],
similarityTopK: 2,
mode: VectorStoreQueryMode.DEFAULT
}
const result = await store.query(query)
expect(result.nodes).toHaveLength(2)
expect(result.ids).toHaveLength(2)
expect(result.similarities).toHaveLength(2)
// First result should be more similar (closer to [1.0, 0.0])
expect(result.similarities[0]).toBeGreaterThan(result.similarities[1])
})
it('should expose itemId from external_id in query results', async () => {
const node = new TextNode({
id_: 'chunk-knowledge-1',
text: 'Knowledge document',
embedding: [1.0, 0.0],
metadata: { source: '/tmp/doc.md' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-knowledge-1',
metadata: {}
}
}
})
await store.add([node])
const result = await store.query({
queryEmbedding: [1.0, 0.0],
similarityTopK: 1,
mode: VectorStoreQueryMode.DEFAULT
})
expect(result.nodes).toHaveLength(1)
expect(result.nodes?.[0]?.metadata).toMatchObject({
source: '/tmp/doc.md',
itemId: 'item-knowledge-1'
})
})
it('should tolerate invalid metadata JSON in vector query results', async () => {
await store.add([
new TextNode({
id_: 'chunk-invalid-metadata-vector',
text: 'Knowledge document',
embedding: [1.0, 0.0],
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-invalid-metadata-vector',
metadata: {}
}
}
})
])
await client.execute({
sql: 'UPDATE test_embeddings SET metadata = ? WHERE id = ?',
args: ['{"itemId":', 'chunk-invalid-metadata-vector']
})
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})
const result = await store.query({
queryEmbedding: [1.0, 0.0],
similarityTopK: 1,
mode: VectorStoreQueryMode.DEFAULT
})
expect(result.nodes).toHaveLength(1)
expect(result.nodes?.[0]?.metadata).toMatchObject({
itemId: 'item-invalid-metadata-vector'
})
expect(warnSpy).toHaveBeenCalledWith(
'Failed to parse metadata JSON for row chunk-invalid-metadata-vector',
expect.any(Error)
)
warnSpy.mockRestore()
})
it('should tolerate invalid metadata JSON in bm25 query results', async () => {
await store.add([
new TextNode({
id_: 'chunk-invalid-metadata-bm25',
text: 'searchable bm25 document',
embedding: [1.0, 0.0],
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-invalid-metadata-bm25',
metadata: {}
}
}
})
])
await client.execute({
sql: 'UPDATE test_embeddings SET metadata = ? WHERE id = ?',
args: ['{"itemId":', 'chunk-invalid-metadata-bm25']
})
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})
const result = await store.query({
queryStr: 'searchable',
similarityTopK: 1,
mode: VectorStoreQueryMode.BM25
})
expect(result.nodes).toHaveLength(1)
expect(result.nodes?.[0]?.metadata).toMatchObject({
itemId: 'item-invalid-metadata-bm25'
})
expect(warnSpy).toHaveBeenCalledWith(
'Failed to parse metadata JSON for row chunk-invalid-metadata-bm25',
expect.any(Error)
)
warnSpy.mockRestore()
})
it('should preserve the original cause when bm25 execution fails', async () => {
await store.add([
new TextNode({
id_: 'chunk-bm25-failure',
text: 'searchable document',
embedding: [1.0, 0.0],
metadata: { category: 'test' }
})
])
const originalExecute = client.execute.bind(client)
const executeSpy = vi.spyOn(client, 'execute').mockImplementation(async (statement: any) => {
const sql = typeof statement === 'string' ? statement : statement.sql
if (typeof sql === 'string' && sql.includes('bm25(')) {
throw new Error('fts execution failed')
}
return await originalExecute(statement)
})
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})
try {
await store.query({
queryStr: 'searchable',
similarityTopK: 1,
mode: VectorStoreQueryMode.BM25
})
throw new Error('Expected BM25 query to fail')
} catch (error) {
expect(error).toBeInstanceOf(Error)
expect((error as Error).message).toBe('BM25 search failed')
expect((error as Error & { cause?: unknown }).cause).toBeInstanceOf(Error)
expect(((error as Error & { cause?: Error }).cause as Error).message).toBe('fts execution failed')
}
expect(warnSpy).toHaveBeenCalledWith('FTS5 search failed:', expect.any(Error))
warnSpy.mockRestore()
executeSpy.mockRestore()
})
it('should handle empty add request', async () => {
const ids = await store.add([])
expect(ids).toEqual([])
})
it('should throw when SQL arguments would contain invalid nullish values', async () => {
const invalidNode = {
id_: '',
metadata: { category: 'test' },
sourceNode: undefined,
getEmbedding: () => [0.1, 0.2],
getContent: () => 'Document chunk'
} as unknown as BaseNode<Metadata>
await expect(store.add([invalidNode])).rejects.toThrow('Invalid libSQL argument at index 0: null')
})
it('should fail initialization when FTS schema creation fails', async () => {
const originalExecute = client.execute.bind(client)
const executeSpy = vi.spyOn(client, 'execute').mockImplementation(async (statement: any) => {
const sql = typeof statement === 'string' ? statement : statement.sql
if (typeof sql === 'string' && sql.includes('CREATE VIRTUAL TABLE IF NOT EXISTS test_embeddings_fts')) {
throw new Error('fts creation failed')
}
return await originalExecute(statement)
})
const node = new TextNode({
id_: 'chunk-fts-fail',
text: 'Document chunk',
embedding: [0.1, 0.2],
metadata: { category: 'test' }
})
await expect(store.add([node])).rejects.toThrow('fts creation failed')
executeSpy.mockRestore()
})
it('should only run schema initialization once for concurrent callers', async () => {
let checkSchemaCalls = 0
let resolveInitialization!: () => void
const initializationBarrier = new Promise<void>((resolve) => {
resolveInitialization = resolve
})
const originalCheckSchema = (store as any).checkSchema.bind(store) as (clientArg: unknown) => Promise<void>
const checkSchemaSpy = vi.spyOn(store as any, 'checkSchema').mockImplementation(async (clientArg: unknown) => {
checkSchemaCalls += 1
await initializationBarrier
return await originalCheckSchema(clientArg)
})
const firstAddPromise = store.add([
new TextNode({
id_: 'chunk-concurrent-1',
text: 'Concurrent document 1',
embedding: [0.1, 0.2],
metadata: { category: 'first' }
})
])
const secondAddPromise = store.add([
new TextNode({
id_: 'chunk-concurrent-2',
text: 'Concurrent document 2',
embedding: [0.2, 0.1],
metadata: { category: 'second' }
})
])
await vi.waitFor(() => {
expect(checkSchemaCalls).toBe(1)
})
resolveInitialization()
await expect(Promise.all([firstAddPromise, secondAddPromise])).resolves.toEqual([
['chunk-concurrent-1'],
['chunk-concurrent-2']
])
expect(checkSchemaCalls).toBe(1)
checkSchemaSpy.mockRestore()
})
it('should rebuild FTS only when the FTS table is first created', async () => {
let rebuildCount = 0
const originalExecute = client.execute.bind(client)
const executeSpy = vi.spyOn(client, 'execute').mockImplementation(async (statement: any) => {
const sql = typeof statement === 'string' ? statement : statement.sql
if (typeof sql === 'string' && sql.includes("VALUES ('rebuild')")) {
rebuildCount += 1
}
return await originalExecute(statement)
})
await store.add([
new TextNode({
id_: 'chunk-first-init',
text: 'First document',
embedding: [0.1, 0.2],
metadata: { category: 'first' }
})
])
const secondStore = new LibSQLVectorStore({
client,
tableName: 'test_embeddings',
dimensions: 2
})
await secondStore.add([
new TextNode({
id_: 'chunk-second-init',
text: 'Second document',
embedding: [0.2, 0.1],
metadata: { category: 'second' }
})
])
expect(rebuildCount).toBe(1)
executeSpy.mockRestore()
})
it('should delete all nodes by external_id', async () => {
const nodeA = new TextNode({
id_: 'chunk-1',
text: 'Document chunk A',
embedding: [0.1, 0.2],
metadata: { category: 'test' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
const nodeB = new TextNode({
id_: 'chunk-2',
text: 'Document chunk B',
embedding: [0.1, 0.2],
metadata: { category: 'test' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
await store.add([nodeA, nodeB])
const queryBefore: VectorStoreQuery = {
queryEmbedding: [0.1, 0.2],
similarityTopK: 2,
mode: VectorStoreQueryMode.DEFAULT
}
const resultBefore = await store.query(queryBefore)
expect(resultBefore.nodes).toHaveLength(2)
await store.delete('item-1')
const queryAfter: VectorStoreQuery = {
queryEmbedding: [0.1, 0.2],
similarityTopK: 2,
mode: VectorStoreQueryMode.DEFAULT
}
const resultAfter = await store.query(queryAfter)
expect(resultAfter.nodes).toHaveLength(0)
})
it('should scope delete by collection', async () => {
const otherCollectionStore = new LibSQLVectorStore({
client,
tableName: 'test_embeddings',
dimensions: 2,
collection: 'other'
})
const nodeDefault = new TextNode({
id_: 'chunk-default',
text: 'Default collection chunk',
embedding: [0.2, 0.3],
metadata: { category: 'scope' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-shared',
metadata: {}
}
}
})
const nodeOther = new TextNode({
id_: 'chunk-other',
text: 'Other collection chunk',
embedding: [0.2, 0.3],
metadata: { category: 'scope' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-shared',
metadata: {}
}
}
})
await store.add([nodeDefault])
await otherCollectionStore.add([nodeOther])
await store.delete('item-shared')
const rows = await client.execute(
"SELECT id, external_id, collection FROM test_embeddings WHERE external_id = 'item-shared' ORDER BY id"
)
expect(rows.rows).toHaveLength(1)
expect(rows.rows[0]).toMatchObject({
id: 'chunk-other',
external_id: 'item-shared',
collection: 'other'
})
})
})
describe('Metadata Filtering', () => {
const filterCases: Array<{
title: string
filters: MetadataFilters
queryEmbedding?: number[]
expectedCount: number
assert?: (nodes: BaseNode<Metadata>[]) => void
}> = [
{
title: 'metadata equality',
filters: {
filters: [
{
key: 'category',
value: 'technology',
operator: FilterOperator.EQ
}
]
},
expectedCount: 2,
assert: (nodes) => nodes.forEach((node) => expect(node.metadata?.category).toBe('technology'))
},
{
title: 'numeric comparison',
filters: {
filters: [{ key: 'rating', value: 4, operator: FilterOperator.GTE }]
},
expectedCount: 2,
assert: (nodes) => nodes.forEach((node) => expect(node.metadata?.rating).toBeGreaterThanOrEqual(4))
},
{
title: 'combined AND',
filters: {
filters: [
{
key: 'category',
value: 'technology',
operator: FilterOperator.EQ
},
{ key: 'rating', value: 4, operator: FilterOperator.GTE }
],
condition: FilterCondition.AND
},
expectedCount: 2,
assert: (nodes) => {
const ratings = nodes.map((node) => node.metadata?.rating)
expect(ratings).toContain(4)
expect(ratings).toContain(5)
nodes.forEach((node) => expect(node.metadata?.category).toBe('technology'))
}
},
{
title: 'text match',
filters: {
filters: [{ key: 'tags', value: 'ai', operator: FilterOperator.TEXT_MATCH }]
},
queryEmbedding: [1.0, 0.0],
expectedCount: 1,
assert: (nodes) => {
expect(nodes[0].metadata?.tags).toContain('ai')
}
}
]
beforeEach(async () => {
// Add test data with metadata
const nodes: BaseNode<Metadata>[] = [
new TextNode({
text: 'Document about AI',
embedding: [1.0, 0.0],
metadata: { category: 'technology', rating: 5, tags: ['ai', 'ml'] }
}),
new TextNode({
text: 'Document about cooking',
embedding: [0.0, 1.0],
metadata: {
category: 'food',
rating: 3,
tags: ['cooking', 'recipes']
}
}),
new TextNode({
text: 'Another tech document',
embedding: [0.5, 0.5],
metadata: {
category: 'technology',
rating: 4,
tags: ['programming']
}
})
]
await store.add(nodes)
})
filterCases.forEach(({ title, filters, queryEmbedding, expectedCount, assert }) => {
it(`should filter by ${title}`, async () => {
const query: VectorStoreQuery = {
queryEmbedding: queryEmbedding ?? [0.5, 0.5],
similarityTopK: 5,
filters,
mode: VectorStoreQueryMode.DEFAULT
}
const result = await store.query(query)
expect(result.nodes).toHaveLength(expectedCount)
assert?.(result.nodes as BaseNode<Metadata>[])
})
})
it('should reject invalid metadata filter keys', async () => {
const query: VectorStoreQuery = {
queryEmbedding: [0.5, 0.5],
similarityTopK: 5,
filters: {
filters: [
{
key: "category') = 'technology' OR 1=1 --",
value: 'technology',
operator: FilterOperator.EQ
}
]
},
mode: VectorStoreQueryMode.DEFAULT
}
await expect(store.query(query)).rejects.toThrow(
"Invalid metadata filter key: category') = 'technology' OR 1=1 --"
)
})
})
describe('Collection Management', () => {
beforeEach(async () => {
// Add data to default collection
const nodes: BaseNode<Metadata>[] = [
new TextNode({
embedding: [0.1, 0.2],
metadata: { collection: 'default' }
})
]
await store.add(nodes)
})
it('should clear collection', async () => {
// Verify data exists
const query: VectorStoreQuery = {
queryEmbedding: [0.1, 0.2],
similarityTopK: 1,
mode: VectorStoreQueryMode.DEFAULT
}
let result = await store.query(query)
expect(result.nodes).toHaveLength(1)
// Clear collection
await store.clearCollection()
// Verify data is gone
result = await store.query(query)
expect(result.nodes).toHaveLength(0)
})
it('should isolate data by collection', async () => {
const originalCollection = store.getCollection()
// Add data to different collection
store.setCollection('test-collection')
const newNodes: BaseNode<Metadata>[] = [
new TextNode({
embedding: [0.3, 0.4],
metadata: { collection: 'test' }
})
]
await store.add(newNodes)
// Query in test-collection should find data
let query: VectorStoreQuery = {
queryEmbedding: [0.3, 0.4],
similarityTopK: 1,
mode: VectorStoreQueryMode.DEFAULT
}
let result = await store.query(query)
expect(result.nodes).toHaveLength(1)
// Switch back to default collection and query
store.setCollection(originalCollection)
query = {
queryEmbedding: [0.1, 0.2],
similarityTopK: 1,
mode: VectorStoreQueryMode.DEFAULT
}
result = await store.query(query)
expect(result.nodes).toHaveLength(1)
})
})
describe('Utility Functions', () => {
it('should convert to Float32Array', async () => {
const { toFloat32Array } = await import('../src/utils.js')
const array = [0.1, 0.2, 0.3]
const result = toFloat32Array(array)
expect(result).toBeInstanceOf(Float32Array)
Array.from(result).forEach((value, idx) => {
expect(value).toBeCloseTo(array[idx], 6)
})
})
it('should convert from Float32Array', async () => {
const { fromFloat32Array } = await import('../src/utils.js')
const float32Array = new Float32Array([0.1, 0.2, 0.3])
const result = fromFloat32Array(float32Array)
result.forEach((value, idx) => {
expect(value).toBeCloseTo([0.1, 0.2, 0.3][idx], 6)
})
})
it('should throw when deserializeEmbedding receives an unsupported payload type', () => {
expect(() => (store as any).deserializeEmbedding('not-an-embedding')).toThrow(
'Unexpected embedding payload type in LibSQLVectorStore.deserializeEmbedding'
)
})
it('should throw when deserializeEmbedding receives a missing payload', () => {
expect(() => (store as any).deserializeEmbedding(null)).toThrow(
'Missing embedding payload in LibSQLVectorStore.deserializeEmbedding'
)
})
})
describe('Error Handling', () => {
it('should reject nodes with missing embeddings', async () => {
const nodeWithoutEmbedding = new TextNode({
text: 'Test node',
metadata: { category: 'test' }
})
await expect(store.add([nodeWithoutEmbedding])).rejects.toThrow('Missing embedding for node')
})
it('should reject query with null embedding', async () => {
const query: VectorStoreQuery = {
queryEmbedding: undefined,
similarityTopK: 1,
mode: VectorStoreQueryMode.DEFAULT
}
await expect(store.query(query)).rejects.toThrow('queryEmbedding is required for vector search')
})
})
describe('Configuration Options', () => {
it('should work with pre-configured client', async () => {
const customClient = createClient({ url: ':memory:' })
const customStore = new LibSQLVectorStore({
client: customClient,
tableName: 'custom_table',
dimensions: 4
})
expect(customStore).toBeDefined()
const nodes: BaseNode<Metadata>[] = [
new TextNode({
embedding: [0.1, 0.2, 0.3, 0.4],
metadata: { custom: true }
})
]
const ids = await customStore.add(nodes)
expect(ids).toHaveLength(1)
})
it('should work with client configuration', async () => {
const configStore = new LibSQLVectorStore({
clientConfig: {
url: ':memory:'
},
tableName: 'config_table',
dimensions: 3
})
expect(configStore).toBeDefined()
const db = configStore.client()
expect(db).toBeDefined()
})
})
describe('Query Modes', () => {
beforeEach(async () => {
// Add test data with text content for FTS
const nodes: BaseNode<Metadata>[] = [
new TextNode({
text: 'Machine learning and artificial intelligence are transforming technology',
embedding: [1.0, 0.0],
metadata: { category: 'technology', topic: 'ai' }
}),
new TextNode({
text: 'Cooking recipes and food preparation techniques',
embedding: [0.0, 1.0],
metadata: { category: 'food', topic: 'cooking' }
}),
new TextNode({
text: 'Deep learning neural networks for artificial intelligence',
embedding: [0.8, 0.2],
metadata: { category: 'technology', topic: 'ai' }
})
]
await store.add(nodes)
})
it('should query using default mode (vector search)', async () => {
const query: VectorStoreQuery = {
queryEmbedding: [0.9, 0.1],
similarityTopK: 2,
mode: VectorStoreQueryMode.DEFAULT
}
const result = await store.query(query)
expect(result.nodes).toHaveLength(2)
expect(result.similarities).toHaveLength(2)
expect(result.ids).toHaveLength(2)
// First result should be more similar (closer to [1.0, 0.0])
expect(result.similarities[0]).toBeGreaterThan(result.similarities[1])
})
it('should query using bm25 mode (full-text search)', async () => {
const query: VectorStoreQuery = {
queryStr: 'artificial intelligence',
similarityTopK: 2,
mode: 'bm25' as VectorStoreQueryMode
}
const result = await store.query(query)
const nodes = result.nodes ?? []
expect(nodes).toHaveLength(2)
expect(result.similarities).toHaveLength(2)
expect(result.ids).toHaveLength(2)
nodes.forEach((node) => {
const text = node.getContent(MetadataMode.NONE).toLowerCase()
expect(text.includes('artificial') || text.includes('intelligence')).toBe(true)
})
})
it('should query bm25 mode with non-consecutive multi-word user text', async () => {
const result = await store.query({
queryStr: 'artificial technology',
similarityTopK: 2,
mode: 'bm25' as VectorStoreQueryMode
})
const nodes = result.nodes ?? []
expect(nodes.length).toBeGreaterThan(0)
expect(nodes.some((node) => node.getContent(MetadataMode.NONE).includes('artificial intelligence'))).toBe(true)
})
it('should query bm25 mode with punctuation as ordinary user text', async () => {
await store.add([
new TextNode({
text: 'DeepSeek-V3.2 release notes mention node.js, README.md, and C++ usage examples',
embedding: [0.7, 0.3],
metadata: { category: 'release' }
})
])
const queries = ['DeepSeek-V3.2', 'README.md', 'node.js', 'C++', 'DeepSeek "V3.2"']
for (const queryStr of queries) {
const result = await store.query({
queryStr,
similarityTopK: 3,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.nodes?.length ?? 0).toBeGreaterThan(0)
}
})
it('should return empty bm25 results for punctuation-only user text', async () => {
const result = await store.query({
queryStr: '...',
similarityTopK: 3,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result).toEqual({
nodes: [],
similarities: [],
ids: []
})
})
it('should throw error for bm25 mode without queryStr', async () => {
const query: VectorStoreQuery = {
queryEmbedding: [0.5, 0.5],
similarityTopK: 2,
mode: 'bm25' as VectorStoreQueryMode
}
await expect(store.query(query)).rejects.toThrow('queryStr is required for BM25 mode')
})
it('should query using hybrid mode (vector + FTS)', async () => {
const query: VectorStoreQuery = {
queryEmbedding: [0.9, 0.1],
queryStr: 'artificial intelligence',
similarityTopK: 2,
mode: 'hybrid' as VectorStoreQueryMode,
alpha: 0.5
}
const result = await store.query(query)
const nodes = result.nodes ?? []
expect(nodes).toHaveLength(2)
expect(result.similarities).toHaveLength(2)
expect(result.ids).toHaveLength(2)
nodes.forEach((node) => {
const text = node.getContent(MetadataMode.NONE).toLowerCase()
expect(text.includes('artificial') || text.includes('intelligence') || text.includes('learning')).toBe(true)
})
})
it('should query hybrid mode with non-consecutive multi-word user text', async () => {
const result = await store.query({
queryEmbedding: [0.9, 0.1],
queryStr: 'artificial technology',
similarityTopK: 2,
mode: 'hybrid' as VectorStoreQueryMode
})
const nodes = result.nodes ?? []
expect(nodes.length).toBeGreaterThan(0)
expect(nodes.some((node) => node.getContent(MetadataMode.NONE).includes('artificial intelligence'))).toBe(true)
})
it('should query hybrid mode with punctuation as ordinary user text', async () => {
await store.add([
new TextNode({
text: 'DeepSeek-V3.2 release notes for hybrid retrieval',
embedding: [0.9, 0.1],
metadata: { category: 'release' }
})
])
const result = await store.query({
queryEmbedding: [0.9, 0.1],
queryStr: 'DeepSeek-V3.2',
similarityTopK: 2,
mode: 'hybrid' as VectorStoreQueryMode
})
expect(result.nodes?.length ?? 0).toBeGreaterThan(0)
})
it('should throw error for hybrid mode without queryEmbedding', async () => {
const query: VectorStoreQuery = {
queryStr: 'artificial intelligence',
similarityTopK: 2,
mode: 'hybrid' as VectorStoreQueryMode
}
await expect(store.query(query)).rejects.toThrow('queryEmbedding is required for HYBRID mode')
})
it('should throw error for hybrid mode without queryStr', async () => {
const query: VectorStoreQuery = {
queryEmbedding: [0.5, 0.5],
similarityTopK: 2,
mode: 'hybrid' as VectorStoreQueryMode
}
await expect(store.query(query)).rejects.toThrow('queryStr is required for HYBRID mode')
})
it('should fallback to vector search for unknown query mode', async () => {
const query: VectorStoreQuery = {
queryEmbedding: [0.5, 0.5],
similarityTopK: 2,
mode: 'unknown_mode' as VectorStoreQueryMode
}
const result = await store.query(query)
// Should fallback to vector search and return results
expect(result.nodes).toBeDefined()
expect(result.similarities).toBeDefined()
expect(result.ids).toBeDefined()
})
it('should update bm25 index after upsert', async () => {
const node = new TextNode({
id_: 'upsert-doc',
text: 'legacy keyword content',
embedding: [0.6, 0.4],
metadata: { category: 'technology' }
})
await store.add([node])
let result = await store.query({
queryStr: 'legacy',
similarityTopK: 5,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.ids).toContain('upsert-doc')
await store.add([
new TextNode({
id_: 'upsert-doc',
text: 'fresh keyword content',
embedding: [0.6, 0.4],
metadata: { category: 'technology' }
})
])
result = await store.query({
queryStr: 'legacy',
similarityTopK: 5,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.ids).not.toContain('upsert-doc')
result = await store.query({
queryStr: 'fresh',
similarityTopK: 5,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.ids).toContain('upsert-doc')
})
it('should remove deleted documents from bm25 index', async () => {
const node = new TextNode({
id_: 'delete-doc',
text: 'remove me from bm25',
embedding: [0.4, 0.6],
metadata: { category: 'technology' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-delete',
metadata: {}
}
}
})
await store.add([node])
let result = await store.query({
queryStr: 'remove',
similarityTopK: 5,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.ids).toContain('delete-doc')
await store.delete('item-delete')
result = await store.query({
queryStr: 'remove',
similarityTopK: 5,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.ids).not.toContain('delete-doc')
})
})
describe('exists', () => {
it('should return true for existing external_id', async () => {
const nodes: BaseNode<Metadata>[] = [
new TextNode({
id_: 'doc-123',
embedding: [0.1, 0.2],
metadata: { category: 'exists' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
]
await store.add(nodes)
const exists = await store.exists('item-1')
expect(exists).toBe(true)
})
it('should return false for non-existing document', async () => {
const exists = await store.exists('non-existent-ref')
expect(exists).toBe(false)
})
it('should respect collection when checking existence', async () => {
store.setCollection('collection-a')
const nodes: BaseNode<Metadata>[] = [
new TextNode({
embedding: [0.1, 0.2],
metadata: { category: 'exists' },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-collection',
metadata: {}
}
}
})
]
await store.add(nodes)
// Should find in same collection
expect(await store.exists('item-collection')).toBe(true)
// Should not find in different collection
store.setCollection('collection-b')
expect(await store.exists('item-collection')).toBe(false)
})
})
describe('chunk deletion', () => {
it('should delete one chunk by id, external_id, and collection only', async () => {
store.setCollection('collection-a')
await store.add([
new TextNode({
id_: 'chunk-1',
text: 'first chunk',
embedding: [0.1, 0.2],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 2 },
relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } }
}),
new TextNode({
id_: 'chunk-2',
text: 'second chunk',
embedding: [0.2, 0.3],
metadata: { itemId: 'item-1', chunkIndex: 1, tokenCount: 2 },
relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } }
})
])
const otherCollectionStore = new LibSQLVectorStore({
client,
tableName: 'test_embeddings',
dimensions: 2,
collection: 'collection-b'
})
await otherCollectionStore.add([
new TextNode({
id_: 'chunk-1',
text: 'other collection chunk',
embedding: [0.3, 0.4],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 3 },
relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } }
})
])
await store.deleteByIdAndExternalId('chunk-1', 'item-1')
const rows = await client.execute(
'SELECT id, external_id, collection FROM test_embeddings ORDER BY collection, id'
)
expect(rows.rows).toHaveLength(2)
expect(rows.rows[0]).toMatchObject({ id: 'chunk-2', external_id: 'item-1', collection: 'collection-a' })
expect(rows.rows[1]).toMatchObject({ id: 'chunk-1', external_id: 'item-1', collection: 'collection-b' })
})
it('should remove a deleted chunk from bm25 search', async () => {
const node = new TextNode({
id_: 'chunk-bm25-delete',
text: 'delete this exact chunk',
embedding: [0.5, 0.6],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 4 },
relationships: { [NodeRelationship.SOURCE]: { nodeId: 'item-1', metadata: {} } }
})
await store.add([node])
await store.deleteByIdAndExternalId('chunk-bm25-delete', 'item-1')
const result = await store.query({
queryStr: 'delete exact',
similarityTopK: 5,
mode: 'bm25' as VectorStoreQueryMode
})
expect(result.ids).not.toContain('chunk-bm25-delete')
})
})
describe('listByExternalId', () => {
it('should list documents by external_id in chunk order without embeddings', async () => {
await store.add([
new TextNode({
id_: 'chunk-2',
text: 'second chunk',
embedding: [0.1, 0.2],
metadata: { itemId: 'item-1', chunkIndex: 1, tokenCount: 2 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
}),
new TextNode({
id_: 'chunk-1',
text: 'first chunk',
embedding: [0.3, 0.4],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 2 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
}),
new TextNode({
id_: 'other-chunk',
text: 'other item chunk',
embedding: [0.5, 0.6],
metadata: { itemId: 'item-2', chunkIndex: 0, tokenCount: 3 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-2',
metadata: {}
}
}
})
])
const chunks = await store.listByExternalId('item-1')
expect(chunks.map((chunk) => chunk.id_)).toEqual(['chunk-1', 'chunk-2'])
expect(chunks.map((chunk) => chunk.getContent(MetadataMode.NONE))).toEqual(['first chunk', 'second chunk'])
expect(chunks.map((chunk) => chunk.metadata.chunkIndex)).toEqual([0, 1])
expect(() => chunks[0]?.getEmbedding()).toThrow('Embedding not set')
})
it('should fall back to external_id when listed metadata has no itemId', async () => {
await store.add([
new TextNode({
id_: 'chunk-without-item-id',
text: 'chunk without item id',
embedding: [0.1, 0.2],
metadata: { chunkIndex: 0, tokenCount: 4 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
])
const chunks = await store.listByExternalId('item-1')
expect(chunks).toHaveLength(1)
expect(chunks[0]?.metadata).toMatchObject({
itemId: 'item-1',
chunkIndex: 0,
tokenCount: 4
})
})
it('should tolerate invalid metadata JSON when listing documents', async () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {})
await store.add([
new TextNode({
id_: 'chunk-invalid-list-metadata',
text: 'chunk with invalid list metadata',
embedding: [0.1, 0.2],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 5 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
])
await client.execute({
sql: 'UPDATE test_embeddings SET metadata = ? WHERE id = ?',
args: ['{"itemId":', 'chunk-invalid-list-metadata']
})
const chunks = await store.listByExternalId('item-1')
expect(chunks).toHaveLength(1)
expect(chunks[0]?.id_).toBe('chunk-invalid-list-metadata')
expect(chunks[0]?.metadata).toEqual({ itemId: 'item-1' })
expect(warnSpy).toHaveBeenCalledWith(
'Failed to parse metadata JSON for row chunk-invalid-list-metadata',
expect.any(Error)
)
warnSpy.mockRestore()
})
it('should respect collection when listing documents', async () => {
store.setCollection('collection-a')
await store.add([
new TextNode({
id_: 'collection-a-chunk',
text: 'collection a chunk',
embedding: [0.1, 0.2],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 3 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
])
store.setCollection('collection-b')
await store.add([
new TextNode({
id_: 'collection-b-chunk',
text: 'collection b chunk',
embedding: [0.3, 0.4],
metadata: { itemId: 'item-1', chunkIndex: 0, tokenCount: 3 },
relationships: {
[NodeRelationship.SOURCE]: {
nodeId: 'item-1',
metadata: {}
}
}
})
])
const chunks = await store.listByExternalId('item-1')
expect(chunks.map((chunk) => chunk.id_)).toEqual(['collection-b-chunk'])
})
})
})