feat: add keyword-based topic search with whole-word matching and result sorting (#12933)

### What this PR does

Before this PR:
Topic search only matched exact strings, making it difficult to find
topics when the user doesn't remember the exact title.

After this PR:
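Topic search now supports keyword-based matching: the query is split on
whitespace into terms, so users can find topics by entering partial
keywords. Terms can be matched as whole words (the default) or as
substrings, and results can be sorted newest-first or oldest-first.
This improves the search experience significantly.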

### Why we need it and why it was done in this way

The current search functionality is limited to exact string matching
which is not user-friendly. Keyword-based search provides a more
flexible and intuitive search experience.

The following tradeoffs were made:
- Added a new utility module (`keywordSearch.ts`) to keep the search
logic separated and testable.

The following alternatives were considered:
- Fuzzy search libraries (e.g., fuse.js) were considered but keyword
matching was sufficient for this use case without adding extra
dependencies.

### Breaking changes

None.

### Checklist

- [x] PR: The PR description is expressive enough and will help future
contributors
- [x] Code: Write code that humans can understand and Keep it simple
- [x] Refactor: You have left the code cleaner than you found it (Boy
Scout Rule)

```release-note
Improved topic search with keyword-based search functionality
```
This commit is contained in:
MaziarAshtari
2026-02-16 21:34:13 +03:30
committed by GitHub
parent e61e1bb672
commit 142e0c1cf6
6 changed files with 223 additions and 30 deletions

View File

@@ -1582,8 +1582,16 @@
"message": "Locate the message"
},
"search": {
"match": {
"substring": "Contains",
"whole_word": "Whole word"
},
"messages": "Search All Messages",
"placeholder": "Search topics or messages...",
"sort": {
"newest": "Newest first",
"oldest": "Oldest first"
},
"topics": {
"empty": "No topics found, press Enter to search all messages"
}

View File

@@ -1582,8 +1582,16 @@
"message": "定位到消息"
},
"search": {
"match": {
"substring": "包含",
"whole_word": "整词"
},
"messages": "搜索所有消息",
"placeholder": "搜索话题或消息...",
"sort": {
"newest": "最新优先",
"oldest": "最早优先"
},
"topics": {
"empty": "没有找到相关话题,点击回车键搜索所有消息"
}

View File

@@ -1582,8 +1582,16 @@
"message": "定位到訊息"
},
"search": {
"match": {
"substring": "包含",
"whole_word": "整詞"
},
"messages": "搜尋所有訊息",
"placeholder": "搜尋話題或訊息...",
"sort": {
"newest": "最新優先",
"oldest": "最早優先"
},
"topics": {
"empty": "沒有找到相關話題,按 Enter 鍵搜尋所有訊息"
}

View File

@@ -4,10 +4,17 @@ import useScrollPosition from '@renderer/hooks/useScrollPosition'
import { selectTopicsMap } from '@renderer/store/assistants'
import type { Topic } from '@renderer/types'
import { type Message, MessageBlockType } from '@renderer/types/newMessage'
import { List, Spin, Typography } from 'antd'
import {
buildKeywordRegexes,
buildKeywordUnionRegex,
type KeywordMatchMode,
splitKeywordsToTerms
} from '@renderer/utils/keywordSearch'
import { List, Segmented, Spin, Typography } from 'antd'
import { useLiveQuery } from 'dexie-react-hooks'
import type { FC } from 'react'
import { memo, useCallback, useEffect, useRef, useState } from 'react'
import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useSelector } from 'react-redux'
import styled from 'styled-components'
@@ -32,6 +39,8 @@ const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160
const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40
const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3
type ResultSortOrder = 'newest' | 'oldest'
const stripMarkdownFormatting = (text: string) => {
return text
.replace(/```(?:[^\n]*\n)?([\s\S]*?)```/g, '$1')
@@ -46,8 +55,6 @@ const stripMarkdownFormatting = (text: string) => {
const normalizeText = (text: string) => text.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
const escapeRegex = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
const mergeRanges = (ranges: Array<[number, number]>) => {
const sorted = ranges.slice().sort((a, b) => a[0] - b[0])
const merged: Array<[number, number]> = []
@@ -110,7 +117,7 @@ const buildLineSnippet = (line: string, regexes: RegExp[]) => {
return result
}
const buildSearchSnippet = (text: string, terms: string[]) => {
const buildSearchSnippet = (text: string, terms: string[], matchMode: KeywordMatchMode) => {
const normalized = normalizeText(stripMarkdownFormatting(text))
const lines = normalized.split('\n')
if (lines.length === 0) {
@@ -118,7 +125,7 @@ const buildSearchSnippet = (text: string, terms: string[]) => {
}
const nonEmptyTerms = terms.filter((term) => term.length > 0)
const regexes = nonEmptyTerms.map((term) => new RegExp(escapeRegex(term), 'gi'))
const regexes = buildKeywordRegexes(nonEmptyTerms, { matchMode, flags: 'gi' })
const matchedLineIndexes: number[] = []
if (regexes.length > 0) {
@@ -179,15 +186,13 @@ const buildSearchSnippet = (text: string, terms: string[]) => {
}
const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
const { t } = useTranslation()
const { handleScroll, containerRef } = useScrollPosition('SearchResults')
const observerRef = useRef<MutationObserver | null>(null)
const [searchTerms, setSearchTerms] = useState<string[]>(
keywords
.toLowerCase()
.split(' ')
.filter((term) => term.length > 0)
)
const [matchMode, setMatchMode] = useState<KeywordMatchMode>('whole-word')
const [sortOrder, setSortOrder] = useState<ResultSortOrder>('newest')
const [searchTerms, setSearchTerms] = useState<string[]>(splitKeywordsToTerms(keywords))
const topics = useLiveQuery(() => db.topics.toArray(), [])
// FIXME: db 中没有 topic.name 等信息,只能从 store 获取
@@ -209,11 +214,8 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
}
const startTime = performance.now()
const newSearchTerms = keywords
.toLowerCase()
.split(/\s+/)
.filter((term) => term.length > 0)
const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i'))
const newSearchTerms = splitKeywordsToTerms(keywords)
const searchRegexes = buildKeywordRegexes(newSearchTerms, { matchMode, flags: 'i' })
const blocks = (await db.message_blocks.toArray())
.filter((block) => block.type === MessageBlockType.MAIN_TEXT)
@@ -234,7 +236,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
message,
topic,
content: block.content,
snippet: buildSearchSnippet(block.content, newSearchTerms)
snippet: buildSearchSnippet(block.content, newSearchTerms, matchMode)
}
}
}
@@ -250,20 +252,27 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
})
setSearchTerms(newSearchTerms)
setIsLoading(false)
}, [keywords, storeTopicsMap, topics])
}, [keywords, matchMode, storeTopicsMap, topics])
const sortedSearchResults = useMemo(() => {
const results = [...searchResults]
results.sort((a, b) => {
const timeA = Date.parse(a.message.createdAt) || 0
const timeB = Date.parse(b.message.createdAt) || 0
if (timeA !== timeB) {
return sortOrder === 'newest' ? timeB - timeA : timeA - timeB
}
return a.message.id.localeCompare(b.message.id)
})
return results
}, [searchResults, sortOrder])
const highlightText = (text: string) => {
const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0)))
if (uniqueTerms.length === 0) {
const highlightRegex = buildKeywordUnionRegex(searchTerms, { matchMode, flags: 'gi' })
if (!highlightRegex) {
return <span dangerouslySetInnerHTML={{ __html: text }} />
}
const pattern = uniqueTerms
.sort((a, b) => b.length - a.length)
.map((term) => escapeRegex(term))
.join('|')
const regex = new RegExp(pattern, 'gi')
const highlightedText = text.replace(regex, (match) => `<mark>${match}</mark>`)
const highlightedText = text.replace(highlightRegex, (match) => `<mark>${match}</mark>`)
return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
}
@@ -289,14 +298,36 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
return (
<Container ref={containerRef} {...props} onScroll={handleScroll}>
<Spin spinning={isLoading} indicator={<LoadingIcon color="var(--color-text-2)" />}>
{searchResults.length > 0 && (
<SearchToolbar>
<Segmented
shape="round"
size="small"
value={sortOrder}
onChange={(value) => setSortOrder(value as ResultSortOrder)}
options={[
{ label: t('history.search.sort.newest'), value: 'newest' },
{ label: t('history.search.sort.oldest'), value: 'oldest' }
]}
/>
<Segmented
shape="round"
size="small"
value={matchMode}
onChange={(value) => setMatchMode(value as KeywordMatchMode)}
options={[
{ label: t('history.search.match.whole_word'), value: 'whole-word' },
{ label: t('history.search.match.substring'), value: 'substring' }
]}
/>
</SearchToolbar>
{sortedSearchResults.length > 0 && (
<SearchStats>
Found {searchStats.count} results in {searchStats.time.toFixed(3)} seconds
</SearchStats>
)}
<List
itemLayout="vertical"
dataSource={searchResults}
dataSource={sortedSearchResults}
pagination={{
pageSize: 10,
hideOnSinglePage: true
@@ -339,6 +370,16 @@ const SearchStats = styled.div`
color: var(--color-text-3);
`
const SearchToolbar = styled.div`
width: 100%;
display: flex;
flex-direction: row;
justify-content: flex-start;
align-items: center;
gap: 10px;
margin-bottom: 8px;
`
const SearchResultTime = styled.div`
margin-top: 10px;
text-align: right;

View File

@@ -0,0 +1,70 @@
import { describe, expect, it } from 'vitest'
import {
buildKeywordRegex,
buildKeywordUnionRegex,
type KeywordMatchMode,
splitKeywordsToTerms
} from '../keywordSearch'
// Unit tests for the keywordSearch helpers: splitting a query into terms,
// building a per-term regex in each match mode, and building the combined
// union regex.
// None of the regexes built here carry the 'g' flag, so calling `.test()`
// repeatedly on the same instance does not carry `lastIndex` state between
// assertions.
describe('keywordSearch', () => {
describe('splitKeywordsToTerms', () => {
it('splits by whitespace and lowercases', () => {
// Mixed spaces, a tab and a newline all act as separators; leading and
// trailing whitespace must not produce empty terms.
expect(splitKeywordsToTerms(' Foo\tBAR \n baz ')).toEqual(['foo', 'bar', 'baz'])
})
it('returns empty array for empty input', () => {
expect(splitKeywordsToTerms('')).toEqual([])
})
})
describe('buildKeywordRegex (whole-word)', () => {
const matchMode: KeywordMatchMode = 'whole-word'
it('matches standalone tokens but not substrings inside words', () => {
const regex = buildKeywordRegex('sms', { matchMode })
expect(regex.test('sms')).toBe(true)
// Trailing punctuation is not a letter or digit, so it still counts as a boundary.
expect(regex.test('sms,')).toBe(true)
expect(regex.test('use sms now')).toBe(true)
// "mechanisms" ends in "sms" but is preceded by a letter.
expect(regex.test('mechanisms')).toBe(false)
})
it('does not match inside longer alphanumeric strings (e.g. API keys)', () => {
const regex = buildKeywordRegex('sms', { matchMode })
// "SMS" is surrounded by a letter on the left and a digit on the right.
expect(regex.test('IMr4WSMS5dwa52')).toBe(false)
})
it('treats underscores and punctuation as token boundaries', () => {
const regex = buildKeywordRegex('sms', { matchMode })
// '_' and '-' are neither letters nor digits, so the term is still a whole token.
expect(regex.test('sms_service')).toBe(true)
expect(regex.test('sms-service')).toBe(true)
// A trailing letter makes it part of a longer word.
expect(regex.test('smss')).toBe(false)
})
it('does not match inside non-ASCII words', () => {
const regex = buildKeywordRegex('ana', { matchMode })
// The trailing "ana" follows 'ñ', which must count as a letter for the
// boundary check (an ASCII-only \b would treat it as a boundary).
expect(regex.test('mañana')).toBe(false)
expect(regex.test('ana')).toBe(true)
})
})
describe('buildKeywordRegex (substring)', () => {
const matchMode: KeywordMatchMode = 'substring'
it('matches substrings inside other words', () => {
const regex = buildKeywordRegex('sms', { matchMode })
expect(regex.test('mechanisms')).toBe(true)
// Upper-case "SMS" only matches because the default flags are case-insensitive.
expect(regex.test('IMr4WSMS5dwa52')).toBe(true)
})
})
describe('buildKeywordUnionRegex', () => {
it('builds a case-insensitive union regex', () => {
// 'i' is passed explicitly (without 'g') so repeated `.test()` calls are stateless.
const regex = buildKeywordUnionRegex(['sms', 'mms'], { matchMode: 'whole-word', flags: 'i' })
expect(regex).not.toBeNull()
expect(regex?.test('SMS')).toBe(true)
expect(regex?.test('MMS')).toBe(true)
// Whole-word boundaries must apply to every alternative of the union.
expect(regex?.test('mechanisms')).toBe(false)
})
})
})

View File

@@ -0,0 +1,58 @@
/**
 * How a search term is matched against text.
 *
 * - `'whole-word'`: the term must not sit inside a larger alphanumeric token.
 * - `'substring'`: the escaped term is used as-is and may match anywhere,
 *   including inside other words.
 */
export type KeywordMatchMode = 'whole-word' | 'substring'
/**
 * Escapes regular-expression metacharacters so that `text` can be embedded in
 * a pattern and matched literally.
 *
 * @param text - Raw text to neutralise.
 * @returns `text` with a backslash inserted before each of
 *   `. * + ? ^ $ { } ( ) | [ ] \`.
 */
export function escapeRegex(text: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g
  // A replacer callback instead of a `$&` template: same output, one backslash per hit.
  return text.replace(metaCharacters, (metaCharacter) => `\\${metaCharacter}`)
}
/**
 * Turns a raw search query into a list of lower-cased search terms.
 *
 * @param keywords - The query as typed by the user; a falsy value yields no terms.
 * @returns The non-empty, whitespace-separated pieces of `keywords`, lower-cased
 *   and in their original order.
 */
export function splitKeywordsToTerms(keywords: string): string[] {
  if (!keywords) {
    return []
  }

  const terms: string[] = []
  for (const piece of keywords.toLowerCase().split(/\s+/)) {
    // Leading/trailing whitespace makes split() emit empty strings; drop them.
    if (piece.length > 0) {
      terms.push(piece)
    }
  }
  return terms
}
/**
 * Wraps an already-escaped term in lookarounds so it only matches as a
 * standalone token, i.e. not inside a larger run of word characters.
 *
 * "Word character" here means a Unicode letter (`\p{L}`), number (`\p{N}`) or
 * combining mark (`\p{M}`). This avoids false positives such as:
 *   - API keys: "IMr4WSMS5dwa52" when searching "sms"
 *   - suffixes: "mechanisms" when searching "sms"
 *   - decomposed (NFD) text: "man" + U+0303 + "ana" when searching "ana" or
 *     "man" — the combining tilde belongs to the neighbouring letter, so the
 *     term is still inside a word.
 * Underscores and punctuation are deliberately NOT word characters, so
 * "sms_service" and "sms-service" still match "sms".
 *
 * @param escapedTerm - Term text that has already been regex-escaped.
 * @returns A pattern source string; it uses `\p{…}` and therefore must be
 *   compiled with the `u` flag.
 */
function buildWholeWordPattern(escapedTerm: string): string {
  // Marks are included so a boundary is never placed between a base letter
  // and its combining diacritic.
  const wordCharacter = '[\\p{L}\\p{N}\\p{M}]'
  return `(?<!${wordCharacter})${escapedTerm}(?!${wordCharacter})`
}
/**
 * Ensures a regex flag string contains `flag`.
 *
 * @param flags - Existing flag string (e.g. `'gi'`).
 * @param flag - Flag to add.
 * @returns `flags` unchanged when it already contains `flag`, otherwise
 *   `flags` with `flag` appended.
 */
function addRegexFlag(flags: string, flag: string): string {
  if (flags.includes(flag)) {
    return flags
  }
  return flags + flag
}
/**
 * Builds the regex source for a single search term.
 *
 * @param term - Raw (unescaped) search term.
 * @param matchMode - `'whole-word'` wraps the escaped term in boundary
 *   lookarounds; `'substring'` returns the escaped term as-is.
 * @returns A pattern source string (not a compiled `RegExp`).
 */
export function buildKeywordPattern(term: string, matchMode: KeywordMatchMode): string {
  const literal = escapeRegex(term)
  if (matchMode === 'whole-word') {
    return buildWholeWordPattern(literal)
  }
  return literal
}
/**
 * Compiles a single search term into a `RegExp`.
 *
 * @param term - Raw (unescaped) search term.
 * @param options - `matchMode` selects whole-word or substring matching;
 *   `flags` are the regex flags to use (defaults to `'i'`).
 * @returns The compiled regex. In whole-word mode the `u` flag is added when
 *   missing, since the whole-word pattern relies on `\p{…}` escapes.
 */
export function buildKeywordRegex(term: string, options: { matchMode: KeywordMatchMode; flags?: string }): RegExp {
  const { matchMode } = options
  const requestedFlags = options.flags ?? 'i'
  const source = buildKeywordPattern(term, matchMode)

  if (matchMode === 'whole-word') {
    return new RegExp(source, addRegexFlag(requestedFlags, 'u'))
  }
  return new RegExp(source, requestedFlags)
}
/**
 * Compiles one `RegExp` per non-empty search term.
 *
 * @param terms - Raw (unescaped) search terms; empty strings are skipped.
 * @param options - Passed through unchanged to `buildKeywordRegex` for every term.
 * @returns The compiled regexes, in the same order as the surviving terms.
 */
export function buildKeywordRegexes(
  terms: string[],
  options: { matchMode: KeywordMatchMode; flags?: string }
): RegExp[] {
  return terms.reduce<RegExp[]>((compiled, term) => {
    if (term.length > 0) {
      compiled.push(buildKeywordRegex(term, options))
    }
    return compiled
  }, [])
}
/**
 * Compiles all search terms into one alternation regex (`a|b|c`), e.g. for
 * highlighting every term in a single pass.
 *
 * @param terms - Raw (unescaped) search terms; empty strings and exact
 *   duplicates are dropped.
 * @param options - `matchMode` selects whole-word or substring matching for
 *   every alternative; `flags` are the regex flags to use (defaults to `'gi'`).
 * @returns The compiled regex, or `null` when no usable term remains. In
 *   whole-word mode the `u` flag is added when missing.
 */
export function buildKeywordUnionRegex(
  terms: string[],
  options: { matchMode: KeywordMatchMode; flags?: string }
): RegExp | null {
  const { matchMode } = options
  const distinctTerms = [...new Set(terms.filter((term) => term.length > 0))]
  if (distinctTerms.length === 0) {
    return null
  }

  // Longest first, so that when one term is a prefix of another the longer
  // alternative is tried before the shorter one.
  distinctTerms.sort((left, right) => right.length - left.length)
  const alternation = distinctTerms.map((term) => buildKeywordPattern(term, matchMode)).join('|')

  const requestedFlags = options.flags ?? 'gi'
  const finalFlags = matchMode === 'whole-word' ? addRegexFlag(requestedFlags, 'u') : requestedFlags
  return new RegExp(alternation, finalFlags)
}