mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-07-06 05:55:28 +08:00
feat: add keyword-based topic search with whole-word matching and result sorting (#12933)
### What this PR does

Before this PR: Topic search only matched exact strings, making it difficult to find a topic when the user doesn't remember its exact title.

After this PR: Topic search supports keyword-based matching, so users can find topics by entering partial keywords. This improves the search experience significantly.

### Why we need it and why it was done in this way

The current search functionality is limited to exact string matching, which is not user-friendly. Keyword-based search provides a more flexible and intuitive search experience.

The following tradeoffs were made:

- Added a new utility module (`keywordSearch.ts`) to keep the search logic separated and testable.

The following alternatives were considered:

- Fuzzy search libraries (e.g., fuse.js) were considered, but keyword matching was sufficient for this use case without adding extra dependencies.

### Breaking changes

None.

### Checklist

- [x] PR: The PR description is expressive enough and will help future contributors
- [x] Code: Write code that humans can understand and Keep it simple
- [x] Refactor: You have left the code cleaner than you found it (Boy Scout Rule)

```release-note
Improved topic search with keyword-based search functionality
```
This commit is contained in:
@@ -1582,8 +1582,16 @@
|
||||
"message": "Locate the message"
|
||||
},
|
||||
"search": {
|
||||
"match": {
|
||||
"substring": "Contains",
|
||||
"whole_word": "Whole word"
|
||||
},
|
||||
"messages": "Search All Messages",
|
||||
"placeholder": "Search topics or messages...",
|
||||
"sort": {
|
||||
"newest": "Newest first",
|
||||
"oldest": "Oldest first"
|
||||
},
|
||||
"topics": {
|
||||
"empty": "No topics found, press Enter to search all messages"
|
||||
}
|
||||
|
||||
@@ -1582,8 +1582,16 @@
|
||||
"message": "定位到消息"
|
||||
},
|
||||
"search": {
|
||||
"match": {
|
||||
"substring": "包含",
|
||||
"whole_word": "整词"
|
||||
},
|
||||
"messages": "搜索所有消息",
|
||||
"placeholder": "搜索话题或消息...",
|
||||
"sort": {
|
||||
"newest": "最新优先",
|
||||
"oldest": "最早优先"
|
||||
},
|
||||
"topics": {
|
||||
"empty": "没有找到相关话题,按回车键搜索所有消息"
|
||||
}
|
||||
|
||||
@@ -1582,8 +1582,16 @@
|
||||
"message": "定位到訊息"
|
||||
},
|
||||
"search": {
|
||||
"match": {
|
||||
"substring": "包含",
|
||||
"whole_word": "整詞"
|
||||
},
|
||||
"messages": "搜尋所有訊息",
|
||||
"placeholder": "搜尋話題或訊息...",
|
||||
"sort": {
|
||||
"newest": "最新優先",
|
||||
"oldest": "最早優先"
|
||||
},
|
||||
"topics": {
|
||||
"empty": "沒有找到相關話題,按 Enter 鍵搜尋所有訊息"
|
||||
}
|
||||
|
||||
@@ -4,10 +4,17 @@ import useScrollPosition from '@renderer/hooks/useScrollPosition'
|
||||
import { selectTopicsMap } from '@renderer/store/assistants'
|
||||
import type { Topic } from '@renderer/types'
|
||||
import { type Message, MessageBlockType } from '@renderer/types/newMessage'
|
||||
import { List, Spin, Typography } from 'antd'
|
||||
import {
|
||||
buildKeywordRegexes,
|
||||
buildKeywordUnionRegex,
|
||||
type KeywordMatchMode,
|
||||
splitKeywordsToTerms
|
||||
} from '@renderer/utils/keywordSearch'
|
||||
import { List, Segmented, Spin, Typography } from 'antd'
|
||||
import { useLiveQuery } from 'dexie-react-hooks'
|
||||
import type { FC } from 'react'
|
||||
import { memo, useCallback, useEffect, useRef, useState } from 'react'
|
||||
import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useSelector } from 'react-redux'
|
||||
import styled from 'styled-components'
|
||||
|
||||
@@ -32,6 +39,8 @@ const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160
|
||||
const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40
|
||||
const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3
|
||||
|
||||
type ResultSortOrder = 'newest' | 'oldest'
|
||||
|
||||
const stripMarkdownFormatting = (text: string) => {
|
||||
return text
|
||||
.replace(/```(?:[^\n]*\n)?([\s\S]*?)```/g, '$1')
|
||||
@@ -46,8 +55,6 @@ const stripMarkdownFormatting = (text: string) => {
|
||||
|
||||
const normalizeText = (text: string) => text.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
|
||||
|
||||
const escapeRegex = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
|
||||
|
||||
const mergeRanges = (ranges: Array<[number, number]>) => {
|
||||
const sorted = ranges.slice().sort((a, b) => a[0] - b[0])
|
||||
const merged: Array<[number, number]> = []
|
||||
@@ -110,7 +117,7 @@ const buildLineSnippet = (line: string, regexes: RegExp[]) => {
|
||||
return result
|
||||
}
|
||||
|
||||
const buildSearchSnippet = (text: string, terms: string[]) => {
|
||||
const buildSearchSnippet = (text: string, terms: string[], matchMode: KeywordMatchMode) => {
|
||||
const normalized = normalizeText(stripMarkdownFormatting(text))
|
||||
const lines = normalized.split('\n')
|
||||
if (lines.length === 0) {
|
||||
@@ -118,7 +125,7 @@ const buildSearchSnippet = (text: string, terms: string[]) => {
|
||||
}
|
||||
|
||||
const nonEmptyTerms = terms.filter((term) => term.length > 0)
|
||||
const regexes = nonEmptyTerms.map((term) => new RegExp(escapeRegex(term), 'gi'))
|
||||
const regexes = buildKeywordRegexes(nonEmptyTerms, { matchMode, flags: 'gi' })
|
||||
const matchedLineIndexes: number[] = []
|
||||
|
||||
if (regexes.length > 0) {
|
||||
@@ -179,15 +186,13 @@ const buildSearchSnippet = (text: string, terms: string[]) => {
|
||||
}
|
||||
|
||||
const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
|
||||
const { t } = useTranslation()
|
||||
const { handleScroll, containerRef } = useScrollPosition('SearchResults')
|
||||
const observerRef = useRef<MutationObserver | null>(null)
|
||||
|
||||
const [searchTerms, setSearchTerms] = useState<string[]>(
|
||||
keywords
|
||||
.toLowerCase()
|
||||
.split(' ')
|
||||
.filter((term) => term.length > 0)
|
||||
)
|
||||
const [matchMode, setMatchMode] = useState<KeywordMatchMode>('whole-word')
|
||||
const [sortOrder, setSortOrder] = useState<ResultSortOrder>('newest')
|
||||
const [searchTerms, setSearchTerms] = useState<string[]>(splitKeywordsToTerms(keywords))
|
||||
|
||||
const topics = useLiveQuery(() => db.topics.toArray(), [])
|
||||
// FIXME: db 中没有 topic.name 等信息,只能从 store 获取
|
||||
@@ -209,11 +214,8 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
}
|
||||
|
||||
const startTime = performance.now()
|
||||
const newSearchTerms = keywords
|
||||
.toLowerCase()
|
||||
.split(/\s+/)
|
||||
.filter((term) => term.length > 0)
|
||||
const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i'))
|
||||
const newSearchTerms = splitKeywordsToTerms(keywords)
|
||||
const searchRegexes = buildKeywordRegexes(newSearchTerms, { matchMode, flags: 'i' })
|
||||
|
||||
const blocks = (await db.message_blocks.toArray())
|
||||
.filter((block) => block.type === MessageBlockType.MAIN_TEXT)
|
||||
@@ -234,7 +236,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
message,
|
||||
topic,
|
||||
content: block.content,
|
||||
snippet: buildSearchSnippet(block.content, newSearchTerms)
|
||||
snippet: buildSearchSnippet(block.content, newSearchTerms, matchMode)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -250,20 +252,27 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
})
|
||||
setSearchTerms(newSearchTerms)
|
||||
setIsLoading(false)
|
||||
}, [keywords, storeTopicsMap, topics])
|
||||
}, [keywords, matchMode, storeTopicsMap, topics])
|
||||
|
||||
const sortedSearchResults = useMemo(() => {
|
||||
const results = [...searchResults]
|
||||
results.sort((a, b) => {
|
||||
const timeA = Date.parse(a.message.createdAt) || 0
|
||||
const timeB = Date.parse(b.message.createdAt) || 0
|
||||
if (timeA !== timeB) {
|
||||
return sortOrder === 'newest' ? timeB - timeA : timeA - timeB
|
||||
}
|
||||
return a.message.id.localeCompare(b.message.id)
|
||||
})
|
||||
return results
|
||||
}, [searchResults, sortOrder])
|
||||
|
||||
const highlightText = (text: string) => {
|
||||
const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0)))
|
||||
if (uniqueTerms.length === 0) {
|
||||
const highlightRegex = buildKeywordUnionRegex(searchTerms, { matchMode, flags: 'gi' })
|
||||
if (!highlightRegex) {
|
||||
return <span dangerouslySetInnerHTML={{ __html: text }} />
|
||||
}
|
||||
|
||||
const pattern = uniqueTerms
|
||||
.sort((a, b) => b.length - a.length)
|
||||
.map((term) => escapeRegex(term))
|
||||
.join('|')
|
||||
const regex = new RegExp(pattern, 'gi')
|
||||
const highlightedText = text.replace(regex, (match) => `<mark>${match}</mark>`)
|
||||
const highlightedText = text.replace(highlightRegex, (match) => `<mark>${match}</mark>`)
|
||||
return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
|
||||
}
|
||||
|
||||
@@ -289,14 +298,36 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
return (
|
||||
<Container ref={containerRef} {...props} onScroll={handleScroll}>
|
||||
<Spin spinning={isLoading} indicator={<LoadingIcon color="var(--color-text-2)" />}>
|
||||
{searchResults.length > 0 && (
|
||||
<SearchToolbar>
|
||||
<Segmented
|
||||
shape="round"
|
||||
size="small"
|
||||
value={sortOrder}
|
||||
onChange={(value) => setSortOrder(value as ResultSortOrder)}
|
||||
options={[
|
||||
{ label: t('history.search.sort.newest'), value: 'newest' },
|
||||
{ label: t('history.search.sort.oldest'), value: 'oldest' }
|
||||
]}
|
||||
/>
|
||||
<Segmented
|
||||
shape="round"
|
||||
size="small"
|
||||
value={matchMode}
|
||||
onChange={(value) => setMatchMode(value as KeywordMatchMode)}
|
||||
options={[
|
||||
{ label: t('history.search.match.whole_word'), value: 'whole-word' },
|
||||
{ label: t('history.search.match.substring'), value: 'substring' }
|
||||
]}
|
||||
/>
|
||||
</SearchToolbar>
|
||||
{sortedSearchResults.length > 0 && (
|
||||
<SearchStats>
|
||||
Found {searchStats.count} results in {searchStats.time.toFixed(3)} seconds
|
||||
</SearchStats>
|
||||
)}
|
||||
<List
|
||||
itemLayout="vertical"
|
||||
dataSource={searchResults}
|
||||
dataSource={sortedSearchResults}
|
||||
pagination={{
|
||||
pageSize: 10,
|
||||
hideOnSinglePage: true
|
||||
@@ -339,6 +370,16 @@ const SearchStats = styled.div`
|
||||
color: var(--color-text-3);
|
||||
`
|
||||
|
||||
const SearchToolbar = styled.div`
|
||||
width: 100%;
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: flex-start;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
margin-bottom: 8px;
|
||||
`
|
||||
|
||||
const SearchResultTime = styled.div`
|
||||
margin-top: 10px;
|
||||
text-align: right;
|
||||
|
||||
70
src/renderer/src/utils/__tests__/keywordSearch.test.ts
Normal file
70
src/renderer/src/utils/__tests__/keywordSearch.test.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import {
|
||||
buildKeywordRegex,
|
||||
buildKeywordUnionRegex,
|
||||
type KeywordMatchMode,
|
||||
splitKeywordsToTerms
|
||||
} from '../keywordSearch'
|
||||
|
||||
describe('keywordSearch', () => {
  // Builds a fresh single-term regex (default flags) and reports whether it matches `text`.
  const matches = (term: string, matchMode: KeywordMatchMode, text: string): boolean =>
    buildKeywordRegex(term, { matchMode }).test(text)

  describe('splitKeywordsToTerms', () => {
    it('splits by whitespace and lowercases', () => {
      const terms = splitKeywordsToTerms(' Foo\tBAR \n baz ')
      expect(terms).toEqual(['foo', 'bar', 'baz'])
    })

    it('returns empty array for empty input', () => {
      const terms = splitKeywordsToTerms('')
      expect(terms).toEqual([])
    })
  })

  describe('buildKeywordRegex (whole-word)', () => {
    const wholeWord = (term: string, text: string) => matches(term, 'whole-word', text)

    it('matches standalone tokens but not substrings inside words', () => {
      // Bare token, token followed by punctuation, token surrounded by spaces.
      expect(wholeWord('sms', 'sms')).toBe(true)
      expect(wholeWord('sms', 'sms,')).toBe(true)
      expect(wholeWord('sms', 'use sms now')).toBe(true)
      // The term only appears as the tail of a longer word.
      expect(wholeWord('sms', 'mechanisms')).toBe(false)
    })

    it('does not match inside longer alphanumeric strings (e.g. API keys)', () => {
      expect(wholeWord('sms', 'IMr4WSMS5dwa52')).toBe(false)
    })

    it('treats underscores and punctuation as token boundaries', () => {
      expect(wholeWord('sms', 'sms_service')).toBe(true)
      expect(wholeWord('sms', 'sms-service')).toBe(true)
      expect(wholeWord('sms', 'smss')).toBe(false)
    })

    it('does not match inside non-ASCII words', () => {
      expect(wholeWord('ana', 'mañana')).toBe(false)
      expect(wholeWord('ana', 'ana')).toBe(true)
    })
  })

  describe('buildKeywordRegex (substring)', () => {
    const substring = (term: string, text: string) => matches(term, 'substring', text)

    it('matches substrings inside other words', () => {
      expect(substring('sms', 'mechanisms')).toBe(true)
      expect(substring('sms', 'IMr4WSMS5dwa52')).toBe(true)
    })
  })

  describe('buildKeywordUnionRegex', () => {
    it('builds a case-insensitive union regex', () => {
      const union = buildKeywordUnionRegex(['sms', 'mms'], { matchMode: 'whole-word', flags: 'i' })

      expect(union).not.toBeNull()
      expect(union?.test('SMS')).toBe(true)
      expect(union?.test('MMS')).toBe(true)
      expect(union?.test('mechanisms')).toBe(false)
    })
  })
})
|
||||
58
src/renderer/src/utils/keywordSearch.ts
Normal file
58
src/renderer/src/utils/keywordSearch.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
 * Strategy used when turning a search term into a regex pattern.
 *
 * - `'whole-word'`: the term must not sit inside a larger run of letters/digits.
 * - `'substring'`: the escaped term is matched anywhere in the text.
 */
export type KeywordMatchMode = 'whole-word' | 'substring'
|
||||
|
||||
/**
 * Escapes every regex syntax character in `text` so the result can be embedded
 * in a `RegExp` source and match the input literally.
 *
 * @param text - Raw text to escape.
 * @returns `text` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
export function escapeRegex(text: string): string {
  const specialChars = /[.*+?^${}()|[\]\\]/g
  return text.replace(specialChars, (char) => `\\${char}`)
}
|
||||
|
||||
/**
 * Turns a raw search box value into a list of lowercase search terms.
 *
 * @param keywords - User input; a falsy value is treated as an empty string.
 * @returns The whitespace-separated, lowercased, non-empty terms in input order.
 */
export function splitKeywordsToTerms(keywords: string): string[] {
  if (!keywords) {
    return []
  }

  const terms: string[] = []
  for (const piece of keywords.toLowerCase().split(/\s+/)) {
    // Leading/trailing whitespace produces empty pieces; drop them.
    if (piece.length > 0) {
      terms.push(piece)
    }
  }
  return terms
}
|
||||
|
||||
/**
 * Wraps an already-escaped term in boundary assertions for "whole word" matching.
 *
 * "Whole word" here means: do not match inside a larger alphanumeric token.
 * This avoids false positives like:
 * - API keys: "IMr4WSMS5dwa52"
 * - suffixes: "mechanis[m][s]" when searching "sms"
 *
 * Combining marks (`\p{M}`) are treated as part of the letter they modify, so
 * decomposed (NFD) text behaves like precomposed text: "ana" does not match
 * inside "man\u0303ana" (mañana) nor at the start of "ana\u0303" (anã).
 *
 * The returned source uses Unicode property escapes and a lookbehind, so it
 * must be compiled with the `u` flag.
 *
 * @param escapedTerm - Term that has already been regex-escaped.
 * @returns Regex source for the term guarded by boundary lookarounds.
 */
function buildWholeWordPattern(escapedTerm: string): string {
  // Lookbehind: skip any combining marks, then reject if a letter/digit precedes them.
  // A mark that follows a non-letter (e.g. an emoji variation selector) is still a boundary.
  const notInsideTokenBefore = '(?<![\\p{L}\\p{N}]\\p{M}*)'
  // Lookahead: a following letter, digit, or combining mark means the token continues.
  const notInsideTokenAfter = '(?![\\p{L}\\p{N}\\p{M}])'
  return `${notInsideTokenBefore}${escapedTerm}${notInsideTokenAfter}`
}
|
||||
|
||||
/**
 * Ensures `flag` is present in a regex flag string.
 *
 * @param flags - Existing flag string, e.g. `'gi'`.
 * @param flag - Flag to add if it is not already contained in `flags`.
 * @returns `flags` unchanged when it already contains `flag`, otherwise `flags` with `flag` appended.
 */
function addRegexFlag(flags: string, flag: string): string {
  if (flags.indexOf(flag) !== -1) {
    return flags
  }
  return flags + flag
}
|
||||
|
||||
/**
 * Produces the regex source (not a compiled `RegExp`) for a single search term.
 *
 * @param term - Raw, unescaped search term.
 * @param matchMode - `'whole-word'` wraps the escaped term via `buildWholeWordPattern`;
 *   any other mode returns the escaped term as-is.
 * @returns Regex source string for the term.
 */
export function buildKeywordPattern(term: string, matchMode: KeywordMatchMode): string {
  const literal = escapeRegex(term)
  if (matchMode === 'whole-word') {
    return buildWholeWordPattern(literal)
  }
  return literal
}
|
||||
|
||||
/**
 * Compiles a single search term into a `RegExp`.
 *
 * @param term - Raw, unescaped search term.
 * @param options - `matchMode` selects the pattern shape; `flags` defaults to `'i'`.
 * @returns The compiled regex. In whole-word mode the `u` flag is added, because the
 *   whole-word pattern relies on Unicode property escapes.
 */
export function buildKeywordRegex(term: string, options: { matchMode: KeywordMatchMode; flags?: string }): RegExp {
  let regexFlags = options.flags ?? 'i'
  if (options.matchMode === 'whole-word') {
    regexFlags = addRegexFlag(regexFlags, 'u')
  }

  const source = buildKeywordPattern(term, options.matchMode)
  return new RegExp(source, regexFlags)
}
|
||||
|
||||
/**
 * Compiles one regex per non-empty search term, preserving input order.
 *
 * @param terms - Search terms; empty strings are skipped.
 * @param options - Passed through unchanged to `buildKeywordRegex` for every term.
 * @returns An array of compiled regexes, one per non-empty term.
 */
export function buildKeywordRegexes(
  terms: string[],
  options: { matchMode: KeywordMatchMode; flags?: string }
): RegExp[] {
  const compiled: RegExp[] = []
  for (const term of terms) {
    if (term.length > 0) {
      compiled.push(buildKeywordRegex(term, options))
    }
  }
  return compiled
}
|
||||
|
||||
/**
 * Compiles all search terms into a single alternation regex (`a|b|c`).
 *
 * @param terms - Search terms; empty strings and exact duplicates are dropped.
 * @param options - `matchMode` selects the per-term pattern; `flags` defaults to `'gi'`.
 * @returns The union regex, or `null` when no usable term remains. In whole-word mode
 *   the `u` flag is added, because the whole-word pattern relies on Unicode property escapes.
 */
export function buildKeywordUnionRegex(
  terms: string[],
  options: { matchMode: KeywordMatchMode; flags?: string }
): RegExp | null {
  const seen = new Set<string>()
  for (const term of terms) {
    if (term.length > 0) {
      seen.add(term)
    }
  }
  if (seen.size === 0) {
    return null
  }

  // Longest terms go first so the alternation prefers them over shorter terms
  // that could match at the same position.
  const longestFirst = Array.from(seen).sort((left, right) => right.length - left.length)
  const source = longestFirst.map((term) => buildKeywordPattern(term, options.matchMode)).join('|')

  let regexFlags = options.flags ?? 'gi'
  if (options.matchMode === 'whole-word') {
    regexFlags = addRegexFlag(regexFlags, 'u')
  }
  return new RegExp(source, regexFlags)
}
|
||||
Reference in New Issue
Block a user