feat: add keyword-based topic search with whole-word matching and result sorting (#12933)

### What this PR does

Before this PR: Topic search only matched exact strings, making it difficult to find topics when the user doesn't remember the exact title.

After this PR: Topic search now supports keyword-based matching, allowing users to find topics by entering partial keywords. Results can be matched either as whole words or as substrings, and can be sorted newest-first or oldest-first. This improves the search experience significantly.

### Why we need it and why it was done in this way

The current search functionality is limited to exact string matching, which is not user-friendly. Keyword-based search provides a more flexible and intuitive search experience.

The following tradeoffs were made:

- Added a new utility module (`keywordSearch.ts`) to keep the search logic separated and testable.

The following alternatives were considered:

- Fuzzy search libraries (e.g., fuse.js) were considered, but keyword matching was sufficient for this use case without adding extra dependencies.

### Breaking changes

None.

### Checklist

- [x] PR: The PR description is expressive enough and will help future contributors
- [x] Code: Write code that humans can understand and keep it simple
- [x] Refactor: You have left the code cleaner than you found it (Boy Scout Rule)

```release-note
Improved topic search with keyword-based search functionality
```
2026-07-06 05:55:28 +08:00 · 2026-02-16 21:34:13 +03:30
parent e61e1bb672
commit 142e0c1cf6
6 changed files with 223 additions and 30 deletions
--- a/src/renderer/src/i18n/locales/en-us.json
+++ b/src/renderer/src/i18n/locales/en-us.json
@@ -1582,8 +1582,16 @@
      "message": "Locate the message"
    },
    "search": {
+      "match": {
+        "substring": "Contains",
+        "whole_word": "Whole word"
+      },
      "messages": "Search All Messages",
      "placeholder": "Search topics or messages...",
+      "sort": {
+        "newest": "Newest first",
+        "oldest": "Oldest first"
+      },
      "topics": {
        "empty": "No topics found, press Enter to search all messages"
      }
--- a/src/renderer/src/i18n/locales/zh-cn.json
+++ b/src/renderer/src/i18n/locales/zh-cn.json
@@ -1582,8 +1582,16 @@
      "message": "定位到消息"
    },
    "search": {
+      "match": {
+        "substring": "包含",
+        "whole_word": "整词"
+      },
      "messages": "搜索所有消息",
      "placeholder": "搜索话题或消息...",
+      "sort": {
+        "newest": "最新优先",
+        "oldest": "最早优先"
+      },
      "topics": {
        "empty": "没有找到相关话题，点击回车键搜索所有消息"
      }
--- a/src/renderer/src/i18n/locales/zh-tw.json
+++ b/src/renderer/src/i18n/locales/zh-tw.json
@@ -1582,8 +1582,16 @@
      "message": "定位到訊息"
    },
    "search": {
+      "match": {
+        "substring": "包含",
+        "whole_word": "整詞"
+      },
      "messages": "搜尋所有訊息",
      "placeholder": "搜尋話題或訊息...",
+      "sort": {
+        "newest": "最新優先",
+        "oldest": "最早優先"
+      },
      "topics": {
        "empty": "沒有找到相關話題，按 Enter 鍵搜尋所有訊息"
      }
--- a/src/renderer/src/pages/history/components/SearchResults.tsx
+++ b/src/renderer/src/pages/history/components/SearchResults.tsx
@@ -4,10 +4,17 @@ import useScrollPosition from '@renderer/hooks/useScrollPosition'
 import { selectTopicsMap } from '@renderer/store/assistants'
 import type { Topic } from '@renderer/types'
 import { type Message, MessageBlockType } from '@renderer/types/newMessage'
-import { List, Spin, Typography } from 'antd'
+import {
+  buildKeywordRegexes,
+  buildKeywordUnionRegex,
+  type KeywordMatchMode,
+  splitKeywordsToTerms
+} from '@renderer/utils/keywordSearch'
+import { List, Segmented, Spin, Typography } from 'antd'
 import { useLiveQuery } from 'dexie-react-hooks'
 import type { FC } from 'react'
-import { memo, useCallback, useEffect, useRef, useState } from 'react'
+import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
+import { useTranslation } from 'react-i18next'
 import { useSelector } from 'react-redux'
 import styled from 'styled-components'

@@ -32,6 +39,8 @@ const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160
 const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40
 const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3

+type ResultSortOrder = 'newest' | 'oldest'
+
 const stripMarkdownFormatting = (text: string) => {
  return text
    .replace(/```(?:[^\n]*\n)?([\s\S]*?)```/g, '$1')
@@ -46,8 +55,6 @@ const stripMarkdownFormatting = (text: string) => {

 const normalizeText = (text: string) => text.replace(/\r\n/g, '\n').replace(/\r/g, '\n')

-const escapeRegex = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
-
 const mergeRanges = (ranges: Array<[number, number]>) => {
  const sorted = ranges.slice().sort((a, b) => a[0] - b[0])
  const merged: Array<[number, number]> = []
@@ -110,7 +117,7 @@ const buildLineSnippet = (line: string, regexes: RegExp[]) => {
  return result
 }

-const buildSearchSnippet = (text: string, terms: string[]) => {
+const buildSearchSnippet = (text: string, terms: string[], matchMode: KeywordMatchMode) => {
  const normalized = normalizeText(stripMarkdownFormatting(text))
  const lines = normalized.split('\n')
  if (lines.length === 0) {
@@ -118,7 +125,7 @@ const buildSearchSnippet = (text: string, terms: string[]) => {
  }

  const nonEmptyTerms = terms.filter((term) => term.length > 0)
-  const regexes = nonEmptyTerms.map((term) => new RegExp(escapeRegex(term), 'gi'))
+  const regexes = buildKeywordRegexes(nonEmptyTerms, { matchMode, flags: 'gi' })
  const matchedLineIndexes: number[] = []

  if (regexes.length > 0) {
@@ -179,15 +186,13 @@ const buildSearchSnippet = (text: string, terms: string[]) => {
 }

 const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
+  const { t } = useTranslation()
  const { handleScroll, containerRef } = useScrollPosition('SearchResults')
  const observerRef = useRef<MutationObserver | null>(null)

-  const [searchTerms, setSearchTerms] = useState<string[]>(
-    keywords
-      .toLowerCase()
-      .split(' ')
-      .filter((term) => term.length > 0)
-  )
+  const [matchMode, setMatchMode] = useState<KeywordMatchMode>('whole-word')
+  const [sortOrder, setSortOrder] = useState<ResultSortOrder>('newest')
+  const [searchTerms, setSearchTerms] = useState<string[]>(splitKeywordsToTerms(keywords))

  const topics = useLiveQuery(() => db.topics.toArray(), [])
  // FIXME: db 中没有 topic.name 等信息，只能从 store 获取
@@ -209,11 +214,8 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
    }

    const startTime = performance.now()
-    const newSearchTerms = keywords
-      .toLowerCase()
-      .split(/\s+/)
-      .filter((term) => term.length > 0)
-    const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i'))
+    const newSearchTerms = splitKeywordsToTerms(keywords)
+    const searchRegexes = buildKeywordRegexes(newSearchTerms, { matchMode, flags: 'i' })

    const blocks = (await db.message_blocks.toArray())
      .filter((block) => block.type === MessageBlockType.MAIN_TEXT)
@@ -234,7 +236,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
              message,
              topic,
              content: block.content,
-              snippet: buildSearchSnippet(block.content, newSearchTerms)
+              snippet: buildSearchSnippet(block.content, newSearchTerms, matchMode)
            }
          }
        }
@@ -250,20 +252,27 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
    })
    setSearchTerms(newSearchTerms)
    setIsLoading(false)
-  }, [keywords, storeTopicsMap, topics])
+  }, [keywords, matchMode, storeTopicsMap, topics])
+
+  const sortedSearchResults = useMemo(() => {
+    const results = [...searchResults]
+    results.sort((a, b) => {
+      const timeA = Date.parse(a.message.createdAt) || 0
+      const timeB = Date.parse(b.message.createdAt) || 0
+      if (timeA !== timeB) {
+        return sortOrder === 'newest' ? timeB - timeA : timeA - timeB
+      }
+      return a.message.id.localeCompare(b.message.id)
+    })
+    return results
+  }, [searchResults, sortOrder])

  const highlightText = (text: string) => {
-    const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0)))
-    if (uniqueTerms.length === 0) {
+    const highlightRegex = buildKeywordUnionRegex(searchTerms, { matchMode, flags: 'gi' })
+    if (!highlightRegex) {
      return <span dangerouslySetInnerHTML={{ __html: text }} />
    }
-
-    const pattern = uniqueTerms
-      .sort((a, b) => b.length - a.length)
-      .map((term) => escapeRegex(term))
-      .join('|')
-    const regex = new RegExp(pattern, 'gi')
-    const highlightedText = text.replace(regex, (match) => `<mark>${match}</mark>`)
+    const highlightedText = text.replace(highlightRegex, (match) => `<mark>${match}</mark>`)
    return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
  }

@@ -289,14 +298,36 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
  return (
    <Container ref={containerRef} {...props} onScroll={handleScroll}>
      <Spin spinning={isLoading} indicator={<LoadingIcon color="var(--color-text-2)" />}>
-        {searchResults.length > 0 && (
+        <SearchToolbar>
+          <Segmented
+            shape="round"
+            size="small"
+            value={sortOrder}
+            onChange={(value) => setSortOrder(value as ResultSortOrder)}
+            options={[
+              { label: t('history.search.sort.newest'), value: 'newest' },
+              { label: t('history.search.sort.oldest'), value: 'oldest' }
+            ]}
+          />
+          <Segmented
+            shape="round"
+            size="small"
+            value={matchMode}
+            onChange={(value) => setMatchMode(value as KeywordMatchMode)}
+            options={[
+              { label: t('history.search.match.whole_word'), value: 'whole-word' },
+              { label: t('history.search.match.substring'), value: 'substring' }
+            ]}
+          />
+        </SearchToolbar>
+        {sortedSearchResults.length > 0 && (
          <SearchStats>
            Found {searchStats.count} results in {searchStats.time.toFixed(3)} seconds
          </SearchStats>
        )}
        <List
          itemLayout="vertical"
-          dataSource={searchResults}
+          dataSource={sortedSearchResults}
          pagination={{
            pageSize: 10,
            hideOnSinglePage: true
@@ -339,6 +370,16 @@ const SearchStats = styled.div`
  color: var(--color-text-3);
 `

+const SearchToolbar = styled.div`
+  width: 100%;
+  display: flex;
+  flex-direction: row;
+  justify-content: flex-start;
+  align-items: center;
+  gap: 10px;
+  margin-bottom: 8px;
+`
+
 const SearchResultTime = styled.div`
  margin-top: 10px;
  text-align: right;
--- a/src/renderer/src/utils/tests/keywordSearch.test.ts
+++ b/src/renderer/src/utils/tests/keywordSearch.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from 'vitest'
+
+import {
+  buildKeywordRegex,
+  buildKeywordUnionRegex,
+  type KeywordMatchMode,
+  splitKeywordsToTerms
+} from '../keywordSearch'
+
+describe('keywordSearch', () => {
+  describe('splitKeywordsToTerms', () => {
+    it('splits by whitespace and lowercases', () => {
+      expect(splitKeywordsToTerms('  Foo\tBAR \n baz  ')).toEqual(['foo', 'bar', 'baz'])
+    })
+
+    it('returns empty array for empty input', () => {
+      expect(splitKeywordsToTerms('')).toEqual([])
+    })
+  })
+
+  describe('buildKeywordRegex (whole-word)', () => {
+    const matchMode: KeywordMatchMode = 'whole-word'
+
+    it('matches standalone tokens but not substrings inside words', () => {
+      const regex = buildKeywordRegex('sms', { matchMode })
+      expect(regex.test('sms')).toBe(true)
+      expect(regex.test('sms,')).toBe(true)
+      expect(regex.test('use sms now')).toBe(true)
+      expect(regex.test('mechanisms')).toBe(false)
+    })
+
+    it('does not match inside longer alphanumeric strings (e.g. API keys)', () => {
+      const regex = buildKeywordRegex('sms', { matchMode })
+      expect(regex.test('IMr4WSMS5dwa52')).toBe(false)
+    })
+
+    it('treats underscores and punctuation as token boundaries', () => {
+      const regex = buildKeywordRegex('sms', { matchMode })
+      expect(regex.test('sms_service')).toBe(true)
+      expect(regex.test('sms-service')).toBe(true)
+      expect(regex.test('smss')).toBe(false)
+    })
+
+    it('does not match inside non-ASCII words', () => {
+      const regex = buildKeywordRegex('ana', { matchMode })
+      expect(regex.test('mañana')).toBe(false)
+      expect(regex.test('ana')).toBe(true)
+    })
+  })
+
+  describe('buildKeywordRegex (substring)', () => {
+    const matchMode: KeywordMatchMode = 'substring'
+
+    it('matches substrings inside other words', () => {
+      const regex = buildKeywordRegex('sms', { matchMode })
+      expect(regex.test('mechanisms')).toBe(true)
+      expect(regex.test('IMr4WSMS5dwa52')).toBe(true)
+    })
+  })
+
+  describe('buildKeywordUnionRegex', () => {
+    it('builds a case-insensitive union regex', () => {
+      const regex = buildKeywordUnionRegex(['sms', 'mms'], { matchMode: 'whole-word', flags: 'i' })
+      expect(regex).not.toBeNull()
+      expect(regex?.test('SMS')).toBe(true)
+      expect(regex?.test('MMS')).toBe(true)
+      expect(regex?.test('mechanisms')).toBe(false)
+    })
+  })
+})
--- a/src/renderer/src/utils/keywordSearch.ts
+++ b/src/renderer/src/utils/keywordSearch.ts
@@ -0,0 +1,58 @@
+export type KeywordMatchMode = 'whole-word' | 'substring'
+
+export function escapeRegex(text: string): string {
+  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+}
+
+export function splitKeywordsToTerms(keywords: string): string[] {
+  return (keywords || '')
+    .toLowerCase()
+    .split(/\s+/)
+    .filter((term) => term.length > 0)
+}
+
+function buildWholeWordPattern(escapedTerm: string): string {
+  // "Whole word" here means: do not match inside a larger alphanumeric token.
+  // This avoids false positives like:
+  // - API keys: "IMr4WSMS5dwa52"
+  // - suffixes: "mechanis[m][s]" when searching "sms"
+  return `(?<![\\p{L}\\p{N}])${escapedTerm}(?![\\p{L}\\p{N}])`
+}
+
+function addRegexFlag(flags: string, flag: string): string {
+  return flags.includes(flag) ? flags : `${flags}${flag}`
+}
+
+export function buildKeywordPattern(term: string, matchMode: KeywordMatchMode): string {
+  const escaped = escapeRegex(term)
+  return matchMode === 'whole-word' ? buildWholeWordPattern(escaped) : escaped
+}
+
+export function buildKeywordRegex(term: string, options: { matchMode: KeywordMatchMode; flags?: string }): RegExp {
+  const flags = options.flags ?? 'i'
+  const normalizedFlags = options.matchMode === 'whole-word' ? addRegexFlag(flags, 'u') : flags
+  return new RegExp(buildKeywordPattern(term, options.matchMode), normalizedFlags)
+}
+
+export function buildKeywordRegexes(
+  terms: string[],
+  options: { matchMode: KeywordMatchMode; flags?: string }
+): RegExp[] {
+  return terms.filter((term) => term.length > 0).map((term) => buildKeywordRegex(term, options))
+}
+
+export function buildKeywordUnionRegex(
+  terms: string[],
+  options: { matchMode: KeywordMatchMode; flags?: string }
+): RegExp | null {
+  const uniqueTerms = Array.from(new Set(terms.filter((term) => term.length > 0)))
+  if (uniqueTerms.length === 0) return null
+
+  const patterns = uniqueTerms
+    .sort((a, b) => b.length - a.length)
+    .map((term) => buildKeywordPattern(term, options.matchMode))
+
+  const flags = options.flags ?? 'gi'
+  const normalizedFlags = options.matchMode === 'whole-word' ? addRegexFlag(flags, 'u') : flags
+  return new RegExp(patterns.join('|'), normalizedFlags)
+}