mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-07-03 12:27:41 +08:00
hotfix(naming): sanitize non-ASCII characters in provider name for env vars (#14915)
### What this PR does

Before this PR: When a provider is named with non-ASCII characters (e.g. Chinese "测试", Japanese, Korean, emoji), Cherry Studio generates an invalid environment variable name for OpenCode (e.g. `OPENCODE_API_KEY_测试`), causing a bash export error: `not a valid identifier`.

After this PR: The `sanitizeProviderName` function strips every character outside the whitelist `[a-zA-Z0-9_\s.-]` (which removes all non-ASCII characters) before constructing env var names. For names that are empty after stripping (e.g. pure non-ASCII names), a deterministic hash-based fallback (e.g. `p_xxxxx`) is used to produce a valid identifier.

Fixes #14914

### Why we need it and why it was done in this way

Bash environment variable names only allow `[a-zA-Z0-9_]`. The original `sanitizeProviderName` only handled spaces and some special characters, but not non-ASCII characters like CJK or emoji.

The following tradeoffs were made:

- Non-ASCII characters are removed (not transliterated) to avoid introducing extra dependencies or language-specific logic.
- A simple hash fallback ensures pure non-ASCII names still produce a valid, deterministic identifier.

The following alternatives were considered:

- Pinyin transliteration for Chinese — requires an additional library and only covers one language.
- Using provider UUID as the env var suffix — works but produces unreadable identifiers.

### Breaking changes

None for provider names that consist only of ASCII letters, digits, spaces, `_`, `-` and `.`. The fix only affects env var name generation for OpenCode. Note that ASCII symbols that were previously replaced with `_` (e.g. `/`, `<`, `>`, `:`) are now removed instead, so `Provider/Name` yields `ProviderName` rather than `Provider_Name`.
### Special notes for your reviewer

Changes are limited to:

- `src/renderer/src/utils/naming.ts` — `sanitizeProviderName()` function (the single source of truth for env var name sanitization)
- `src/renderer/src/utils/__tests__/naming.test.ts` — Added test cases for Chinese, Japanese, Korean, emoji, and mixed ASCII/non-ASCII inputs

### Checklist

- [x] PR: The PR description is expressive enough and will help future contributors
- [x] Code: [Write code that humans can understand](https://en.wikiquote.org/wiki/Martin_Fowler#code-for-humans) and [Keep it simple](https://en.wikipedia.org/wiki/KISS_principle)
- [ ] Refactor: You have [left the code cleaner than you found it (Boy Scout Rule)](https://learning.oreilly.com/library/view/97-things-every/9780596809515/ch08.html)
- [ ] Upgrade: Impact of this change on upgrade flows was considered and addressed if required
- [ ] Documentation: A [user-guide update](https://docs.cherry-ai.com) was considered and is present (link) or not required. Check this only when the PR introduces or changes a user-facing feature or behavior.
- [x] Self-review: I have reviewed my own code (e.g., via [`/gh-pr-review`](/.claude/skills/gh-pr-review/SKILL.md), `gh pr diff`, or GitHub UI) before requesting review from others

### Release note

```release-note
Fix bash export error when OpenCode provider name contains non-ASCII characters (e.g. Chinese, Japanese, emoji).
```
This commit is contained in:
@@ -330,17 +330,53 @@ describe('naming', () => {
|
||||
expect(sanitizeProviderName('My Provider')).toBe('My-Provider')
|
||||
})
|
||||
|
||||
it('should replace dangerous characters with underscores', () => {
|
||||
expect(sanitizeProviderName('Provider/Name')).toBe('Provider_Name')
|
||||
it('should strip characters outside env-var-safe whitelist', () => {
|
||||
expect(sanitizeProviderName('Provider/Name')).toBe('ProviderName')
|
||||
})
|
||||
|
||||
it('should handle mixed special characters', () => {
|
||||
expect(sanitizeProviderName('My Provider <test>:name')).toBe('My-Provider-_test__name')
|
||||
expect(sanitizeProviderName('My Provider <test>:name')).toBe('My-Provider-testname')
|
||||
})
|
||||
|
||||
it('should return empty string for empty input', () => {
|
||||
expect(sanitizeProviderName('')).toBe('')
|
||||
})
|
||||
|
||||
it('should fall back to hash for pure non-ASCII names', () => {
|
||||
expect(sanitizeProviderName('测试')).toMatch(/^p_[a-z0-9]+$/)
|
||||
// deterministic: same input produces same hash
|
||||
expect(sanitizeProviderName('测试')).toBe(sanitizeProviderName('测试'))
|
||||
})
|
||||
|
||||
it('should handle various non-ASCII characters', () => {
|
||||
// Chinese
|
||||
expect(sanitizeProviderName('测试')).toMatch(/^p_[a-z0-9]+$/)
|
||||
// Japanese
|
||||
expect(sanitizeProviderName('プロバイダー')).toMatch(/^p_[a-z0-9]+$/)
|
||||
// Korean
|
||||
expect(sanitizeProviderName('공급자')).toMatch(/^p_[a-z0-9]+$/)
|
||||
// Emoji
|
||||
expect(sanitizeProviderName('🎉provider')).toBe('provider')
|
||||
})
|
||||
|
||||
it('should produce a valid env var identifier for mixed ASCII and non-ASCII', () => {
|
||||
expect(sanitizeProviderName('日本語Provider')).toBe('Provider')
|
||||
expect(sanitizeProviderName('My 测试 Provider')).toBe('My-Provider')
|
||||
})
|
||||
|
||||
it('should strip ASCII symbols not allowed in env var names', () => {
|
||||
expect(sanitizeProviderName('foo@bar')).toBe('foobar')
|
||||
expect(sanitizeProviderName('foo@bar+baz(test)')).toBe('foobarbaztest')
|
||||
expect(sanitizeProviderName('my$provider!name')).toBe('myprovidername')
|
||||
expect(sanitizeProviderName('a#b%c&d')).toBe('abcd')
|
||||
})
|
||||
|
||||
it('should keep allowed env-var-safe characters', () => {
|
||||
expect(sanitizeProviderName('my-provider')).toBe('my-provider')
|
||||
expect(sanitizeProviderName('my_provider')).toBe('my_provider')
|
||||
expect(sanitizeProviderName('my.provider')).toBe('my.provider')
|
||||
expect(sanitizeProviderName('Provider123')).toBe('Provider123')
|
||||
})
|
||||
})
|
||||
|
||||
describe('truncateText', () => {
|
||||
|
||||
@@ -207,16 +207,28 @@ export function getBriefInfo(text: string, maxLength: number = 50): string {
|
||||
}
|
||||
|
||||
// NOTE(review): this span appears to be a rendered diff hunk (see the
// `@@ -207,16 +207,28 @@` header) in which removed and added lines are
// interleaved without +/- markers and separated by `|` / `||||` residue.
// Presumably `return name` and the "dangerous chars" replace are the removed
// pre-change lines — confirm against the repository before treating this as
// compilable code.
/**
|
||||
* Sanitizes a provider name for use in an environment variable value:
|
||||
* - Replaces spaces with dashes
|
||||
* - Replaces other dangerous characters with underscores
|
||||
* Sanitizes a provider name for use in an environment variable name:
|
||||
* - Keeps only [a-zA-Z0-9_\s.-] (whitelist)
|
||||
* - Converts spaces to dashes (downstream then converts - and . to _)
|
||||
* - Falls back to a hash when nothing is left after sanitizing
|
||||
* @param {string} name Input string
|
||||
* @returns {string} Sanitized string
|
||||
*/
|
||||
export function sanitizeProviderName(name: string): string {
|
||||
return name
|
||||
if (!name) return name // falsy input (e.g. '') is returned as-is
|
||||
|
||||
const sanitized = name
|
||||
.replace(/[^a-zA-Z0-9_\s.-]/g, '') // whitelist: only keep env-var-safe chars
|
||||
.replace(/\s+/g, '-') // spaces -> dashes
|
||||
.replace(/[<>:"|?*\\/_]/g, '_') // dangerous chars -> underscores (NOTE(review): presumably the removed pre-change step — confirm)
|
||||
|
||||
if (!sanitized) { // nothing survived the whitelist (e.g. a name made only of non-ASCII characters)
|
||||
let hash = 0
|
||||
for (let i = 0; i < name.length; i++) {
|
||||
hash = ((hash << 5) - hash + name.charCodeAt(i)) | 0 // hash * 31 + UTF-16 code unit; "| 0" truncates to a signed 32-bit int
|
||||
}
|
||||
return 'p_' + Math.abs(hash).toString(36) // Math.abs drops the sign; base 36 yields only [0-9a-z]
|
||||
}
|
||||
return sanitized
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user