feat: add LightRAG runtime config and quickstart updates

Squash the branch changes into a single commit.

Includes the LightRAG/memory workflow branch state, runtime-config API/UI, and quickstart service hardening.
This commit is contained in:
lyfics
2026-06-19 02:55:12 +08:00
committed by zyairehhh
parent e27b1c6501
commit f65f2bb5d0
39 changed files with 1615 additions and 67 deletions

View File

@@ -132,6 +132,16 @@ OpenTalking's **orchestration layer** (API / Worker / frontend) and **digital-hu
## Quickstart
### Quick Experience: Compshare Image
If you want to try the OpenTalking + OmniRT + QuickTalk real-time digital-human path before setting up everything manually, use the community image we published on Compshare:
- Image URL: <https://www.compshare.cn/images/TdDwmKZUZebI>
- Exposed port: `5173` (WebUI; API traffic is proxied internally)
- Guide: [Compshare image quick experience](docs/en/quick-start/compshare-image.md)
The image includes OpenTalking, OmniRT, the QuickTalk runtime environment, and model files. After deploying an instance, open port `5173` and visit the instance URL provided by the platform. If you need to restart services manually, follow the commands in the guide.
Use this path when you are trying the project for the first time and do not want to download video model weights yet. The digital-human image uses the built-in static Mock frame, while LLM replies, streaming TTS, subtitle events, and WebRTC delivery still run through the full product path.
```bash

View File

@@ -132,6 +132,16 @@ OpenTalking's **orchestration layer** (API / Worker / frontend) and **digital-hu
## Quickstart
### Quick Experience: Compshare Image
If you want to try the OpenTalking + OmniRT + QuickTalk real-time digital-human path before setting up everything manually, use the community image we published on Compshare:
- Image URL: <https://www.compshare.cn/images/TdDwmKZUZebI>
- Exposed port: `5173` (WebUI; API traffic is proxied internally)
- Guide: [Compshare image quick experience](docs/en/quick-start/compshare-image.md)
The image includes OpenTalking, OmniRT, the QuickTalk runtime environment, and model files. After deploying an instance, open port `5173` and visit the instance URL provided by the platform. If you need to restart services manually, follow the commands in the guide.
Use this path when you are trying the project for the first time and do not want to download video model weights yet. The digital-human image uses the built-in static Mock frame, while LLM replies, streaming TTS, subtitle events, and WebRTC delivery still run through the full product path.
```bash

View File

@@ -132,6 +132,16 @@ OpenTalking 的 **编排层**API / Worker / 前端)和 **数字人合成后
## 快速开始
### 快速体验:优云智算镜像
如果你只是想先体验 OpenTalking + OmniRT + QuickTalk 的实时数字人链路,可以直接使用我们在优云智算发布的社区镜像:
- 镜像地址:<https://www.compshare.cn/images/TdDwmKZUZebI>
- 对外端口:`5173`(WebUI;内部自动代理 API)
- 操作文档:[优云智算镜像快速体验](docs/zh/quick-start/compshare-image.md)
镜像内已预置 OpenTalking、OmniRT、QuickTalk 运行环境和模型文件。部署实例后开放 `5173` 端口,在浏览器访问平台提供的实例地址即可进入 WebUI。如需手动重启服务,请按操作文档中的命令执行。
适用:第一次接触项目,不下载视频模型权重,先用 Mock 模式跑通产品链路。数字人画面使用内置静态帧,LLM 回复、流式 TTS、字幕事件和 WebRTC 传输仍是完整链路。
```bash

View File

@@ -9,7 +9,7 @@ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from apps.api.core.config import get_settings
from apps.api.routes import agent, avatars, events, exports, health, memory, models, personas, sessions, tts_preview, video_clone, video_creation, voices
from apps.api.routes import agent, avatars, events, exports, health, memory, models, personas, runtime_config, sessions, tts_preview, video_clone, video_creation, voices
from opentalking.voice.store import init_voice_store
@@ -43,6 +43,7 @@ def create_app() -> FastAPI:
app.include_router(personas.router)
app.include_router(events.router)
app.include_router(exports.router)
app.include_router(runtime_config.router)
app.include_router(tts_preview.router)
app.include_router(video_clone.router)
app.include_router(video_creation.router)

View File

@@ -0,0 +1,478 @@
from __future__ import annotations
import os
import shutil
import time
from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel, Field
from apps.api.core.config import get_settings
from opentalking.providers.stt.factory import (
clear_stt_adapter_cache,
normalize_stt_provider,
stt_enabled_providers,
stt_provider_config,
)
from opentalking.providers.tts.factory import tts_enabled_providers, tts_provider_config
from opentalking.providers.tts.providers import normalize_tts_provider
# All endpoints in this module are mounted under /runtime-config.
router = APIRouter(prefix="/runtime-config", tags=["runtime-config"])
# .env file that runtime updates are persisted to, located three directories
# above this module's directory (presumably the repository root — confirm
# against the real file location).
_ENV_PATH = Path(__file__).resolve().parents[3] / ".env"
# Allowlist of environment variables this module may write. The apply endpoint
# rejects any update key outside this set and, after writing .env, mirrors only
# these keys from the file into os.environ.
_RUNTIME_ENV_KEYS = {
# Shared DashScope key (used as a fallback when provider-specific keys are empty).
"DASHSCOPE_API_KEY",
# LLM settings.
"OPENTALKING_LLM_PROVIDER",
"OPENTALKING_LLM_BASE_URL",
"OPENTALKING_LLM_API_KEY",
"OPENTALKING_LLM_MODEL",
# Speech-to-text settings.
"OPENTALKING_STT_DEFAULT_PROVIDER",
"OPENTALKING_STT_ENABLED_PROVIDERS",
"OPENTALKING_STT_MODEL",
"OPENTALKING_STT_API_KEY",
"OPENTALKING_STT_DASHSCOPE_MODEL",
"OPENTALKING_STT_DASHSCOPE_API_KEY",
"OPENTALKING_STT_OPENAI_BASE_URL",
"OPENTALKING_STT_OPENAI_MODEL",
"OPENTALKING_STT_OPENAI_API_KEY",
"OPENTALKING_STT_XIAOMI_BASE_URL",
"OPENTALKING_STT_XIAOMI_MODEL",
"OPENTALKING_STT_XIAOMI_API_KEY",
"OPENTALKING_STT_SENSEVOICE_MODEL",
"OPENTALKING_STT_FUNASR_MODEL",
"OPENTALKING_STT_SHERPA_ONNX_MODEL",
# Text-to-speech settings.
"OPENTALKING_TTS_PROVIDER",
"OPENTALKING_TTS_DEFAULT_PROVIDER",
"OPENTALKING_TTS_ENABLED_PROVIDERS",
"OPENTALKING_TTS_VOICE",
"OPENTALKING_TTS_EDGE_VOICE",
"OPENTALKING_TTS_DASHSCOPE_SERVICE_URL",
"OPENTALKING_TTS_DASHSCOPE_MODEL",
"OPENTALKING_TTS_DASHSCOPE_VOICE",
"OPENTALKING_TTS_DASHSCOPE_API_KEY",
"OPENTALKING_TTS_COSYVOICE_SERVICE_URL",
"OPENTALKING_TTS_COSYVOICE_MODEL",
"OPENTALKING_TTS_SAMBERT_MODEL",
"OPENTALKING_TTS_LOCAL_COSYVOICE_SERVICE_URL",
"OPENTALKING_TTS_LOCAL_COSYVOICE_MODEL",
"OPENTALKING_TTS_LOCAL_INDEXTTS_SERVICE_URL",
"OPENTALKING_TTS_LOCAL_INDEXTTS_MODEL",
"OPENTALKING_TTS_OMNIRT_INDEXTTS_SERVICE_URL",
"OPENTALKING_TTS_OMNIRT_INDEXTTS_MODEL",
"OPENTALKING_TTS_OPENAI_BASE_URL",
"OPENTALKING_TTS_OPENAI_MODEL",
"OPENTALKING_TTS_OPENAI_VOICE",
"OPENTALKING_TTS_OPENAI_API_KEY",
"OPENTALKING_TTS_XIAOMI_BASE_URL",
"OPENTALKING_TTS_XIAOMI_MODEL",
"OPENTALKING_TTS_XIAOMI_VOICE",
"OPENTALKING_TTS_XIAOMI_API_KEY",
}
class RuntimeConfigPayload(BaseModel):
    """Request body accepted by ``POST /runtime-config/apply``.

    Every text field is optional and length-limited. Fields left as ``None``
    (or blank after stripping) produce no update for the corresponding
    environment variable.
    """

    # --- LLM endpoint ---
    llm_base_url: Optional[str] = Field(default=None, max_length=2048)
    llm_model: Optional[str] = Field(default=None, max_length=256)
    llm_api_key: Optional[str] = Field(default=None, max_length=4096)

    # --- Speech-to-text ---
    stt_provider: Optional[str] = Field(default=None, max_length=64)
    stt_base_url: Optional[str] = Field(default=None, max_length=2048)
    stt_model: Optional[str] = Field(default=None, max_length=256)
    stt_api_key: Optional[str] = Field(default=None, max_length=4096)

    # --- Text-to-speech ---
    tts_provider: Optional[str] = Field(default=None, max_length=64)
    tts_base_url: Optional[str] = Field(default=None, max_length=2048)
    tts_model: Optional[str] = Field(default=None, max_length=256)
    tts_voice: Optional[str] = Field(default=None, max_length=256)
    tts_api_key: Optional[str] = Field(default=None, max_length=4096)

    # When true, the first API key supplied in the request is also stored as
    # DASHSCOPE_API_KEY.
    sync_dashscope_api_key: bool = True
def _strip(value: str | None) -> str:
return (value or "").strip()
def _unquote_env_value(value: str) -> str:
if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}:
return value[1:-1]
return value
def _quote_env_value(value: str) -> str:
if not value:
return ""
if any(ch.isspace() for ch in value) or any(ch in value for ch in ['"', "'", "#"]):
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
return f'"{escaped}"'
return value
def _read_env_lines(path: Path) -> tuple[list[str], dict[str, str]]:
    """Load a ``.env`` file as raw lines plus a parsed key/value mapping.

    Blank lines, ``#`` comment lines and lines without ``=`` are kept in the
    raw list but contribute nothing to the mapping. A leading ``export `` on
    the key is ignored and values are unquoted.

    Returns:
        ``([], {})`` when the file does not exist, otherwise
        ``(raw_lines, parsed_values)``.
    """
    if not path.exists():
        return [], {}
    raw_lines = path.read_text(encoding="utf-8").splitlines()
    parsed: dict[str, str] = {}
    for raw in raw_lines:
        text = raw.strip()
        is_assignment = bool(text) and not text.startswith("#") and "=" in text
        if is_assignment:
            name, _, rhs = text.partition("=")
            name = name.removeprefix("export ").strip()
            if name:
                parsed[name] = _unquote_env_value(rhs.strip())
    return raw_lines, parsed
def _env_value(values: dict[str, str], key: str, fallback: str = "") -> str:
return os.environ.get(key, "").strip() or values.get(key, "").strip() or fallback
def _settings_value(settings: Any, name: str, default: str = "") -> str:
value = getattr(settings, name, default)
return str(value or "").strip()
def _enabled_provider_csv(current: list[str], provider: str) -> str:
providers = [item for item in current if item]
if provider and provider not in providers:
providers.append(provider)
return ",".join(providers)
def _write_env_updates(path: Path, updates: dict[str, str]) -> None:
    """Persist *updates* into the ``.env`` file at *path*.

    Existing assignments for updated keys are rewritten in place (keeping an
    ``export `` prefix when present); comments, blank lines and unrelated
    assignments are preserved. Keys not yet in the file are appended in sorted
    order after a blank separator line.

    Before writing, an existing file is copied to ``<name>.bak.<unix-time>``.
    The new content is written to a sibling ``.tmp`` file and moved over
    *path*. The original file's permission bits are carried over to the
    replacement, so a restricted secrets file (for example mode 600) does not
    pick up the process default mode.

    Args:
        path: Location of the ``.env`` file; it may not exist yet.
        updates: Mapping of environment variable name to new value.

    Raises:
        OSError: If the backup, the write or the replace fails. The temporary
            file is removed before the error propagates.
    """
    lines, _ = _read_env_lines(path)
    existed = path.exists()
    if existed:
        shutil.copy2(path, path.with_name(f"{path.name}.bak.{int(time.time())}"))

    seen: set[str] = set()
    out: list[str] = []
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith("#") or "=" not in stripped:
            out.append(line)
            continue
        raw_key, _ = stripped.split("=", 1)
        key = raw_key.removeprefix("export ").strip()
        if key in updates:
            prefix = "export " if raw_key.strip().startswith("export ") else ""
            out.append(f"{prefix}{key}={_quote_env_value(updates[key])}")
            seen.add(key)
        else:
            out.append(line)

    missing = [key for key in sorted(updates) if key not in seen]
    if missing:
        if out and out[-1].strip():
            out.append("")
        out.extend(f"{key}={_quote_env_value(updates[key])}" for key in missing)

    tmp = path.with_suffix(f"{path.suffix}.tmp")
    try:
        if existed:
            # Create the temp file owner-only so secrets are not exposed under
            # a wider default mode while the content is being written.
            tmp.touch(mode=0o600, exist_ok=True)
        tmp.write_text("\n".join(out).rstrip() + "\n", encoding="utf-8")
        if existed:
            shutil.copymode(path, tmp)
        tmp.replace(path)
    except OSError:
        tmp.unlink(missing_ok=True)
        raise
def _safe_tts_provider(raw: str) -> str:
    """Normalise a TTS provider name, falling back to ``"edge"``.

    Invalid names (``ValueError`` from the normaliser) and empty results both
    resolve to ``"edge"``. The two IndexTTS variants are reported under the
    single name ``"indextts"``.
    """
    indextts_variants = {"local_indextts", "omnirt_indextts"}
    try:
        resolved = normalize_tts_provider(raw, default="edge")
    except ValueError:
        return "edge"
    resolved = resolved or "edge"
    if resolved in indextts_variants:
        return "indextts"
    return resolved
def _safe_stt_provider(raw: str) -> str:
    """Normalise an STT provider name, falling back to ``"dashscope"``.

    Invalid names (``ValueError`` from the normaliser) and empty results both
    resolve to ``"dashscope"``.
    """
    try:
        resolved = normalize_stt_provider(raw, default="dashscope")
    except ValueError:
        return "dashscope"
    return resolved or "dashscope"
def _current_stt_payload(provider: str, settings: Any, values: dict[str, str]) -> dict[str, Any]:
    """Describe the effective STT configuration for *provider*.

    Each setting is resolved from the process environment, then the ``.env``
    values, then the settings object. The API key itself is never returned,
    only whether one is set.

    Args:
        provider: Normalised STT provider name; unrecognised names are treated
            as DashScope.
        settings: Settings object queried by attribute name.
        values: Parsed ``.env`` key/value pairs.

    Returns:
        Dict with ``provider``, ``enabled_providers``, ``base_url``, ``model``,
        ``api_key_set`` and ``service_url_set``.
    """
    status = stt_provider_config(provider)

    def lookup(env_key: str, attr: str, default: str = "") -> str:
        return _env_value(values, env_key, _settings_value(settings, attr, default))

    # Only the HTTP-based providers have an endpoint or key of their own.
    endpoint = ""
    secret = ""
    if provider == "openai_compatible":
        endpoint = lookup("OPENTALKING_STT_OPENAI_BASE_URL", "stt_openai_base_url")
        model_name = lookup("OPENTALKING_STT_OPENAI_MODEL", "stt_openai_model", "whisper-1")
        secret = lookup("OPENTALKING_STT_OPENAI_API_KEY", "stt_openai_api_key")
    elif provider == "xiaomi_mimo":
        endpoint = lookup("OPENTALKING_STT_XIAOMI_BASE_URL", "stt_xiaomi_base_url")
        model_name = lookup("OPENTALKING_STT_XIAOMI_MODEL", "stt_xiaomi_model", "mimo-v2.5-asr")
        secret = lookup("OPENTALKING_STT_XIAOMI_API_KEY", "stt_xiaomi_api_key")
    elif provider == "sensevoice":
        model_name = lookup("OPENTALKING_STT_SENSEVOICE_MODEL", "stt_sensevoice_model", "iic/SenseVoiceSmall")
    elif provider == "funasr":
        model_name = lookup("OPENTALKING_STT_FUNASR_MODEL", "stt_funasr_model", "iic/Fun-ASR-Nano-2512")
    elif provider == "sherpa_onnx":
        model_name = lookup("OPENTALKING_STT_SHERPA_ONNX_MODEL", "stt_sherpa_onnx_model")
    else:
        model_name = lookup("OPENTALKING_STT_DASHSCOPE_MODEL", "stt_dashscope_model", "paraformer-realtime-v2")
        # The shared DashScope key is used when no STT-specific key is set.
        secret = lookup("OPENTALKING_STT_DASHSCOPE_API_KEY", "stt_dashscope_api_key") or _env_value(
            values, "DASHSCOPE_API_KEY"
        )

    return {
        "provider": provider,
        "enabled_providers": stt_enabled_providers(),
        "base_url": endpoint.rstrip("/"),
        "model": model_name or str(status.get("model") or ""),
        "api_key_set": bool(secret or status.get("key_set")),
        "service_url_set": bool(endpoint or status.get("service_url_set")),
    }
def _current_tts_payload(provider: str, settings: Any, values: dict[str, str]) -> dict[str, Any]:
    """Describe the effective TTS configuration for *provider*.

    Each setting is resolved from the process environment, then the ``.env``
    values, then the settings object. The API key itself is never returned,
    only whether one is set.

    Args:
        provider: Normalised TTS provider name; unrecognised names are treated
            as Edge TTS.
        settings: Settings object queried by attribute name.
        values: Parsed ``.env`` key/value pairs.

    Returns:
        Dict with ``provider``, ``enabled_providers``, ``base_url``, ``model``,
        ``voice``, ``api_key_set`` and ``service_url_set``.
    """
    status = tts_provider_config(provider)

    def lookup(env_key: str, attr: str, default: str = "") -> str:
        return _env_value(values, env_key, _settings_value(settings, attr, default))

    def dashscope_secret() -> str:
        # TTS-specific DashScope key first, then the shared DashScope key.
        return lookup("OPENTALKING_TTS_DASHSCOPE_API_KEY", "tts_dashscope_api_key") or _env_value(
            values, "DASHSCOPE_API_KEY"
        )

    def shared_voice() -> str:
        return lookup("OPENTALKING_TTS_VOICE", "tts_voice")

    endpoint = ""
    model_name = ""
    secret = ""
    if provider == "openai_compatible":
        endpoint = lookup("OPENTALKING_TTS_OPENAI_BASE_URL", "tts_openai_base_url")
        model_name = lookup("OPENTALKING_TTS_OPENAI_MODEL", "tts_openai_model", "gpt-4o-mini-tts")
        voice_name = lookup("OPENTALKING_TTS_OPENAI_VOICE", "tts_openai_voice", "alloy")
        secret = lookup("OPENTALKING_TTS_OPENAI_API_KEY", "tts_openai_api_key")
    elif provider == "xiaomi_mimo":
        endpoint = lookup("OPENTALKING_TTS_XIAOMI_BASE_URL", "tts_xiaomi_base_url")
        model_name = lookup("OPENTALKING_TTS_XIAOMI_MODEL", "tts_xiaomi_model", "mimo-v2.5-tts")
        voice_name = lookup("OPENTALKING_TTS_XIAOMI_VOICE", "tts_xiaomi_voice", "mimo_default")
        secret = lookup("OPENTALKING_TTS_XIAOMI_API_KEY", "tts_xiaomi_api_key")
    elif provider == "cosyvoice":
        endpoint = lookup("OPENTALKING_TTS_COSYVOICE_SERVICE_URL", "tts_cosyvoice_service_url")
        model_name = lookup("OPENTALKING_TTS_COSYVOICE_MODEL", "tts_cosyvoice_model", "cosyvoice-v3-flash")
        voice_name = shared_voice()
        secret = dashscope_secret()
    elif provider == "sambert":
        model_name = lookup("OPENTALKING_TTS_SAMBERT_MODEL", "tts_sambert_model", "sambert-zhichu-v1")
        voice_name = shared_voice()
        secret = dashscope_secret()
    elif provider == "local_cosyvoice":
        endpoint = lookup("OPENTALKING_TTS_LOCAL_COSYVOICE_SERVICE_URL", "tts_local_cosyvoice_service_url")
        model_name = lookup(
            "OPENTALKING_TTS_LOCAL_COSYVOICE_MODEL",
            "tts_local_cosyvoice_model",
            "FunAudioLLM/Fun-CosyVoice3-0.5B-2512",
        )
        voice_name = shared_voice()
    elif provider == "indextts":
        # The local IndexTTS settings win; the OmniRT ones are the fallback.
        endpoint = lookup(
            "OPENTALKING_TTS_LOCAL_INDEXTTS_SERVICE_URL", "tts_local_indextts_service_url"
        ) or lookup("OPENTALKING_TTS_OMNIRT_INDEXTTS_SERVICE_URL", "tts_omnirt_indextts_service_url")
        model_name = lookup(
            "OPENTALKING_TTS_LOCAL_INDEXTTS_MODEL", "tts_local_indextts_model", "IndexTeam/IndexTTS-2"
        ) or lookup("OPENTALKING_TTS_OMNIRT_INDEXTTS_MODEL", "tts_omnirt_indextts_model", "IndexTeam/IndexTTS-2")
        voice_name = shared_voice()
    elif provider == "dashscope":
        endpoint = lookup(
            "OPENTALKING_TTS_DASHSCOPE_SERVICE_URL",
            "tts_dashscope_service_url",
            "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
        )
        model_name = lookup("OPENTALKING_TTS_DASHSCOPE_MODEL", "tts_dashscope_model", "qwen3-tts-flash-realtime")
        voice_name = lookup("OPENTALKING_TTS_DASHSCOPE_VOICE", "tts_dashscope_voice", "Cherry")
        secret = dashscope_secret()
    else:
        voice_name = lookup("OPENTALKING_TTS_EDGE_VOICE", "tts_edge_voice", "zh-CN-XiaoxiaoNeural")

    return {
        "provider": provider,
        "enabled_providers": tts_enabled_providers(),
        "base_url": endpoint.rstrip("/"),
        "model": model_name or str(status.get("model") or ""),
        "voice": voice_name or str(status.get("voice") or ""),
        "api_key_set": bool(secret or status.get("key_set")),
        "service_url_set": bool(endpoint or status.get("service_url_set")),
    }
def _current_payload(settings: Any | None = None) -> dict[str, Any]:
    """Build the response describing the current LLM, STT and TTS setup.

    Args:
        settings: Settings object to consult; ``get_settings()`` is used when
            omitted or falsy.

    Returns:
        Dict with ``llm``, ``stt`` and ``tts`` sections. API keys are reported
        only as ``api_key_set`` booleans.
    """
    settings = settings or get_settings()
    _, values = _read_env_lines(_ENV_PATH)

    # First non-empty source decides the provider: environment/.env overrides
    # come before anything taken from the settings object.
    tts_raw = (
        _env_value(values, "OPENTALKING_TTS_DEFAULT_PROVIDER")
        or _env_value(values, "OPENTALKING_TTS_PROVIDER")
        or _settings_value(settings, "normalized_tts_default_provider")
        or _settings_value(settings, "normalized_tts_provider")
        or _settings_value(settings, "tts_provider", "edge")
    )
    tts_provider = _safe_tts_provider(tts_raw)

    stt_raw = (
        _env_value(values, "OPENTALKING_STT_DEFAULT_PROVIDER")
        or _settings_value(settings, "normalized_stt_default_provider")
        or _settings_value(settings, "normalized_stt_provider")
        or _settings_value(settings, "stt_provider", "dashscope")
    )
    stt_provider = _safe_stt_provider(stt_raw)

    llm_key = _env_value(values, "OPENTALKING_LLM_API_KEY", _settings_value(settings, "llm_api_key"))
    llm_base_url = _env_value(values, "OPENTALKING_LLM_BASE_URL", _settings_value(settings, "llm_base_url"))
    llm_model = _env_value(values, "OPENTALKING_LLM_MODEL", _settings_value(settings, "llm_model", "qwen-turbo"))
    llm_section = {
        "base_url": llm_base_url.rstrip("/"),
        "model": llm_model,
        "api_key_set": bool(llm_key),
    }

    return {
        "llm": llm_section,
        "stt": _current_stt_payload(stt_provider, settings, values),
        "tts": _current_tts_payload(tts_provider, settings, values),
    }
# Translate an apply request into a mapping of environment variable -> value.
# Only fields that are non-blank after stripping produce an entry; provider
# names that the normalisers reject raise HTTP 400.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the per-provider branches below (for example whether the
# `sync_key = sync_key or value` lines sit inside the preceding `else:`)
# cannot be confirmed here — check against the real file before restructuring.
def _build_updates(payload: RuntimeConfigPayload) -> dict[str, str]:
# The LLM provider is written on every apply, whatever fields were supplied.
updates: dict[str, str] = {"OPENTALKING_LLM_PROVIDER": "openai_compatible"}
# Candidate API key for the optional DASHSCOPE_API_KEY sync at the end.
sync_key = ""
if value := _strip(payload.llm_base_url):
updates["OPENTALKING_LLM_BASE_URL"] = value.rstrip("/")
if value := _strip(payload.llm_model):
updates["OPENTALKING_LLM_MODEL"] = value
if value := _strip(payload.llm_api_key):
updates["OPENTALKING_LLM_API_KEY"] = value
sync_key = value
# --- STT ---
stt_provider = ""
if raw := _strip(payload.stt_provider):
try:
stt_provider = normalize_stt_provider(raw, default=None) or ""
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
if stt_provider:
updates["OPENTALKING_STT_DEFAULT_PROVIDER"] = stt_provider
# Keep the currently enabled providers and make sure the chosen one is listed.
updates["OPENTALKING_STT_ENABLED_PROVIDERS"] = _enabled_provider_csv(stt_enabled_providers(), stt_provider)
# Only the OpenAI-compatible and Xiaomi STT providers take a base URL here.
if value := _strip(payload.stt_base_url):
if stt_provider == "openai_compatible":
updates["OPENTALKING_STT_OPENAI_BASE_URL"] = value.rstrip("/")
elif stt_provider == "xiaomi_mimo":
updates["OPENTALKING_STT_XIAOMI_BASE_URL"] = value.rstrip("/")
if value := _strip(payload.stt_model):
updates["OPENTALKING_STT_MODEL"] = value
if stt_provider == "openai_compatible":
updates["OPENTALKING_STT_OPENAI_MODEL"] = value
elif stt_provider == "xiaomi_mimo":
updates["OPENTALKING_STT_XIAOMI_MODEL"] = value
elif stt_provider == "sensevoice":
updates["OPENTALKING_STT_SENSEVOICE_MODEL"] = value
elif stt_provider == "funasr":
updates["OPENTALKING_STT_FUNASR_MODEL"] = value
elif stt_provider == "sherpa_onnx":
updates["OPENTALKING_STT_SHERPA_ONNX_MODEL"] = value
else:
# NOTE(review): this branch also catches an empty stt_provider (none
# supplied), so the model is then stored under the DashScope key — confirm
# that is intended.
updates["OPENTALKING_STT_DASHSCOPE_MODEL"] = value
if value := _strip(payload.stt_api_key):
if stt_provider == "openai_compatible":
updates["OPENTALKING_STT_OPENAI_API_KEY"] = value
elif stt_provider == "xiaomi_mimo":
updates["OPENTALKING_STT_XIAOMI_API_KEY"] = value
else:
updates["OPENTALKING_STT_DASHSCOPE_API_KEY"] = value
# An LLM key supplied earlier takes precedence as the sync candidate.
sync_key = sync_key or value
# --- TTS ---
tts_provider = ""
if raw := _strip(payload.tts_provider):
try:
normalized = normalize_tts_provider(raw, default=None) or ""
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
# Both IndexTTS variants are stored under the single name "indextts".
tts_provider = "indextts" if normalized in {"local_indextts", "omnirt_indextts"} else normalized
if tts_provider:
updates["OPENTALKING_TTS_PROVIDER"] = tts_provider
updates["OPENTALKING_TTS_DEFAULT_PROVIDER"] = tts_provider
updates["OPENTALKING_TTS_ENABLED_PROVIDERS"] = _enabled_provider_csv(tts_enabled_providers(), tts_provider)
# Providers missing from the tables below have no URL/model variable; the
# supplied value is dropped for them.
if value := _strip(payload.tts_base_url):
key = {
"dashscope": "OPENTALKING_TTS_DASHSCOPE_SERVICE_URL",
"cosyvoice": "OPENTALKING_TTS_COSYVOICE_SERVICE_URL",
"local_cosyvoice": "OPENTALKING_TTS_LOCAL_COSYVOICE_SERVICE_URL",
"indextts": "OPENTALKING_TTS_LOCAL_INDEXTTS_SERVICE_URL",
"openai_compatible": "OPENTALKING_TTS_OPENAI_BASE_URL",
"xiaomi_mimo": "OPENTALKING_TTS_XIAOMI_BASE_URL",
}.get(tts_provider)
if key:
updates[key] = value.rstrip("/")
if value := _strip(payload.tts_model):
key = {
"dashscope": "OPENTALKING_TTS_DASHSCOPE_MODEL",
"cosyvoice": "OPENTALKING_TTS_COSYVOICE_MODEL",
"sambert": "OPENTALKING_TTS_SAMBERT_MODEL",
"local_cosyvoice": "OPENTALKING_TTS_LOCAL_COSYVOICE_MODEL",
"indextts": "OPENTALKING_TTS_LOCAL_INDEXTTS_MODEL",
"openai_compatible": "OPENTALKING_TTS_OPENAI_MODEL",
"xiaomi_mimo": "OPENTALKING_TTS_XIAOMI_MODEL",
}.get(tts_provider)
if key:
updates[key] = value
if value := _strip(payload.tts_voice):
# The generic voice variable is always written; some providers also have
# a dedicated voice variable.
updates["OPENTALKING_TTS_VOICE"] = value
if tts_provider == "edge":
updates["OPENTALKING_TTS_EDGE_VOICE"] = value
elif tts_provider == "dashscope":
updates["OPENTALKING_TTS_DASHSCOPE_VOICE"] = value
elif tts_provider == "openai_compatible":
updates["OPENTALKING_TTS_OPENAI_VOICE"] = value
elif tts_provider == "xiaomi_mimo":
updates["OPENTALKING_TTS_XIAOMI_VOICE"] = value
if value := _strip(payload.tts_api_key):
if tts_provider == "openai_compatible":
updates["OPENTALKING_TTS_OPENAI_API_KEY"] = value
elif tts_provider == "xiaomi_mimo":
updates["OPENTALKING_TTS_XIAOMI_API_KEY"] = value
else:
updates["OPENTALKING_TTS_DASHSCOPE_API_KEY"] = value
sync_key = sync_key or value
# Optionally mirror the chosen key into the shared DashScope variable.
# NOTE(review): an LLM key always qualifies as sync_key regardless of which
# LLM endpoint it belongs to — confirm this is the intended behaviour.
if payload.sync_dashscope_api_key and sync_key:
updates.setdefault("DASHSCOPE_API_KEY", sync_key)
return updates
def _refresh_settings(request: Request) -> Any:
    """Reload settings and publish them on the application state.

    Clears the ``get_settings`` cache, stores the freshly built settings
    object on ``request.app.state.settings``, and clears the STT adapter
    cache.

    Returns:
        The newly loaded settings object.
    """
    get_settings.cache_clear()
    fresh = get_settings()
    request.app.state.settings = fresh
    clear_stt_adapter_cache()
    return fresh
# Push the refreshed LLM settings into session runners held on
# app.state.session_runners and return how many updates were applied.
# Returns 0 when that attribute is missing or is not a dict.
# NOTE(review): indentation is not visible in this view; it looks like the two
# hasattr checks are independent and each increments the counter, so a runner
# exposing both attributes would be counted twice — confirm against the real
# file.
def _refresh_live_runners(request: Request, settings: Any) -> int:
runners = getattr(request.app.state, "session_runners", None)
if not isinstance(runners, dict):
return 0
count = 0
# Iterate over a snapshot of the dict's values.
for runner in list(runners.values()):
# Runners that keep LLM connection details in private attributes: overwrite
# them and reset the client attribute to None.
if hasattr(runner, "_llm_base_url"):
runner._llm_base_url = settings.llm_base_url
runner._llm_api_key = settings.llm_api_key
runner._llm_model = settings.llm_model
runner._llm_client = None
count += 1
# Runners that expose an `llm` attribute: replace it with a new client built
# from the refreshed settings.
if hasattr(runner, "llm"):
from opentalking.providers.llm.openai_compatible.adapter import OpenAICompatibleLLMClient
runner.llm = OpenAICompatibleLLMClient(
base_url=settings.llm_base_url,
api_key=settings.llm_api_key,
model=settings.llm_model,
)
count += 1
return count
@router.get("")
async def get_runtime_config(request: Request) -> dict[str, Any]:
    """Return the current LLM/STT/TTS configuration without secret values.

    Uses the settings object stored on the application state when present;
    otherwise the payload builder loads settings itself.
    """
    cached_settings = getattr(request.app.state, "settings", None)
    return _current_payload(cached_settings)
@router.post("/apply")
async def apply_runtime_config(payload: RuntimeConfigPayload, request: Request) -> dict[str, Any]:
    """Persist a runtime configuration change and apply it to the process.

    Steps: build the environment updates, reject keys outside the allowlist,
    write them to the ``.env`` file, mirror allowlisted values from the file
    into ``os.environ``, reload settings, and refresh live session runners.

    Returns:
        The current configuration payload extended with ``applied``,
        ``requires_new_session`` (true when no live runner was refreshed) and
        ``live_runners_refreshed``.

    Raises:
        HTTPException: 400 when an update key is not in the allowlist.
    """
    updates = _build_updates(payload)
    rejected = sorted(key for key in updates if key not in _RUNTIME_ENV_KEYS)
    if rejected:
        raise HTTPException(status_code=400, detail=f"unsupported runtime config keys: {', '.join(rejected)}")

    _write_env_updates(_ENV_PATH, updates)

    # Re-read the file so the process environment matches what was persisted.
    _, persisted = _read_env_lines(_ENV_PATH)
    os.environ.update({key: text for key, text in persisted.items() if key in _RUNTIME_ENV_KEYS})

    settings = _refresh_settings(request)
    refreshed = _refresh_live_runners(request, settings)
    return {
        **_current_payload(settings),
        "applied": True,
        "requires_new_session": refreshed == 0,
        "live_runners_refreshed": refreshed,
    }

View File

@@ -0,0 +1,129 @@
from __future__ import annotations
import os
from types import SimpleNamespace
import pytest
from fastapi import HTTPException
from apps.api.routes import runtime_config
from opentalking.core.config import get_settings
def _clear_runtime_env(monkeypatch) -> None:
    """Remove every runtime-config variable from the environment for one test.

    The code under test writes to ``os.environ`` directly, bypassing
    ``monkeypatch``. ``monkeypatch.delenv(..., raising=False)`` records nothing
    for a variable that is currently unset, so such direct writes would
    survive the test. Setting the variable through ``monkeypatch`` first makes
    it record the pre-test state, which is restored at teardown.
    """
    for key in runtime_config._RUNTIME_ENV_KEYS:
        monkeypatch.setenv(key, "")
        monkeypatch.delenv(key, raising=False)
def _request(monkeypatch, tmp_path) -> SimpleNamespace:
    """Build a minimal stand-in for a request, isolated to *tmp_path*.

    Clears the runtime environment variables, switches the working directory
    to *tmp_path*, points the module's ``.env`` path at ``tmp_path/.env``, and
    reloads settings. The returned object exposes ``app.state.settings`` and
    an empty ``app.state.session_runners`` dict.
    """
    _clear_runtime_env(monkeypatch)
    monkeypatch.chdir(tmp_path)
    monkeypatch.setattr(runtime_config, "_ENV_PATH", tmp_path / ".env")
    get_settings.cache_clear()
    state = SimpleNamespace()
    state.settings = get_settings()
    state.session_runners = {}
    return SimpleNamespace(app=SimpleNamespace(state=state))
# GET must report provider, base URL and model for each section, expose API
# keys only as `api_key_set` flags, and never include the key text itself.
# NOTE(review): this is an `async def` test with no marker visible here —
# presumably the project runs pytest in an auto-async mode; confirm.
async def test_runtime_config_get_masks_secret_values(monkeypatch, tmp_path) -> None:
# Seed a .env file that selects OpenAI-compatible STT and TTS with secrets.
(tmp_path / ".env").write_text(
"\n".join(
[
"OPENTALKING_LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1",
"OPENTALKING_LLM_MODEL=qwen-turbo",
"OPENTALKING_LLM_API_KEY=sk-llm-secret",
"OPENTALKING_STT_DEFAULT_PROVIDER=openai_compatible",
"OPENTALKING_STT_OPENAI_BASE_URL=https://asr.example.test/v1",
"OPENTALKING_STT_OPENAI_MODEL=whisper-1",
"OPENTALKING_STT_OPENAI_API_KEY=sk-stt-secret",
"OPENTALKING_TTS_DEFAULT_PROVIDER=openai_compatible",
"OPENTALKING_TTS_OPENAI_BASE_URL=https://tts.example.test/v1",
"OPENTALKING_TTS_OPENAI_MODEL=gpt-4o-mini-tts",
"OPENTALKING_TTS_OPENAI_API_KEY=sk-tts-secret",
]
)
+ "\n",
encoding="utf-8",
)
payload = await runtime_config.get_runtime_config(_request(monkeypatch, tmp_path))
assert payload["llm"]["api_key_set"] is True
assert payload["stt"]["provider"] == "openai_compatible"
assert payload["stt"]["base_url"] == "https://asr.example.test/v1"
assert payload["stt"]["model"] == "whisper-1"
assert payload["stt"]["api_key_set"] is True
assert payload["tts"]["provider"] == "openai_compatible"
assert payload["tts"]["base_url"] == "https://tts.example.test/v1"
assert payload["tts"]["model"] == "gpt-4o-mini-tts"
assert payload["tts"]["api_key_set"] is True
# The secret values must not appear anywhere in the response.
assert "sk-llm-secret" not in str(payload)
assert "sk-stt-secret" not in str(payload)
assert "sk-tts-secret" not in str(payload)
# Apply must persist the supplied URLs/models/voice (trailing slashes removed),
# leave existing API keys untouched when the request sends blank keys, and
# mirror the stored keys into os.environ without echoing them in the response.
async def test_runtime_config_apply_persists_llm_stt_tts_and_keeps_blank_keys(monkeypatch, tmp_path) -> None:
# Pre-existing keys that the blank key fields below must not overwrite.
(tmp_path / ".env").write_text(
"\n".join(
[
"OPENTALKING_LLM_API_KEY=sk-existing-llm",
"OPENTALKING_STT_OPENAI_API_KEY=sk-existing-stt",
"OPENTALKING_TTS_OPENAI_API_KEY=sk-existing-tts",
]
)
+ "\n",
encoding="utf-8",
)
payload = await runtime_config.apply_runtime_config(
runtime_config.RuntimeConfigPayload(
llm_base_url="https://llm.example.test/v1/",
llm_model="qwen-plus",
llm_api_key="",
stt_provider="openai_compatible",
stt_base_url="https://asr.example.test/v1/",
stt_model="whisper-large-v3",
stt_api_key="",
tts_provider="openai_compatible",
tts_base_url="https://tts.example.test/v1/",
tts_model="gpt-4o-mini-tts",
tts_voice="alloy",
tts_api_key="",
),
_request(monkeypatch, tmp_path),
)
assert payload["applied"] is True
assert payload["llm"]["base_url"] == "https://llm.example.test/v1"
assert payload["llm"]["model"] == "qwen-plus"
assert payload["llm"]["api_key_set"] is True
assert payload["stt"]["provider"] == "openai_compatible"
assert payload["stt"]["base_url"] == "https://asr.example.test/v1"
assert payload["stt"]["model"] == "whisper-large-v3"
assert payload["stt"]["api_key_set"] is True
assert payload["tts"]["provider"] == "openai_compatible"
assert payload["tts"]["base_url"] == "https://tts.example.test/v1"
assert payload["tts"]["model"] == "gpt-4o-mini-tts"
assert payload["tts"]["voice"] == "alloy"
assert payload["tts"]["api_key_set"] is True
# Existing secrets stay out of the response...
assert "sk-existing-llm" not in str(payload)
assert "sk-existing-stt" not in str(payload)
assert "sk-existing-tts" not in str(payload)
# ...but are loaded from the file into the process environment.
assert os.environ["OPENTALKING_LLM_API_KEY"] == "sk-existing-llm"
assert os.environ["OPENTALKING_STT_OPENAI_API_KEY"] == "sk-existing-stt"
assert os.environ["OPENTALKING_TTS_OPENAI_API_KEY"] == "sk-existing-tts"
# An unrecognised STT provider name must be rejected with HTTP 400.
async def test_runtime_config_apply_rejects_unknown_provider(monkeypatch, tmp_path) -> None:
with pytest.raises(HTTPException) as exc_info:
await runtime_config.apply_runtime_config(
runtime_config.RuntimeConfigPayload(
stt_provider="not-a-provider",
tts_provider="openai_compatible",
),
_request(monkeypatch, tmp_path),
)
assert exc_info.value.status_code == 400

View File

@@ -19,7 +19,7 @@ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from apps.api.core.config import get_settings
from apps.api.routes.avatars import _call_adapter_warmup
from apps.api.routes import agent, avatars, events, exports, health, memory, models, personas, sessions, tts_preview, video_clone, video_creation, voices
from apps.api.routes import agent, avatars, events, exports, health, memory, models, personas, runtime_config, sessions, tts_preview, video_clone, video_creation, voices
from opentalking.voice.store import init_voice_store
from opentalking.core.in_memory_redis import InMemoryRedis
from opentalking.pipeline.session.runner import SessionRunner
@@ -219,6 +219,7 @@ def create_app() -> FastAPI:
app.include_router(personas.router)
app.include_router(events.router)
app.include_router(exports.router)
app.include_router(runtime_config.router)
app.include_router(tts_preview.router)
app.include_router(video_clone.router)
app.include_router(video_creation.router)

View File

@@ -21,6 +21,7 @@ import {
} from "./components/VideoCreationWorkspace";
import {
ApiError,
applyRuntimeConfig,
apiDelete,
apiGet,
apiPost,
@@ -29,6 +30,7 @@ import {
apiUploadFile,
buildApiUrl,
getMemoryLibraries,
loadRuntimeConfig,
uploadExportVideo,
type AvatarKnowledgeBasesResponse,
type AvatarSummary,
@@ -38,6 +40,8 @@ import {
type KnowledgeBasesResponse,
type PersonaSummary,
type PersonasResponse,
type RuntimeConfigApplyInput,
type RuntimeConfigResponse,
type SessionKnowledgeBasesRequest,
type SessionKnowledgeBasesResponse,
type VoiceCatalogItem,
@@ -889,6 +893,9 @@ export default function App() {
const [isSpeaking, setIsSpeaking] = useState(false);
const [currentSubtitle, setCurrentSubtitle] = useState("");
const [, setRuntimeStatus] = useState<HealthResponse | null>(null);
const [runtimeConfig, setRuntimeConfig] = useState<RuntimeConfigResponse | null>(null);
const [runtimeConfigLoading, setRuntimeConfigLoading] = useState(false);
const [runtimeConfigApplying, setRuntimeConfigApplying] = useState(false);
const clearSubtitleFallbackTimer = useCallback(() => {
if (subtitleFallbackTimerRef.current !== null) {
@@ -1046,6 +1053,62 @@ export default function App() {
}, tone === "error" ? 5200 : 3600);
}, []);
// Copy the provider/model/voice reported by the backend runtime config into
// the local ASR and TTS selection state.
const syncRuntimeConfigSelection = useCallback((next: RuntimeConfigResponse) => {
// ASR: fall back to the provider's default model when the backend reports none.
const nextAsrProvider = normalizeAsrProvider(next.stt.provider, "dashscope");
setAsrProvider(nextAsrProvider);
setAsrModel(next.stt.model || sttModelForProvider(nextAsrProvider));
const nextTtsProvider = normalizeTtsProvider(next.tts.provider, "edge");
setTtsProvider(nextTtsProvider);
// Model and voice are only overwritten when the backend reports a value.
if (next.tts.model) {
setQwenModel(next.tts.model);
}
if (next.tts.voice) {
// Edge has its own voice state; every other provider uses the qwen voice state.
if (nextTtsProvider === "edge") {
setEdgeVoice(next.tts.voice);
} else {
setQwenVoice(next.tts.voice);
}
}
}, []);
// Reload the runtime config from the backend and sync the local selection.
// Resolves to the loaded config, or null after notifying the user of a failure.
const refreshRuntimeConfig = useCallback(async () => {
setRuntimeConfigLoading(true);
try {
const next = await loadRuntimeConfig();
setRuntimeConfig(next);
syncRuntimeConfigSelection(next);
return next;
} catch (error) {
console.warn("load runtime config failed", error);
// Show the backend-provided detail when the error is an ApiError.
const detail = error instanceof ApiError ? error.detail : null;
notify(detail ? `运行配置读取失败:${detail}` : "运行配置读取失败,请查看后端日志。", "error");
return null;
} finally {
// Always clear the loading flag, on success and on failure.
setRuntimeConfigLoading(false);
}
}, [notify, syncRuntimeConfigSelection]);
// Send a runtime config change to the backend, sync local state with the
// response, and notify the user. Errors are reported and then rethrown so the
// caller can react to the failure as well.
const handleApplyRuntimeConfig = useCallback(async (input: RuntimeConfigApplyInput) => {
setRuntimeConfigApplying(true);
try {
const next = await applyRuntimeConfig(input);
setRuntimeConfig(next);
syncRuntimeConfigSelection(next);
// Refresh /health in the background; a failure here is only logged.
void apiGet<HealthResponse>("/health")
.then(setRuntimeStatus)
.catch((error) => console.warn("refresh health after runtime config failed", error));
// The message depends on whether the backend says a new session is required.
notify(next.requires_new_session ? "运行配置已保存,下次会话生效。" : "运行配置已应用。", "success");
} catch (error) {
console.warn("apply runtime config failed", error);
const detail = error instanceof ApiError ? error.detail : null;
notify(detail ? `运行配置应用失败:${detail}` : "运行配置应用失败,请查看后端日志。", "error");
throw error;
} finally {
setRuntimeConfigApplying(false);
}
}, [notify, syncRuntimeConfigSelection]);
const syncSessionKnowledgeBases = useCallback((knowledgeBaseIds: string[]) => {
const sid = sessionIdRef.current;
if (!sid) return;
@@ -1693,20 +1756,29 @@ export default function App() {
useEffect(() => {
void (async () => {
try {
const [av, mo, health] = await Promise.all([
const [av, mo, health, , initialRuntimeConfig] = await Promise.all([
apiGet<AvatarSummary[]>("/avatars"),
apiGet<{ models: string[]; statuses?: ModelStatus[]; default_model?: string | null }>("/models"),
apiGet<HealthResponse>("/health"),
loadVoices(),
loadRuntimeConfig().catch((error) => {
console.warn("load runtime config during init failed", error);
return null;
}),
]);
setRuntimeStatus(health);
setAvatars(av);
setModels(mo.models);
setAsrProvider((prev) => {
const next = normalizeAsrProvider(prev || health.stt_provider, "dashscope");
setAsrModel(sttModelForProvider(next));
return next;
});
if (initialRuntimeConfig) {
setRuntimeConfig(initialRuntimeConfig);
syncRuntimeConfigSelection(initialRuntimeConfig);
} else {
setAsrProvider((prev) => {
const next = normalizeAsrProvider(prev || health.stt_provider, "dashscope");
setAsrModel(sttModelForProvider(next));
return next;
});
}
const statuses = mo.statuses ?? mo.models.map((id) => ({ id, connected: true }));
setModelStatuses(statuses);
const storedAvatarSelection = readStoredAvatarSelection();
@@ -1725,7 +1797,7 @@ export default function App() {
setConnection("error");
}
})();
}, [loadVoices]);
}, [loadVoices, syncRuntimeConfigSelection]);
// ---------- SSE ----------
useEffect(() => {
@@ -2671,6 +2743,11 @@ export default function App() {
avatarId={avatarId}
model={model}
modelConnected={selectedModelConnected}
runtimeConfig={runtimeConfig}
runtimeConfigLoading={runtimeConfigLoading}
runtimeConfigApplying={runtimeConfigApplying}
onRuntimeConfigRefresh={() => void refreshRuntimeConfig()}
onRuntimeConfigApply={handleApplyRuntimeConfig}
wav2lipPostprocessMode={wav2lipPostprocessMode}
wav2lipPostprocessModeLocked={wav2lipPostprocessModeLocked}
fasterliveportraitConfig={fasterliveportraitConfig}

View File

@@ -1,6 +1,6 @@
import { useEffect, useState, type ReactNode } from "react";
import type { AgentConfig } from "./AvatarSelectionStage";
import type { AvatarSummary, KnowledgeBaseSummary } from "../lib/api";
import type { AvatarSummary, KnowledgeBaseSummary, RuntimeConfigApplyInput, RuntimeConfigResponse } from "../lib/api";
import { modelConnectionBadge, type ModelStatus } from "../lib/modelStatus";
import type { TtsProviderExtended } from "../constants/ttsBailian";
import type { MemoryLibrary } from "../types";
@@ -103,6 +103,97 @@ const ASR_PROVIDER_MODELS: Record<string, string> = {
sensevoice: "iic/SenseVoiceSmall",
};
// Fallback LLM base URL and model used to pre-fill the runtime-config form
// when no config is loaded or the loaded config leaves these fields empty.
const RUNTIME_LLM_DEFAULT = {
baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1",
model: "qwen-turbo",
};
// STT presets for the runtime-config form, keyed by provider id.
// - label:    text shown for the provider in the STT provider <select>.
// - baseUrl / model: values written into the form when the provider is picked,
//   and used as fallbacks when the loaded config leaves them empty.
// - needsKey: when false, the STT "key set" badge is satisfied without any key.
const RUNTIME_STT_PRESETS: Record<string, { label: string; baseUrl: string; model: string; needsKey: boolean }> = {
dashscope: {
label: "DashScope",
baseUrl: "https://dashscope.aliyuncs.com",
model: "paraformer-realtime-v2",
needsKey: true,
},
openai_compatible: {
label: "OpenAI-compatible",
baseUrl: "https://api.openai.com/v1",
model: "whisper-1",
needsKey: true,
},
xiaomi_mimo: {
label: "小米 MiMo",
baseUrl: "",
model: "mimo-v2.5-asr",
needsKey: true,
},
sensevoice: {
label: "SenseVoice",
baseUrl: "",
model: "iic/SenseVoiceSmall",
needsKey: false,
},
};
// TTS presets for the runtime-config form, one entry per TtsProviderExtended id.
// - label:    text shown for the provider in the TTS provider <select>.
// - baseUrl / model / voice: values written into the form when the provider is
//   picked, and used as fallbacks when the loaded config leaves them empty.
// - needsKey: when false, the TTS "key set" badge is satisfied without any key.
// The key set of this object is also what normalizeRuntimeTtsProvider accepts
// as a known provider id.
const RUNTIME_TTS_PRESETS: Record<TtsProviderExtended, { label: string; baseUrl: string; model: string; voice: string; needsKey: boolean }> = {
edge: {
label: "Edge",
baseUrl: "",
model: "",
voice: "zh-CN-XiaoxiaoNeural",
needsKey: false,
},
dashscope: {
label: "Qwen",
baseUrl: "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
model: "qwen3-tts-flash-realtime",
voice: "Cherry",
needsKey: true,
},
cosyvoice: {
label: "CosyVoice",
baseUrl: "",
model: "cosyvoice-v3-flash",
voice: "longanyang",
needsKey: true,
},
sambert: {
label: "Sambert",
baseUrl: "",
model: "sambert-zhichu-v1",
voice: "",
needsKey: true,
},
// NOTE(review): local_cosyvoice and indextts share the same default base URL — confirm this is intended.
local_cosyvoice: {
label: "Local CosyVoice",
baseUrl: "http://127.0.0.1:9880",
model: "FunAudioLLM/Fun-CosyVoice3-0.5B-2512",
voice: "",
needsKey: false,
},
indextts: {
label: "Local IndexTTS",
baseUrl: "http://127.0.0.1:9880",
model: "IndexTeam/IndexTTS-2",
voice: "",
needsKey: false,
},
xiaomi_mimo: {
label: "小米 MiMo",
baseUrl: "",
model: "mimo-v2.5-tts",
voice: "mimo_default",
needsKey: true,
},
openai_compatible: {
label: "OpenAI-compatible",
baseUrl: "https://api.openai.com/v1",
model: "gpt-4o-mini-tts",
voice: "alloy",
needsKey: true,
},
};
const WAV2LIP_POSTPROCESS_OPTIONS: { id: Wav2LipPostprocessMode; label: string }[] = [
{ id: "auto", label: "自动推荐" },
{ id: "basic", label: "基础" },
@@ -153,6 +244,71 @@ const FASTERLIVEPORTRAIT_ANIMATION_REGION_OPTIONS: {
{ id: "eyes", label: "眼睛" },
];
// Editable state behind the "运行配置" settings section.
// The three *ApiKey fields start out empty and are only included in the apply
// payload when they contain non-blank text; they never hold a key read back
// from the loaded config.
type RuntimeConfigForm = {
// LLM endpoint settings.
llmBaseUrl: string;
llmModel: string;
llmApiKey: string;
// STT settings; sttProvider is a key of RUNTIME_STT_PRESETS when chosen via the form.
sttProvider: string;
sttBaseUrl: string;
sttModel: string;
sttApiKey: string;
// TTS settings.
ttsProvider: TtsProviderExtended;
ttsBaseUrl: string;
ttsModel: string;
ttsVoice: string;
ttsApiKey: string;
// Sent to the backend as `sync_dashscope_api_key`.
// NOTE(review): the exact effect is decided by the backend — confirm there.
syncDashscopeApiKey: boolean;
};
// Form values used while no runtime config has been loaded: DashScope presets
// for both STT and TTS, empty API-key inputs, and DashScope key sync enabled.
const RUNTIME_FORM_DEFAULTS: RuntimeConfigForm = {
llmBaseUrl: RUNTIME_LLM_DEFAULT.baseUrl,
llmModel: RUNTIME_LLM_DEFAULT.model,
llmApiKey: "",
sttProvider: "dashscope",
sttBaseUrl: RUNTIME_STT_PRESETS.dashscope.baseUrl,
sttModel: RUNTIME_STT_PRESETS.dashscope.model,
sttApiKey: "",
ttsProvider: "dashscope",
ttsBaseUrl: RUNTIME_TTS_PRESETS.dashscope.baseUrl,
ttsModel: RUNTIME_TTS_PRESETS.dashscope.model,
ttsVoice: RUNTIME_TTS_PRESETS.dashscope.voice,
ttsApiKey: "",
syncDashscopeApiKey: true,
};
/**
 * Maps a raw TTS provider id onto one of the ids known to RUNTIME_TTS_PRESETS.
 *
 * The input is trimmed first (null/undefined count as ""). The aliases
 * "local_indextts" and "omnirt_indextts" resolve to "indextts"; any id that is
 * not an own key of RUNTIME_TTS_PRESETS resolves to "dashscope".
 */
function normalizeRuntimeTtsProvider(value: string | null | undefined): TtsProviderExtended {
  const candidate = value == null ? "" : value.trim();

  switch (candidate) {
    case "local_indextts":
    case "omnirt_indextts":
      return "indextts";
    default:
      break;
  }

  // Own-property check so inherited names such as "toString" are not accepted.
  if (Object.prototype.hasOwnProperty.call(RUNTIME_TTS_PRESETS, candidate)) {
    return candidate as TtsProviderExtended;
  }
  return "dashscope";
}
/**
 * Builds the editable form state from a loaded runtime config.
 *
 * Without a config, a copy of RUNTIME_FORM_DEFAULTS is returned. Otherwise each
 * field takes the config's value and falls back to the matching preset when the
 * config value is empty. An STT provider that has no preset is replaced by
 * "dashscope"; the TTS provider goes through normalizeRuntimeTtsProvider.
 * API-key inputs always start empty and DashScope key sync always starts on.
 */
function runtimeFormFromConfig(runtimeConfig: RuntimeConfigResponse | null): RuntimeConfigForm {
  if (!runtimeConfig) {
    return { ...RUNTIME_FORM_DEFAULTS };
  }

  const { llm, stt, tts } = runtimeConfig;

  const sttKnown = Object.prototype.hasOwnProperty.call(RUNTIME_STT_PRESETS, stt.provider);
  const sttProvider = sttKnown ? stt.provider : "dashscope";
  const sttFallback = RUNTIME_STT_PRESETS[sttProvider] ?? RUNTIME_STT_PRESETS.dashscope;

  const ttsProvider = normalizeRuntimeTtsProvider(tts.provider);
  const ttsFallback = RUNTIME_TTS_PRESETS[ttsProvider];

  const form: RuntimeConfigForm = {
    llmBaseUrl: llm.base_url || RUNTIME_LLM_DEFAULT.baseUrl,
    llmModel: llm.model || RUNTIME_LLM_DEFAULT.model,
    llmApiKey: "",
    sttProvider,
    sttBaseUrl: stt.base_url || sttFallback.baseUrl,
    sttModel: stt.model || sttFallback.model,
    sttApiKey: "",
    ttsProvider,
    ttsBaseUrl: tts.base_url || ttsFallback.baseUrl,
    ttsModel: tts.model || ttsFallback.model,
    ttsVoice: tts.voice || ttsFallback.voice,
    ttsApiKey: "",
    syncDashscopeApiKey: true,
  };
  return form;
}
interface SettingsPanelProps {
/** 展开时显示表单;收起时仅保留右侧竖条入口 */
expanded: boolean;
@@ -163,6 +319,11 @@ interface SettingsPanelProps {
avatarId: string;
model: string;
modelConnected: boolean;
runtimeConfig: RuntimeConfigResponse | null;
runtimeConfigLoading?: boolean;
runtimeConfigApplying?: boolean;
onRuntimeConfigRefresh: () => void;
onRuntimeConfigApply: (input: RuntimeConfigApplyInput) => Promise<void>;
wav2lipPostprocessMode: Wav2LipPostprocessMode;
wav2lipPostprocessModeLocked: boolean;
fasterliveportraitConfig: FasterLivePortraitConfig;
@@ -393,6 +554,11 @@ export function SettingsPanel({
avatarId,
model,
modelConnected,
runtimeConfig,
runtimeConfigLoading = false,
runtimeConfigApplying = false,
onRuntimeConfigRefresh,
onRuntimeConfigApply,
wav2lipPostprocessMode,
wav2lipPostprocessModeLocked,
fasterliveportraitConfig,
@@ -441,6 +607,7 @@ export function SettingsPanel({
onManageMemoryLibraries,
}: SettingsPanelProps) {
const [openSections, setOpenSections] = useState<Record<string, boolean>>({
runtime: true,
avatars: true,
knowledge: true,
memory: true,
@@ -450,6 +617,7 @@ export function SettingsPanel({
role: true,
});
const [voiceView, setVoiceView] = useState<"providers" | "models" | "voices">("providers");
const [runtimeForm, setRuntimeForm] = useState<RuntimeConfigForm>(() => runtimeFormFromConfig(runtimeConfig));
useEffect(() => {
if (!voiceApplyNotice) return;
@@ -457,6 +625,10 @@ export function SettingsPanel({
setVoiceView("voices");
}, [voiceApplyNotice]);
// Rebuild the form from scratch whenever a new runtimeConfig object arrives;
// this replaces whatever is currently in the form, including unsaved edits.
useEffect(() => {
  const fresh = runtimeFormFromConfig(runtimeConfig);
  setRuntimeForm(fresh);
}, [runtimeConfig]);
useEffect(() => {
const handler = (e: KeyboardEvent) => {
if (e.key === "Escape" && expanded) {
@@ -470,6 +642,55 @@ export function SettingsPanel({
// Flips the open/closed flag of the settings section with the given id.
const toggleSection = (id: string) => {
  setOpenSections((current) => {
    const flipped = !current[id];
    return { ...current, [id]: flipped };
  });
};
// Writes a single field of the runtime-config form, leaving the others as they are.
const updateRuntimeForm = <K extends keyof RuntimeConfigForm>(
  key: K,
  value: RuntimeConfigForm[K],
) => {
  setRuntimeForm((current) => ({
    ...current,
    [key]: value,
  }));
};
// Switches the STT provider in the form and overwrites base URL and model with
// that provider's preset (the DashScope preset when the id has no preset).
const selectRuntimeSttProvider = (provider: string) => {
  const { baseUrl, model } = RUNTIME_STT_PRESETS[provider] ?? RUNTIME_STT_PRESETS.dashscope;
  setRuntimeForm((current) => ({
    ...current,
    sttProvider: provider,
    sttBaseUrl: baseUrl,
    sttModel: model,
  }));
};
// Switches the TTS provider in the form and overwrites base URL, model and
// voice with that provider's preset values.
const selectRuntimeTtsProvider = (provider: TtsProviderExtended) => {
  const { baseUrl, model, voice } = RUNTIME_TTS_PRESETS[provider];
  setRuntimeForm((current) => ({
    ...current,
    ttsProvider: provider,
    ttsBaseUrl: baseUrl,
    ttsModel: model,
    ttsVoice: voice,
  }));
};
/**
 * Builds the apply payload from the current form and submits it through
 * `onRuntimeConfigApply`.
 *
 * Text fields are trimmed before sending. An API key is only added to the
 * payload when its input is non-blank after trimming, so an empty key input
 * does not send a key at all. After a successful apply the three key inputs
 * are cleared.
 *
 * The apply button invokes this as `void handleRuntimeApply()`, so a rejection
 * escaping from here would become an unhandled promise rejection. A failed
 * apply is therefore caught: it is logged, and the form (including any typed
 * keys) is left untouched so the user can correct the input and retry.
 */
const handleRuntimeApply = async () => {
  const payload: RuntimeConfigApplyInput = {
    llm_base_url: runtimeForm.llmBaseUrl.trim(),
    llm_model: runtimeForm.llmModel.trim(),
    stt_provider: runtimeForm.sttProvider,
    stt_base_url: runtimeForm.sttBaseUrl.trim(),
    stt_model: runtimeForm.sttModel.trim(),
    tts_provider: runtimeForm.ttsProvider,
    tts_base_url: runtimeForm.ttsBaseUrl.trim(),
    tts_model: runtimeForm.ttsModel.trim(),
    tts_voice: runtimeForm.ttsVoice.trim(),
    sync_dashscope_api_key: runtimeForm.syncDashscopeApiKey,
  };

  // Keys are optional in the payload: only send the ones the user typed.
  const llmApiKey = runtimeForm.llmApiKey.trim();
  const sttApiKey = runtimeForm.sttApiKey.trim();
  const ttsApiKey = runtimeForm.ttsApiKey.trim();
  if (llmApiKey) payload.llm_api_key = llmApiKey;
  if (sttApiKey) payload.stt_api_key = sttApiKey;
  if (ttsApiKey) payload.tts_api_key = ttsApiKey;

  try {
    await onRuntimeConfigApply(payload);
  } catch (error) {
    // NOTE(review): the callback supplied by the parent is expected to have
    // already shown the failure to the user — confirm for any new caller.
    console.warn("runtime config apply rejected", error);
    return;
  }

  // Success: do not keep typed secrets in the form state.
  setRuntimeForm((prev) => ({
    ...prev,
    llmApiKey: "",
    sttApiKey: "",
    ttsApiKey: "",
  }));
};
const selectedKnowledgeBaseSet = new Set(agentConfig.knowledgeBaseIds);
const updateKnowledgeBaseIds = (nextIds: string[]) => {
const deduped = Array.from(new Set(nextIds.filter((id) => id.trim())));
@@ -523,6 +744,20 @@ export function SettingsPanel({
label: option.label,
subtitle: option.id,
}));
// ---- Derived display state for the "运行配置" section ----
// Presets for the providers currently selected in the form (STT falls back to
// the DashScope preset when the selected id has no preset).
const runtimeSttPreset = RUNTIME_STT_PRESETS[runtimeForm.sttProvider] ?? RUNTIME_STT_PRESETS.dashscope;
const runtimeTtsPreset = RUNTIME_TTS_PRESETS[runtimeForm.ttsProvider];
// LLM key counts as set when the loaded config reports one or the user has typed one.
const runtimeLlmKeySet = Boolean(runtimeConfig?.llm.api_key_set || runtimeForm.llmApiKey.trim());
// A saved STT/TTS key only counts while the form still points at the same
// provider the loaded config reports; switching provider in the form drops it.
// With no loaded config both values are falsy.
const runtimeSttSavedKeySet = runtimeConfig?.stt.provider === runtimeForm.sttProvider && runtimeConfig.stt.api_key_set;
const runtimeTtsSavedKeySet = normalizeRuntimeTtsProvider(runtimeConfig?.tts.provider) === runtimeForm.ttsProvider && runtimeConfig?.tts.api_key_set;
// Satisfied by a saved key, a typed key, or a preset that needs no key.
const runtimeSttKeySet = Boolean(runtimeSttSavedKeySet || runtimeForm.sttApiKey.trim() || !runtimeSttPreset.needsKey);
const runtimeTtsKeySet = Boolean(runtimeTtsSavedKeySet || runtimeForm.ttsApiKey.trim() || !runtimeTtsPreset.needsKey);
const runtimeConfigNeedsSetup = !runtimeLlmKeySet || !runtimeSttKeySet || !runtimeTtsKeySet;
// Loading wins for the label; the badge colour below still follows
// runtimeConfigNeedsSetup even while loading.
const runtimeBadgeLabel = runtimeConfigLoading ? "读取中" : runtimeConfigNeedsSetup ? "待配置" : "已配置";
const runtimeBadgeClass = runtimeConfigNeedsSetup
? "border-amber-200 bg-amber-50 text-amber-700"
: "border-emerald-200 bg-emerald-50 text-emerald-700";
// [providerId, preset] pairs rendered as <option>s in the provider selects.
const runtimeSttProviderOptions = Object.entries(RUNTIME_STT_PRESETS);
const runtimeTtsProviderOptions = Object.entries(RUNTIME_TTS_PRESETS) as [TtsProviderExtended, typeof RUNTIME_TTS_PRESETS[TtsProviderExtended]][];
const providerHasSingleModel = (provider: TtsProviderExtended) => {
if (provider === "edge" || provider === "openai_compatible") return true;
@@ -580,6 +815,202 @@ export function SettingsPanel({
</div>
<div className="space-y-4 p-4 pt-0 lg:min-h-0 lg:flex-1 lg:overflow-y-auto">
<SettingsSection
id="runtime"
title="运行配置"
open={openSections.runtime}
onToggle={toggleSection}
action={
<div className="flex shrink-0 items-center gap-2">
<button
type="button"
onClick={onRuntimeConfigRefresh}
disabled={runtimeConfigLoading || runtimeConfigApplying}
className="min-h-8 px-1 text-xs font-semibold text-slate-600 transition hover:text-cyan-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{runtimeConfigLoading ? "读取中" : "刷新"}
</button>
<span className={`shrink-0 rounded-full border px-2 py-0.5 text-xs font-semibold ${runtimeBadgeClass}`}>
{runtimeBadgeLabel}
</span>
</div>
}
>
<div className="space-y-3">
<div className="rounded-lg border border-slate-200 bg-slate-50 p-3">
<div className="mb-2 flex items-center justify-between gap-2">
<p className="text-xs font-semibold text-slate-500">LLM</p>
<span className={`rounded-full border px-2 py-0.5 text-[11px] font-semibold ${
runtimeLlmKeySet ? "border-emerald-200 bg-emerald-50 text-emerald-700" : "border-amber-200 bg-amber-50 text-amber-700"
}`}>
{runtimeLlmKeySet ? "Key 已设置" : "Key 未设置"}
</span>
</div>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Base URL</span>
<input
value={runtimeForm.llmBaseUrl}
onChange={(event) => updateRuntimeForm("llmBaseUrl", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<div className="mt-2 grid grid-cols-1 gap-2 sm:grid-cols-2">
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Model</span>
<input
value={runtimeForm.llmModel}
onChange={(event) => updateRuntimeForm("llmModel", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">API Key</span>
<input
type="password"
value={runtimeForm.llmApiKey}
onChange={(event) => updateRuntimeForm("llmApiKey", event.target.value)}
autoComplete="new-password"
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
</div>
</div>
<div className="rounded-lg border border-slate-200 bg-slate-50 p-3">
<div className="mb-2 flex items-center justify-between gap-2">
<p className="text-xs font-semibold text-slate-500">STT</p>
<span className={`rounded-full border px-2 py-0.5 text-[11px] font-semibold ${
runtimeSttKeySet ? "border-emerald-200 bg-emerald-50 text-emerald-700" : "border-amber-200 bg-amber-50 text-amber-700"
}`}>
{runtimeSttKeySet ? "Key 已设置" : "Key 未设置"}
</span>
</div>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Provider</span>
<select
value={runtimeForm.sttProvider}
onChange={(event) => selectRuntimeSttProvider(event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-semibold text-slate-800 outline-none transition focus:border-cyan-300"
>
{runtimeSttProviderOptions.map(([provider, preset]) => (
<option key={provider} value={provider}>{preset.label}</option>
))}
</select>
</label>
<div className="mt-2 grid grid-cols-1 gap-2">
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Base URL</span>
<input
value={runtimeForm.sttBaseUrl}
onChange={(event) => updateRuntimeForm("sttBaseUrl", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<div className="grid grid-cols-1 gap-2 sm:grid-cols-2">
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Model</span>
<input
value={runtimeForm.sttModel}
onChange={(event) => updateRuntimeForm("sttModel", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">API Key</span>
<input
type="password"
value={runtimeForm.sttApiKey}
onChange={(event) => updateRuntimeForm("sttApiKey", event.target.value)}
autoComplete="new-password"
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
</div>
</div>
</div>
<div className="rounded-lg border border-slate-200 bg-slate-50 p-3">
<div className="mb-2 flex items-center justify-between gap-2">
<p className="text-xs font-semibold text-slate-500">TTS</p>
<span className={`rounded-full border px-2 py-0.5 text-[11px] font-semibold ${
runtimeTtsKeySet ? "border-emerald-200 bg-emerald-50 text-emerald-700" : "border-amber-200 bg-amber-50 text-amber-700"
}`}>
{runtimeTtsKeySet ? "Key 已设置" : "Key 未设置"}
</span>
</div>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Provider</span>
<select
value={runtimeForm.ttsProvider}
onChange={(event) => selectRuntimeTtsProvider(event.target.value as TtsProviderExtended)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-semibold text-slate-800 outline-none transition focus:border-cyan-300"
>
{runtimeTtsProviderOptions.map(([provider, preset]) => (
<option key={provider} value={provider}>{preset.label}</option>
))}
</select>
</label>
<div className="mt-2 grid grid-cols-1 gap-2">
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Base URL</span>
<input
value={runtimeForm.ttsBaseUrl}
onChange={(event) => updateRuntimeForm("ttsBaseUrl", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<div className="grid grid-cols-1 gap-2 sm:grid-cols-3">
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Model</span>
<input
value={runtimeForm.ttsModel}
onChange={(event) => updateRuntimeForm("ttsModel", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">Voice</span>
<input
value={runtimeForm.ttsVoice}
onChange={(event) => updateRuntimeForm("ttsVoice", event.target.value)}
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
<label className="block">
<span className="mb-1 block text-xs font-medium text-slate-500">API Key</span>
<input
type="password"
value={runtimeForm.ttsApiKey}
onChange={(event) => updateRuntimeForm("ttsApiKey", event.target.value)}
autoComplete="new-password"
className="h-9 w-full rounded-md border border-slate-200 bg-white px-2.5 text-xs font-medium text-slate-800 outline-none transition focus:border-cyan-300"
/>
</label>
</div>
</div>
</div>
<label className="flex items-center justify-between gap-3 rounded-lg border border-slate-200 bg-white px-3 py-2">
<span className="min-w-0 text-xs font-semibold text-slate-600"> DashScope Key</span>
<input
type="checkbox"
checked={runtimeForm.syncDashscopeApiKey}
onChange={(event) => updateRuntimeForm("syncDashscopeApiKey", event.target.checked)}
className="h-4 w-4 shrink-0 accent-cyan-600"
/>
</label>
<button
type="button"
onClick={() => void handleRuntimeApply()}
disabled={runtimeConfigApplying || runtimeConfigLoading}
className="w-full rounded-lg bg-slate-950 px-3 py-2.5 text-sm font-semibold text-white transition hover:bg-slate-800 disabled:cursor-not-allowed disabled:opacity-60"
>
{runtimeConfigApplying ? "应用中..." : "应用配置"}
</button>
</div>
</SettingsSection>
<SettingsSection
id="avatars"
title="数字人形象"

View File

@@ -117,6 +117,64 @@ export async function apiUploadFile<T>(path: string, fieldName: string, file: Fi
return apiPostForm<T>(path, form);
}
// LLM part of the runtime-config response.
// `api_key_set` is a boolean flag rather than the key itself.
// NOTE(review): presumably the backend never returns the raw key — confirm.
export type RuntimeConfigLlm = {
base_url: string;
model: string;
api_key_set: boolean;
};
// STT part of the runtime-config response.
// NOTE(review): `enabled_providers` and `service_url_set` are defined by the
// backend; their exact meaning is not visible from this file — confirm there.
export type RuntimeConfigStt = {
provider: string;
enabled_providers: string[];
base_url: string;
model: string;
api_key_set: boolean;
service_url_set: boolean;
};
// TTS part of the runtime-config response; same shape as the STT part plus `voice`.
// NOTE(review): `enabled_providers` and `service_url_set` are defined by the
// backend; their exact meaning is not visible from this file — confirm there.
export type RuntimeConfigTts = {
provider: string;
enabled_providers: string[];
base_url: string;
model: string;
voice: string;
api_key_set: boolean;
service_url_set: boolean;
};
// Response type shared by loadRuntimeConfig() and applyRuntimeConfig().
// The three optional fields may be absent.
// NOTE(review): presumably they are only populated by the apply endpoint — confirm against the backend.
export type RuntimeConfigResponse = {
llm: RuntimeConfigLlm;
stt: RuntimeConfigStt;
tts: RuntimeConfigTts;
applied?: boolean;
requires_new_session?: boolean;
live_runners_refreshed?: number;
};
// Request body for applyRuntimeConfig(). Every field is optional.
// NOTE(review): presumably an omitted field leaves the corresponding backend
// setting unchanged — confirm against the `/runtime-config/apply` handler.
export type RuntimeConfigApplyInput = {
llm_base_url?: string;
llm_model?: string;
llm_api_key?: string;
stt_provider?: string;
stt_base_url?: string;
stt_model?: string;
stt_api_key?: string;
tts_provider?: string;
tts_base_url?: string;
tts_model?: string;
tts_voice?: string;
tts_api_key?: string;
sync_dashscope_api_key?: boolean;
};
/** Fetches the current runtime config with a GET to `/runtime-config`. */
export async function loadRuntimeConfig(): Promise<RuntimeConfigResponse> {
  const config = await apiGet<RuntimeConfigResponse>("/runtime-config");
  return config;
}
/** POSTs `input` to `/runtime-config/apply` and resolves to the config the backend returns. */
export async function applyRuntimeConfig(input: RuntimeConfigApplyInput): Promise<RuntimeConfigResponse> {
  const updated = await apiPost<RuntimeConfigResponse>("/runtime-config/apply", input);
  return updated;
}
export type ExportVideoKind = "realtime_dialogue" | "video_clone" | "video_creation";
export type ExportVideoItem = {

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 583 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 263 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 459 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 281 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 175 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 563 KiB

View File

@@ -0,0 +1,97 @@
# Compshare Image Quick Experience
This page explains how to try the published OpenTalking image on Compshare. The whole flow uses only the Compshare console and the OpenTalking Studio web page.
- Image URL: <https://www.compshare.cn/images/TdDwmKZUZebI>
- Browser access port: `5173`
- Default experience path: OpenTalking Studio + OmniRT + QuickTalk
The screenshots in this guide have been redacted to hide account, balance, instance identifier, public address, QR code, and other sensitive information.
## 1. Register and Verify Your Account
If you do not have a Compshare account, open the image page and follow the page prompt to register. Choose “手机号注册” for phone registration or “邮箱注册” for email registration. For phone registration, enter your phone number, click “获取验证码”, enter the verification code, select the service agreement checkbox, and click “立即注册”. If you already have an account, click “登录已有账号”.
![Register a Compshare account](../../assets/images/compshare-redacted/register-image.png)
After registration, Compshare may show a bonus verification dialog. Choose the identity card that matches your usage. Individual users usually choose “个人开发者/研究员” or “个人爱好者”, then click “前往认证”.
![Choose an identity type and continue verification](../../assets/images/compshare-redacted/register-get5yuan.png)
On the “实名认证” page, choose the matching verification type. Individual users click “立即认证” in the “个人认证” card. Education or organization users should click “立即认证” in the matching card.
![Open the real-name verification page](../../assets/images/compshare-redacted/get5yuan.png)
If the page shows QR verification, scan the QR code with your phone and finish the authorization steps there. The QR code in this screenshot is redacted; scan the QR code displayed on your own page.
![Scan the QR code to complete verification](../../assets/images/compshare-redacted/scan-to-authorize.png)
## 2. Create an Instance From the Image
Open the OpenTalking image page and confirm that the title is OpenTalking. In the action panel on the right, click “使用该镜像创建实例” to enter the instance deployment page.
![Click the button to create an instance from the image](../../assets/images/compshare-redacted/image.png)
## 3. Choose the Instance Configuration
On the deployment page, choose a region first. Prefer a region with available resources. If a region or GPU type has no available capacity, switch to another region or GPU type.
In the “实例配置” section, confirm “实例类型”, “GPU 型号”, “GPU 数量”, and “CPU 配置”. For a quick trial, use a single-GPU instance and set the GPU count to 1.
![Choose region, GPU, and CPU configuration](../../assets/images/compshare-redacted/choose-config.png)
Scroll down and review the storage settings and the “当前配置” panel on the right. After confirming the configuration, choose a billing method under “付款方式”, then click “立即部署”.
![Confirm the configuration and click deploy](../../assets/images/compshare-redacted/getconfig.png)
## 4. Wait Until the Instance Is Running
After submitting the deployment, return to “实例列表”. A newly created instance may show an initializing status. Wait for it to finish; if the list does not update, click the refresh button in the upper-right corner.
![Wait for the instance to initialize](../../assets/images/compshare-redacted/listofhosts.png)
When the instance status becomes “运行中”, several action buttons appear on the right side of the instance card. To try OpenTalking, click “WebUI”. You can also see the “Omnirt-Quicktalk” button in the same row if you need to confirm the backend service entry.
![Click WebUI after the instance is running](../../assets/images/compshare-redacted/host-running.png)
## 5. Open OpenTalking Studio
After clicking “WebUI”, the browser opens OpenTalking Studio. On first load, the page may show “未连接” or prepare assets. Wait until the top-right status changes to “已连接”.
In the left “静态配置” panel, confirm the speech and recognition service settings. If you need to use your own service credentials, enter them in the matching fields and click “应用配置”.
Choose a digital human in the center “形象库”. On the right, confirm that “已选驱动模型” is QuickTalk and the status is “已连接”, then click “开始对话”.
![Apply settings and start the conversation](../../assets/images/compshare-redacted/config-to-submit.png)
If the page says it is preparing the current digital-human asset, wait for it to finish. The first asset preparation creates a cache, so later use is usually faster.
![Wait for digital-human assets to finish preparing](../../assets/images/compshare-redacted/wait-to-config.png)
After the connection succeeds, type a sentence in the input box at the bottom and send it, or click the microphone button for voice input. The “会话面板” on the right shows the conversation, and the digital human plays the response in the main area.
![Realtime conversation page](../../assets/images/compshare-redacted/realtime-talking.png)
## 6. Try Video Creation and Voice Cloning
To create an offline talking-head video, click “视频创作” in the top navigation. Choose a source avatar on the left, select “离线数字人口播” in the center, choose QuickTalk, and keep the task type as “数字人口播视频”.
Under “音频来源”, choose “上传音频”, “文本合成”, or “复刻音色”. To try voice cloning, click “复刻音色”, enter the talking script, choose a voice, then click “录制/上传复刻”.
![Open video creation and choose voice cloning](../../assets/images/compshare-redacted/videocreation-clone-tone.png)
The “音色复刻” panel shows the text to read. Record or upload an audio file as prompted. After confirming that the audio can be played, click the submit button and wait for the cloning task to finish.
![Submit the voice cloning task](../../assets/images/compshare-redacted/submitting-clone.png)
After voice cloning finishes, return to the video creation page. Click “试听口播” to preview the voice, or click “生成并保存”. When generation completes, the “生成结果” panel on the right shows a video preview and provides “下载” and “去资产库查看”.
![Preview the generated result and download it](../../assets/images/compshare-redacted/voice-clone-success.png)
## 7. Common Situations
- The page is blank after clicking “WebUI”: wait for the instance self-start process to finish. First startup and first asset preparation both take some time.
- The top-right status shows “未连接”: wait briefly and refresh the page. If it still does not connect, return to the instance list and confirm that the instance status is “运行中”.
- You cannot find the entry: click “WebUI” on the right side of the instance card instead of using an old browser tab.
- Voice input is unavailable: the browser may restrict microphone permissions on public web pages. Use text input for the first trial.
- You want to start over: return to the instance list, click “更多操作” on the right side of the instance, and use the restart option provided by the platform.

View File

@@ -6,6 +6,16 @@ This page helps you quickly run OpenTalking. Start with **Mock mode** to validat
- QuickTalk mode: uses a local CUDA GPU and QuickTalk weights to validate the real digital-human rendering path.
- WebUI validation: select avatar, model, and voice in the page, then start a real-time conversation.
## Quick Experience: Compshare Image
If you want to skip local dependency installation and model downloads, deploy our published Compshare community image:
- Image URL: <https://www.compshare.cn/images/TdDwmKZUZebI>
- Exposed port: `5173`
- Guide: [Compshare image quick experience](compshare-image.md)
The image already includes OpenTalking, OmniRT, the QuickTalk runtime environment, and model files. Use it to try the real digital-human path first; continue with the source-based steps below when you need local installation or development.
## Mock Mode
Mock mode is the recommended first path for OpenTalking. It does not require GPU, model weights, or an external inference service, but still validates the API, LLM, TTS, subtitle events, WebRTC, and browser playback path.

View File

@@ -0,0 +1,97 @@
# 优云智算镜像快速体验
本页介绍如何在优云智算平台使用已发布的 OpenTalking 镜像完成一次快速体验。全流程只需要在平台页面和 OpenTalking Studio 页面中点击操作。
- 镜像地址:<https://www.compshare.cn/images/TdDwmKZUZebI>
- 浏览器访问入口:5173
- 默认体验链路:OpenTalking Studio + OmniRT + QuickTalk
文档中的截图已对账号、余额、实例标识、公网地址、二维码等敏感信息做打码处理。
## 1. 注册并完成认证
如果还没有优云智算账号,打开镜像页面后按页面提示进入注册页。你可以选择“手机号注册”或“邮箱注册”。以手机号注册为例,填写手机号,点击“获取验证码”,输入验证码,勾选服务协议,然后点击“立即注册”。已有账号可以点击“登录已有账号”。
![注册优云智算账号](../../assets/images/compshare-redacted/register-image.png)
注册后如果弹出“实名领赠金”窗口,选择符合自己身份的卡片。个人用户通常选择“个人开发者/研究员”或“个人爱好者”,然后点击右下角的“前往认证”。
![选择认证身份并前往认证](../../assets/images/compshare-redacted/register-get5yuan.png)
进入“实名认证”页面后,选择对应认证类型。个人用户点击“个人认证”卡片中的“立即认证”;高校或企业用户按自己的组织类型点击对应卡片中的“立即认证”。
![进入实名认证页面](../../assets/images/compshare-redacted/get5yuan.png)
如果页面显示扫码认证,用手机扫描页面中的二维码并按手机端提示完成授权。截图中的二维码已打码,实际操作时请扫描你自己页面上显示的二维码。
![扫码完成实名认证授权](../../assets/images/compshare-redacted/scan-to-authorize.png)
## 2. 从镜像页创建实例
打开 OpenTalking 镜像页面后,确认页面标题为 OpenTalking。右侧操作区有“使用该镜像创建实例”按钮,点击它进入实例部署页面。
![点击使用该镜像创建实例](../../assets/images/compshare-redacted/image.png)
## 3. 选择实例配置
进入部署页面后,先选择地域。建议优先选择有空闲资源的地域;如果某个地域或卡型显示资源不足,可以切换到其他地域或 GPU 型号。
在“实例配置”区域依次确认“实例类型”“GPU 型号”“GPU 数量”和“CPU 配置”。快速体验建议使用单卡实例,GPU 数量选择 1 即可。
![选择地域、GPU 和 CPU 配置](../../assets/images/compshare-redacted/choose-config.png)
继续向下检查存储配置和右侧“当前配置”。确认无误后,在“付款方式”中选择合适的计费方式,然后点击右下角“立即部署”。
![确认配置并点击立即部署](../../assets/images/compshare-redacted/getconfig.png)
## 4. 等待实例运行
部署提交后,回到“实例列表”。实例刚创建时会显示“初始化”等状态,此时等待即可;如果列表没有变化,可以点击右上角刷新按钮。
![等待实例初始化](../../assets/images/compshare-redacted/listofhosts.png)
当实例状态变为“运行中”后,实例卡片右侧会出现多个操作按钮。快速体验 OpenTalking 时点击“WebUI”。如果需要确认后端服务入口,也可以在同一行看到“Omnirt-Quicktalk”按钮。
![实例运行后点击 WebUI](../../assets/images/compshare-redacted/host-running.png)
## 5. 打开 OpenTalking Studio
点击“WebUI”后,浏览器会打开 OpenTalking Studio。首次进入时页面可能显示“未连接”或正在准备资源,等待页面右上角变为“已连接”即可继续。
在左侧“静态配置”区域确认语音和识别服务配置。如果你需要填写自己的服务密钥,在对应输入框填入后点击“应用配置”。
中间“形象库”选择一个数字人形象,右侧确认“已选驱动模型”为 QuickTalk 且显示“已连接”,然后点击“开始对话”。
![应用配置并开始对话](../../assets/images/compshare-redacted/config-to-submit.png)
如果页面提示“正在准备当前形象资产”或按钮显示“准备资产中...”,等待准备完成。首次选择形象时需要生成缓存,后续再次使用会更快。
![等待形象资产准备完成](../../assets/images/compshare-redacted/wait-to-config.png)
连接成功后,可以在底部输入框输入一句话并发送,也可以点击麦克风按钮进行语音输入。右侧“会话面板”会显示对话记录,画面中会播放数字人回复。
![实时对话页面](../../assets/images/compshare-redacted/realtime-talking.png)
## 6. 体验视频创作和音色复刻
如需生成一段离线口播视频,点击顶部导航中的“视频创作”。左侧选择一个形象,中间选择“离线数字人口播”,模型选择 QuickTalk,任务类型选择“数字人口播视频”。
在“音频来源”中可以选择“上传音频”“文本合成”或“复刻音色”。如果要体验音色复刻,点击“复刻音色”,填写口播文本,选择音色后点击“录制/上传复刻”。
![进入视频创作并选择复刻音色](../../assets/images/compshare-redacted/videocreation-clone-tone.png)
弹出的“音色复刻”面板会给出朗读文本。按提示录制或上传音频文件,确认音频可播放后点击提交按钮,等待复刻任务完成。
![提交音色复刻任务](../../assets/images/compshare-redacted/submitting-clone.png)
音色复刻完成后,回到视频创作页面。可以点击“试听口播”先确认声音效果,也可以点击“生成并保存”。生成完成后,右侧“生成结果”区域会出现视频预览,并提供“下载”和“去资产库查看”按钮。
![生成结果并下载或进入资产库](../../assets/images/compshare-redacted/voice-clone-success.png)
## 7. 常见情况
- 点击“WebUI”后页面短时间空白:先等待实例完成自启动,首次启动和首次准备形象资产都需要一点时间。
- 页面右上角显示“未连接”:等待一会儿后刷新页面;如果仍未连接,回到实例列表确认实例状态是否为“运行中”。
- 找不到入口:在实例卡片右侧点击“WebUI”,不要点击浏览器里的其他历史标签页。
- 语音输入不可用:浏览器可能限制公网页面的麦克风权限,首次体验可以直接使用文本输入。
- 想重新开始:回到实例列表,点击实例右侧“更多操作”,按平台页面提供的选项重启实例。

View File

@@ -7,6 +7,16 @@
- QuickTalk 模式:使用本地 CUDA GPU 和 QuickTalk 权重,验证真实数字人渲染链路。
- WebUI 验证:在页面中选择 Avatar、模型、音色,发起一次实时对话。
## 快速体验:优云智算镜像
如果你希望跳过本地依赖安装和模型下载,可以直接部署我们发布的优云智算社区镜像:
- 镜像地址:<https://www.compshare.cn/images/TdDwmKZUZebI>
- 对外端口:`5173`
- 操作文档:[优云智算镜像快速体验](compshare-image.md)
镜像已预置 OpenTalking、OmniRT、QuickTalk 运行环境和模型文件,适合先体验真实数字人链路;需要从源码安装或做二次开发时,再继续阅读本页后续步骤。
## Mock 模式
Mock 模式是第一次使用 OpenTalking 的推荐路径。它不需要 GPU、模型权重或外部推理服务

View File

@@ -70,6 +70,7 @@ plugins:
Home: 首页
Quick Start: 快速开始
Platform Notes: 平台说明
Compshare Image Quick Experience: 优云智算镜像快速体验
Docker Deployment: Docker 部署
Windows Deployment: Windows 部署
Usage: 使用指南
@@ -206,6 +207,7 @@ nav:
- Quick Start:
- Quick Start: quick-start/index.md
- Platform Notes: quick-start/platform-notes.md
- Compshare Image Quick Experience: quick-start/compshare-image.md
- Docker Deployment: quick-start/docker-deployment.md
- Windows Deployment: quick-start/windows-deployment.md
- Usage:
@@ -286,6 +288,7 @@ nav:
- Quick Start:
- Quick Start: quick-start/index.md
- Platform Notes: quick-start/platform-notes.md
- Compshare Image Quick Experience: quick-start/compshare-image.md
- Docker Deployment: quick-start/docker-deployment.md
- Windows Deployment: quick-start/windows-deployment.md
- Usage:

View File

@@ -1446,7 +1446,8 @@ class FlashTalkRunner:
return self.model_type == "fasterliveportrait"
def _tts_opener_preload_voice(self) -> str | None:
    """Return the voice name used when preloading TTS openers.

    ``tts_edge_voice`` takes precedence and ``tts_voice`` is the fallback.
    Each value is stripped; unset or blank values are skipped.

    Returns:
        The first non-blank voice name, or ``None`` when neither setting
        holds a usable value.
    """
    settings = get_settings()
    # Edge voice is checked first so a changed generic tts_voice does not
    # override it.
    for candidate in (settings.tts_edge_voice, settings.tts_voice):
        voice = (candidate or "").strip()
        if voice:
            return voice
    return None
async def _select_tts_opener(
self,

View File

@@ -48,30 +48,6 @@ quickstart_describe_uv_default_index() {
printf '%s\n' "${UV_DEFAULT_INDEX:-${UV_INDEX_URL:-default}}"
}
# Export a UTF-8 locale plus environment defaults that disable colors and
# progress bars for quickstart scripts.
# OPENTALKING_QUICKSTART_LOCALE, when non-empty, is used as the locale name
# and skips the detection below.
quickstart_configure_utf8() {
local locale_name="${OPENTALKING_QUICKSTART_LOCALE:-}"
if [[ -z "$locale_name" ]]; then
# Use C.UTF-8 when `locale -a` lists it (matched case-insensitively, as
# "C.UTF-8" or "C.utf8"); otherwise fall back to en_US.UTF-8.
if locale -a 2>/dev/null | grep -Eiq '^(C|c)\.(UTF-8|utf8)$'; then
locale_name="C.UTF-8"
else
locale_name="en_US.UTF-8"
fi
fi
# LANG and LC_ALL are always overwritten with the chosen locale.
export LANG="$locale_name"
export LC_ALL="$locale_name"
# The ${VAR:-default} exports below keep any value already set by the caller.
export PYTHONIOENCODING="${PYTHONIOENCODING:-utf-8}"
export PYTHONUTF8="${PYTHONUTF8:-1}"
# TERM is overwritten: it takes OPENTALKING_QUICKSTART_TERM, or "dumb" when
# that is unset or empty.
export TERM="${OPENTALKING_QUICKSTART_TERM:-dumb}"
export NO_COLOR="${NO_COLOR:-1}"
export CLICOLOR="${CLICOLOR:-0}"
export FORCE_COLOR="${FORCE_COLOR:-0}"
export PY_COLORS="${PY_COLORS:-0}"
export TQDM_DISABLE="${TQDM_DISABLE:-1}"
export HF_HUB_DISABLE_PROGRESS_BARS="${HF_HUB_DISABLE_PROGRESS_BARS:-1}"
}
quickstart_source_env() {
local env_file="$1"
local restore_allexport=1

View File

@@ -9,7 +9,6 @@ source "$script_dir/_helpers.sh"
env_file="${OPENTALKING_QUICKSTART_ENV:-$script_dir/env}"
quickstart_source_env "$env_file"
quickstart_configure_utf8
usage() {
cat <<'USAGE'
@@ -83,6 +82,13 @@ if [[ -f "$pid_file" ]]; then
rm -f "$pid_file"
fi
if quickstart_port_in_use "$web_port"; then
echo "OpenTalking frontend port $web_port is already in use." >&2
echo "Stop the existing service first, or choose another --web-port." >&2
quickstart_describe_port "$web_port" >&2 || true
exit 1
fi
if [[ ! -d "$web_dir/node_modules" ]]; then
echo "Installing frontend dependencies with npm ci ..."
(cd "$web_dir" && npm ci)
@@ -97,7 +103,7 @@ echo " api: http://127.0.0.1:$backend_port"
(
cd "$web_dir"
export VITE_BACKEND_PORT="$backend_port"
quickstart_detach "$log_file" ./node_modules/.bin/vite --host "$web_host" --port "$web_port" >"$pid_file"
quickstart_detach "$log_file" ./node_modules/.bin/vite --host "$web_host" --port "$web_port" --strictPort >"$pid_file"
)
pid="$(cat "$pid_file" 2>/dev/null || true)"

View File

@@ -134,34 +134,79 @@ echo " uv index: $(quickstart_describe_uv_default_index)"
echo " port: $port"
echo " log: $log_file"
(
cd "$omnirt_dir"
source .venv/bin/activate
uv_bin=""
if [[ "$install_deps" == "1" ]]; then
uv_bin="$(quickstart_require_uv "OmniRT QuickTalk dependency installation")"
fi
if [[ "$install_deps" == "1" ]]; then
uv_bin="$(quickstart_require_uv "OmniRT QuickTalk dependency installation")"
"$uv_bin" sync --extra server --extra quicktalk-cuda --python 3.11
fi
pid="$(
quickstart_detach "$log_file" bash -c '
set -euo pipefail
export OMNIRT_MODEL_ROOT="$OMNIRT_MODEL_ROOT"
export OMNIRT_QUICKTALK_RUNTIME=1
export OMNIRT_QUICKTALK_MODEL_ROOT="$quicktalk_root"
export OMNIRT_QUICKTALK_CHECKPOINT="$checkpoint"
export OMNIRT_QUICKTALK_DEVICE="$device"
export OMNIRT_QUICKTALK_HUBERT_DEVICE="$hubert_device"
export OMNIRT_QUICKTALK_MAX_LONG_EDGE="${OMNIRT_QUICKTALK_MAX_LONG_EDGE:-900}"
export OMNIRT_QUICKTALK_MAX_TEMPLATE_SECONDS="${OMNIRT_QUICKTALK_MAX_TEMPLATE_SECONDS:-1}"
export OMNIRT_QUICKTALK_SCALE_H="${OMNIRT_QUICKTALK_SCALE_H:-1.6}"
export OMNIRT_QUICKTALK_SCALE_W="${OMNIRT_QUICKTALK_SCALE_W:-3.6}"
export OMNIRT_QUICKTALK_RESOLUTION="${OMNIRT_QUICKTALK_RESOLUTION:-256}"
export OMNIRT_QUICKTALK_NECK_FADE_START="${OMNIRT_QUICKTALK_NECK_FADE_START:-0.72}"
export OMNIRT_QUICKTALK_NECK_FADE_END="${OMNIRT_QUICKTALK_NECK_FADE_END:-0.88}"
export OMNIRT_ALLOWED_FRAME_ROOTS="${OMNIRT_ALLOWED_FRAME_ROOTS:-$DIGITAL_HUMAN_HOME/opentalking/examples/avatars}"
omnirt_dir="$1"
install_deps="$2"
uv_bin="$3"
omnirt_model_root="$4"
quicktalk_root="$5"
checkpoint="$6"
device="$7"
hubert_device="$8"
max_long_edge="$9"
max_template_seconds="${10}"
scale_h="${11}"
scale_w="${12}"
resolution="${13}"
neck_fade_start="${14}"
neck_fade_end="${15}"
allowed_frame_roots="${16}"
host="${17}"
port="${18}"
backend="${19}"
exec omnirt serve-avatar-ws --host "$host" --port "$port" --backend "$backend"
) >"$log_file" 2>&1 &
cd "$omnirt_dir"
source .venv/bin/activate
pid="$!"
if [[ "$install_deps" == "1" ]]; then
"$uv_bin" sync --extra server --extra quicktalk-cuda --python 3.11
fi
export OMNIRT_MODEL_ROOT="$omnirt_model_root"
export OMNIRT_QUICKTALK_RUNTIME=1
export OMNIRT_QUICKTALK_MODEL_ROOT="$quicktalk_root"
export OMNIRT_QUICKTALK_CHECKPOINT="$checkpoint"
export OMNIRT_QUICKTALK_DEVICE="$device"
export OMNIRT_QUICKTALK_HUBERT_DEVICE="$hubert_device"
export OMNIRT_QUICKTALK_MAX_LONG_EDGE="$max_long_edge"
export OMNIRT_QUICKTALK_MAX_TEMPLATE_SECONDS="$max_template_seconds"
export OMNIRT_QUICKTALK_SCALE_H="$scale_h"
export OMNIRT_QUICKTALK_SCALE_W="$scale_w"
export OMNIRT_QUICKTALK_RESOLUTION="$resolution"
export OMNIRT_QUICKTALK_NECK_FADE_START="$neck_fade_start"
export OMNIRT_QUICKTALK_NECK_FADE_END="$neck_fade_end"
export OMNIRT_ALLOWED_FRAME_ROOTS="$allowed_frame_roots"
exec omnirt serve-avatar-ws --host "$host" --port "$port" --backend "$backend"
' bash \
"$omnirt_dir" \
"$install_deps" \
"$uv_bin" \
"$OMNIRT_MODEL_ROOT" \
"$quicktalk_root" \
"$checkpoint" \
"$device" \
"$hubert_device" \
"${OMNIRT_QUICKTALK_MAX_LONG_EDGE:-900}" \
"${OMNIRT_QUICKTALK_MAX_TEMPLATE_SECONDS:-1}" \
"${OMNIRT_QUICKTALK_SCALE_H:-1.6}" \
"${OMNIRT_QUICKTALK_SCALE_W:-3.6}" \
"${OMNIRT_QUICKTALK_RESOLUTION:-256}" \
"${OMNIRT_QUICKTALK_NECK_FADE_START:-0.72}" \
"${OMNIRT_QUICKTALK_NECK_FADE_END:-0.88}" \
"${OMNIRT_ALLOWED_FRAME_ROOTS:-$DIGITAL_HUMAN_HOME/opentalking/examples/avatars}" \
"$host" \
"$port" \
"$backend"
)"
echo "$pid" > "$pid_file"
for _ in {1..180}; do

View File

@@ -9,7 +9,6 @@ source "$script_dir/_helpers.sh"
env_file="${OPENTALKING_QUICKSTART_ENV:-$script_dir/env}"
quickstart_source_env "$env_file"
quickstart_configure_utf8
usage() {
cat <<'USAGE'
@@ -83,9 +82,12 @@ mkdir -p "$run_dir" "$log_dir"
if [[ -f "$pid_file" ]]; then
old_pid="$(cat "$pid_file" 2>/dev/null || true)"
if [[ -n "$old_pid" ]] && kill -0 "$old_pid" >/dev/null 2>&1; then
echo "OpenTalking API is already running: pid=$old_pid port=$api_port"
echo "Log: $log_file"
exit 0
if curl --max-time 2 -fsS "http://127.0.0.1:$api_port/models" >/dev/null 2>&1; then
echo "OpenTalking API is already running: pid=$old_pid port=$api_port"
echo "Log: $log_file"
exit 0
fi
echo "Stale OpenTalking API pid file: pid=$old_pid port=$api_port" >&2
fi
rm -f "$pid_file"
fi

View File

@@ -58,6 +58,7 @@ while [[ $# -gt 0 ]]; do
esac
done
run_dir="$DIGITAL_HUMAN_HOME/run"
web_dir="$repo_root/apps/web"
stop_pid_file() {
local name="$1"
@@ -144,7 +145,7 @@ stop_unified_all() {
stop_vite_port() {
local port="$1"
local pids
pids="$(pgrep -f "$repo_root/apps/web/node_modules/.bin/vite .*--port $port" || true)"
pids="$(pgrep -f "vite .*--port $port" || true)"
if [[ -z "$pids" ]]; then
return
fi
@@ -152,6 +153,9 @@ stop_vite_port() {
if [[ "$pid" == "$$" ]]; then
continue
fi
if [[ "$(readlink -f "/proc/$pid/cwd" 2>/dev/null || true)" != "$web_dir" ]]; then
continue
fi
echo "Stopping OpenTalking frontend Vite residue: pid=$pid port=$port"
kill "$pid" >/dev/null 2>&1 || true
done
@@ -159,7 +163,7 @@ stop_vite_port() {
stop_vite_all() {
local pids
pids="$(pgrep -f "$repo_root/apps/web/node_modules/.bin/vite .*--port" || true)"
pids="$(pgrep -f "vite .*--port" || true)"
if [[ -z "$pids" ]]; then
return
fi
@@ -167,6 +171,9 @@ stop_vite_all() {
if [[ "$pid" == "$$" ]]; then
continue
fi
if [[ "$(readlink -f "/proc/$pid/cwd" 2>/dev/null || true)" != "$web_dir" ]]; then
continue
fi
echo "Stopping OpenTalking frontend Vite residue: pid=$pid"
kill "$pid" >/dev/null 2>&1 || true
done

View File

@@ -0,0 +1,44 @@
from __future__ import annotations
from pathlib import Path
ROOT = Path(__file__).resolve().parents[2]
def test_runtime_config_section_is_first_realtime_settings_section() -> None:
    """Check the runtime-config section title appears before the avatar section title."""
    panel_path = ROOT / "apps/web/src/components/SettingsPanel.tsx"
    source = panel_path.read_text(encoding="utf-8")

    # str.index raises ValueError when a title is missing, which fails the test.
    runtime_idx = source.index('title="运行配置"')
    avatar_idx = source.index('title="数字人形象"')
    assert runtime_idx < avatar_idx

    # The panel source must also mention the runtime-config prop, its apply
    # callback, and the apply-button label.
    for marker in (
        "runtimeConfig: RuntimeConfigResponse | null",
        "onRuntimeConfigApply",
        "应用配置",
    ):
        assert marker in source
def test_runtime_config_inputs_do_not_reveal_keys() -> None:
    """Check key fields are password inputs with empty-string values and that
    the API typings expose only an ``api_key_set`` flag."""
    web_src = ROOT / "apps/web/src"
    settings_source = (web_src / "components/SettingsPanel.tsx").read_text(encoding="utf-8")
    api_source = (web_src / "lib/api.ts").read_text(encoding="utf-8")

    key_fields = ("llmApiKey", "sttApiKey", "ttsApiKey")

    for field in key_fields:
        assert field in settings_source
    assert 'type="password"' in settings_source
    # Each key field must appear with an empty-string value in the source.
    for field in key_fields:
        assert f'{field}: ""' in settings_source

    assert "api_key_set: boolean" in api_source
    assert "api_key: string" not in api_source
def test_app_loads_and_applies_runtime_config() -> None:
    """Check App.tsx and api.ts mention the runtime-config load/apply identifiers."""
    web_src = ROOT / "apps/web/src"
    app_source = (web_src / "App.tsx").read_text(encoding="utf-8")
    api_source = (web_src / "lib/api.ts").read_text(encoding="utf-8")

    for api_name in ("loadRuntimeConfig", "applyRuntimeConfig"):
        assert api_name in api_source

    for app_name in (
        "refreshRuntimeConfig",
        "handleApplyRuntimeConfig",
        "setRuntimeConfig(next)",
        "setAsrProvider",
        "setTtsProvider",
    ):
        assert app_name in app_source

View File

@@ -114,6 +114,28 @@ def test_quickstart_process_launch_does_not_require_setsid_on_macos(relpath: str
assert "start_new_session=True" in helpers
def test_start_frontend_requires_requested_port() -> None:
    """Check start_frontend.sh probes the web port and passes --strictPort to Vite."""
    script_path = REPO_ROOT / "scripts/quickstart/start_frontend.sh"
    source = script_path.read_text(encoding="utf-8")

    for needle in ('quickstart_port_in_use "$web_port"', "--strictPort"):
        assert needle in source
def test_stop_all_finds_vite_residue_from_web_cwd() -> None:
    """Check stop_all.sh matches Vite processes loosely and filters them by /proc cwd."""
    script_path = REPO_ROOT / "scripts/quickstart/stop_all.sh"
    source = script_path.read_text(encoding="utf-8")

    expected_snippets = (
        'web_dir="$repo_root/apps/web"',
        'readlink -f "/proc/$pid/cwd"',
        "vite .*--port $port",
    )
    for snippet in expected_snippets:
        assert snippet in source
def test_start_omnirt_quicktalk_detaches_long_running_server() -> None:
    """Check the QuickTalk launcher uses quickstart_detach, not a backgrounded subshell."""
    script_path = REPO_ROOT / "scripts/quickstart/start_omnirt_quicktalk.sh"
    source = script_path.read_text(encoding="utf-8")

    assert "quickstart_detach" in source
    # The old `( ... ) >"$log_file" 2>&1 &` launch form must be gone.
    backgrounded_subshell = ') >"$log_file" 2>&1 &'
    assert backgrounded_subshell not in source
def test_start_opentalking_resolves_ffmpeg_fallback() -> None:
source = (REPO_ROOT / "scripts/quickstart/start_opentalking.sh").read_text(encoding="utf-8")
helpers = (REPO_ROOT / "scripts/quickstart/_helpers.sh").read_text(encoding="utf-8")
@@ -123,6 +145,13 @@ def test_start_opentalking_resolves_ffmpeg_fallback() -> None:
assert "imageio_ffmpeg.get_ffmpeg_exe()" in helpers
def test_start_opentalking_revalidates_existing_pid_with_http_probe() -> None:
    """Check start_opentalking.sh probes /models over HTTP and reports stale pid files."""
    script_path = REPO_ROOT / "scripts/quickstart/start_opentalking.sh"
    source = script_path.read_text(encoding="utf-8")

    probe_command = 'curl --max-time 2 -fsS "http://127.0.0.1:$api_port/models"'
    stale_message = "Stale OpenTalking API pid file"
    assert probe_command in source
    assert stale_message in source
def test_quickstart_source_ascend_env_tolerates_unset_ld_library_path(tmp_path: Path) -> None:
if shutil.which("bash") is None:
pytest.skip("bash is not available")

View File

@@ -8,6 +8,7 @@ import numpy as np
from PIL import Image
import pytest
from opentalking.core.config import get_settings
from opentalking.core.model_config import clear_model_config_cache
from opentalking.media.frame_avatar import resize_reference_image_to_video
from opentalking.pipeline.speak.synthesis_runner import FlashTalkRunner
@@ -193,6 +194,21 @@ def test_fasterliveportrait_preloads_tts_openers_for_default_voice(
assert runner._tts_opener_preload_voice() == "zh-CN-XiaoxiaoNeural"
def test_fasterliveportrait_tts_opener_preload_uses_edge_voice_when_tts_voice_changes(
    monkeypatch,
) -> None:
    """Check opener preload reports the Edge voice when the generic TTS voice differs."""
    env_overrides = {
        "OPENTALKING_TTS_VOICE": "alloy",
        "OPENTALKING_TTS_EDGE_VOICE": "zh-CN-XiaoxiaoNeural",
    }
    for name, value in env_overrides.items():
        monkeypatch.setenv(name, value)
    # Clear the settings cache so the next get_settings() call is not served
    # from an earlier cached result.
    get_settings.cache_clear()

    # __new__ creates the runner without running __init__.
    flp_runner = FlashTalkRunner.__new__(FlashTalkRunner)
    flp_runner.model_type = "fasterliveportrait"
    try:
        preload_voice = flp_runner._tts_opener_preload_voice()
        assert preload_voice == "zh-CN-XiaoxiaoNeural"
    finally:
        # Clear the cache again on the way out, whether or not the assertion held.
        get_settings.cache_clear()
def test_fasterliveportrait_video_config_preserves_reference_aspect_ratio(
tmp_path,
monkeypatch,