feat: TTS 服务功能完善(任务管理、配置优化、客户端实现)

This commit is contained in:
2026-05-26 20:49:58 +08:00
parent 2d7776dd4d
commit c289097ca0
11 changed files with 307 additions and 39 deletions
+38 -2
View File
@@ -1,5 +1,6 @@
import subprocess
from pathlib import Path
from typing import Optional
from fastapi import FastAPI
from pydantic import BaseModel, Field
@@ -16,6 +17,42 @@ class SynthesizeRequest(BaseModel):
text: str = Field(min_length=1, max_length=5000)
voice: str = "default_zh_female"
outputPath: str
speechRate: Optional[float] = Field(default=0.92, ge=0.6, le=1.4)
pitch: Optional[float] = Field(default=0.0, ge=-20.0, le=20.0)
emotion: Optional[str] = "story"
def clamp(value: float, minimum: float, maximum: float) -> float:
return max(minimum, min(maximum, value))
def resolve_piper_args(request: SynthesizeRequest) -> list[str]:
speech_rate = clamp(float(request.speechRate or 0.92), 0.6, 1.4)
emotion = (request.emotion or "story").lower()
length_scale = round(1.0 / speech_rate, 2)
sentence_silence = 0.46
noise_scale = 0.64
noise_w = 0.72
if emotion in {"calm", "soft", "warm"}:
sentence_silence = 0.5
noise_scale = 0.58
noise_w = 0.68
elif emotion in {"story", "narration", "expressive"}:
sentence_silence = 0.48
noise_scale = 0.68
noise_w = 0.76
return [
"--sentence-silence",
str(sentence_silence),
"--length_scale",
str(length_scale),
"--noise_scale",
str(noise_scale),
"--noise_w",
str(noise_w),
]
@app.get("/health")
@@ -47,8 +84,7 @@ def synthesize(request: SynthesizeRequest):
str(PIPER_CONFIG),
"--output_file",
str(output),
"--sentence-silence",
"0.35",
*resolve_piper_args(request),
],
input=request.text,
text=True,