feat: TTS 服务功能完善(任务管理、配置优化、客户端实现)
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -16,6 +17,42 @@ class SynthesizeRequest(BaseModel):
|
||||
text: str = Field(min_length=1, max_length=5000)
|
||||
voice: str = "default_zh_female"
|
||||
outputPath: str
|
||||
speechRate: Optional[float] = Field(default=0.92, ge=0.6, le=1.4)
|
||||
pitch: Optional[float] = Field(default=0.0, ge=-20.0, le=20.0)
|
||||
emotion: Optional[str] = "story"
|
||||
|
||||
|
||||
def clamp(value: float, minimum: float, maximum: float) -> float:
    """Limit *value* to the range bounded by *minimum* and *maximum*.

    Args:
        value: The number to constrain.
        minimum: Lower bound; returned when the capped value does not exceed it.
        maximum: Upper bound; returned when *value* is not below it.

    Returns:
        *value* when it lies strictly between the bounds, otherwise the
        bound it crossed.
    """
    # Apply the upper bound first, then the lower bound, so the lower bound
    # wins if the two bounds are ever passed in inverted order.
    capped = value if value < maximum else maximum
    return capped if capped > minimum else minimum
|
||||
|
||||
|
||||
def resolve_piper_args(request: SynthesizeRequest) -> list[str]:
    """Build the tuning flags for the synthesis command from a request.

    The speech rate falls back to 0.92 when unset and is clamped to
    [0.6, 1.4]; its reciprocal (rounded to two decimals) becomes the length
    scale. The emotion falls back to "story", is lower-cased, and selects
    one of the pause/noise presets below.

    Args:
        request: Synthesis request; only ``speechRate`` and ``emotion`` are
            read here.

    Returns:
        A flat list of alternating flag names and stringified values, in
        the order sentence silence, length scale, noise scale, noise w.
    """
    rate = clamp(float(request.speechRate or 0.92), 0.6, 1.4)
    mood = (request.emotion or "story").lower()

    # Each preset is (sentence silence, noise scale, noise w).
    if mood in {"calm", "soft", "warm"}:
        pause, noise, noise_width = 0.5, 0.58, 0.68
    elif mood in {"story", "narration", "expressive"}:
        pause, noise, noise_width = 0.48, 0.68, 0.76
    else:
        # Any unrecognised emotion keeps the neutral defaults.
        pause, noise, noise_width = 0.46, 0.64, 0.72

    # A higher speech rate means a shorter length scale, hence the reciprocal.
    settings = (
        ("--sentence-silence", pause),
        ("--length_scale", round(1.0 / rate, 2)),
        ("--noise_scale", noise),
        ("--noise_w", noise_width),
    )

    args: list[str] = []
    for flag, setting in settings:
        args.append(flag)
        args.append(str(setting))
    return args
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
@@ -47,8 +84,7 @@ def synthesize(request: SynthesizeRequest):
|
||||
str(PIPER_CONFIG),
|
||||
"--output_file",
|
||||
str(output),
|
||||
"--sentence-silence",
|
||||
"0.35",
|
||||
*resolve_piper_args(request),
|
||||
],
|
||||
input=request.text,
|
||||
text=True,
|
||||
|
||||
Reference in New Issue
Block a user