110 lines
3.0 KiB
Python
110 lines
3.0 KiB
Python
import subprocess
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from fastapi import FastAPI
|
|
from pydantic import BaseModel, Field
|
|
|
|
# FastAPI application instance; the @app.get / @app.post decorators in this
# file register their route handlers on it.
app = FastAPI(title="Emotion Museum TTS")
|
|
|
|
# All Piper resources are located relative to the directory that holds this
# file, so the paths do not depend on the process's working directory.
BASE_DIR = Path(__file__).resolve().parent

# Piper executable inside the virtualenv that sits next to this file.
PIPER_BIN = BASE_DIR.joinpath(".venv", "bin", "piper")

# Voice model and its JSON config, both under "models/".
PIPER_MODEL = BASE_DIR.joinpath("models", "zh_CN-huayan-medium.onnx")
PIPER_CONFIG = BASE_DIR.joinpath("models", "zh_CN-huayan-medium.onnx.json")
|
|
|
|
|
|
class SynthesizeRequest(BaseModel):
    """Request body accepted by the ``/synthesize`` endpoint."""

    # Text to be spoken; validated to be 1-5000 characters long.
    text: str = Field(min_length=1, max_length=5000)

    # Requested voice identifier.
    # NOTE(review): neither resolve_piper_args nor synthesize reads this
    # field; the Piper model path is fixed by module constants.
    voice: str = "default_zh_female"

    # Filesystem path the synthesized audio is written to (required).
    outputPath: str

    # Speaking-rate multiplier, validated to 0.6-1.4; resolve_piper_args
    # converts it to Piper's length scale as 1 / speechRate.
    speechRate: Optional[float] = Field(default=0.92, ge=0.6, le=1.4)

    # Pitch adjustment, validated to -20.0..20.0.
    # NOTE(review): not consumed by resolve_piper_args or synthesize.
    pitch: Optional[float] = Field(default=0.0, ge=-20.0, le=20.0)

    # Emotion label; resolve_piper_args lower-cases it and uses it to pick
    # the pause/noise preset.
    emotion: Optional[str] = "story"
|
|
|
|
|
|
def clamp(value: float, minimum: float, maximum: float) -> float:
    """Return *value* limited to the range ``[minimum, maximum]``.

    The upper bound is applied first and the lower bound second, so if the
    bounds are inverted (``minimum > maximum``) the result is ``minimum``.
    """
    # Cap from above first ...
    capped = value if value < maximum else maximum
    # ... then raise to the floor if the capped value is not above it.
    return capped if capped > minimum else minimum
|
|
|
|
|
|
def resolve_piper_args(request: SynthesizeRequest) -> list[str]:
    """Build the prosody-related command-line arguments for Piper.

    Args:
        request: The synthesis request; only ``speechRate`` and ``emotion``
            are read.

    Returns:
        A flat list alternating flag and value strings, in the order
        ``--sentence-silence``, ``--length_scale``, ``--noise_scale``,
        ``--noise_w``.
    """
    # A missing (or falsy) rate falls back to 0.92; the result is then kept
    # within 0.6-1.4 regardless of what the caller sent.
    rate = clamp(float(request.speechRate or 0.92), 0.6, 1.4)
    mood = (request.emotion or "story").lower()

    # Length scale is the reciprocal of the rate, rounded to two decimals.
    duration_factor = round(1.0 / rate, 2)

    # Preset triple: (sentence silence, noise scale, noise w).
    if mood in {"calm", "soft", "warm"}:
        pause, noise, noise_width = 0.5, 0.58, 0.68
    elif mood in {"story", "narration", "expressive"}:
        pause, noise, noise_width = 0.48, 0.68, 0.76
    else:
        # Any other emotion label uses the neutral preset.
        pause, noise, noise_width = 0.46, 0.64, 0.72

    flag_values = (
        ("--sentence-silence", pause),
        ("--length_scale", duration_factor),
        ("--noise_scale", noise),
        ("--noise_w", noise_width),
    )
    piper_args: list[str] = []
    for flag, setting in flag_values:
        piper_args.extend((flag, str(setting)))
    return piper_args
|
|
|
|
|
|
@app.get("/health")
def health():
    """Report service status and whether the Piper voice files are present.

    Returns:
        A dict with a constant ``status`` of ``"ok"``, the ``engine`` name,
        and ``modelReady``, which is true only when both the model file and
        its config file exist on disk.
    """
    required_files = (PIPER_MODEL, PIPER_CONFIG)
    model_ready = all(path.exists() for path in required_files)
    return {"status": "ok", "engine": "piper", "modelReady": model_ready}
|
|
|
|
|
|
@app.post("/synthesize")
def synthesize(request: SynthesizeRequest):
    """Synthesize ``request.text`` to an audio file by running the Piper CLI.

    The text is piped to Piper on stdin and the audio is written to
    ``request.outputPath``; missing parent directories are created.

    Args:
        request: Validated synthesis request.

    Returns:
        On success, a dict with ``success=True``, ``audioPath`` (the output
        path as a string), ``durationMs=None`` and ``engine="piper"``.
        On any failure, a dict with ``success=False``, ``audioPath=None``,
        ``durationMs=None``, ``engine="piper"`` and an ``errorMessage``.
        Failures are reported in the payload rather than raised.
    """
    # NOTE(review): outputPath is taken from the request body and used as-is,
    # so a caller can make this service create directories and write files
    # anywhere the process has permission. Confirm that every caller is
    # trusted, or restrict the path to a known output directory.
    output = Path(request.outputPath)

    error_message = None
    try:
        # Inside the try so that a filesystem error (permissions, invalid
        # path) produces the structured failure response like everything else.
        output.parent.mkdir(parents=True, exist_ok=True)

        if not PIPER_BIN.exists():
            raise RuntimeError(f"piper binary not found: {PIPER_BIN}")
        if not PIPER_MODEL.exists() or not PIPER_CONFIG.exists():
            raise RuntimeError("piper Chinese voice model is not installed")

        subprocess.run(
            [
                str(PIPER_BIN),
                "--model",
                str(PIPER_MODEL),
                "--config",
                str(PIPER_CONFIG),
                "--output_file",
                str(output),
                *resolve_piper_args(request),
            ],
            input=request.text,
            # Explicit UTF-8 instead of the locale default: the input is
            # Chinese text and must not depend on the host's locale settings.
            # errors="replace" keeps undecodable bytes in Piper's output from
            # turning a finished run into a decode error.
            encoding="utf-8",
            errors="replace",
            check=True,
            capture_output=True,
            timeout=180,
        )
    except subprocess.CalledProcessError as exc:
        # str(exc) only carries the command and exit status; append the
        # captured stderr so the actual Piper diagnostic reaches the caller.
        stderr_text = (exc.stderr or "").strip()
        error_message = f"{exc}: {stderr_text}" if stderr_text else str(exc)
    except Exception as exc:
        # Endpoint boundary: every other failure (missing files, timeout,
        # OS errors) is reported in the response body.
        error_message = str(exc)

    if error_message is not None:
        return {
            "success": False,
            "audioPath": None,
            "durationMs": None,
            "engine": "piper",
            "errorMessage": error_message,
        }

    return {
        "success": True,
        "audioPath": str(output),
        "durationMs": None,
        "engine": "piper",
    }
|