Files

110 lines
3.0 KiB
Python

import subprocess
from pathlib import Path
from typing import Optional
from fastapi import FastAPI
from pydantic import BaseModel, Field
# ASGI application object; the route decorators in this module register on it.
app = FastAPI(title="Emotion Museum TTS")

# Every Piper asset is located relative to the directory containing this file.
BASE_DIR = Path(__file__).resolve().parent
# Piper executable, expected under the .venv/bin directory next to this file.
PIPER_BIN = BASE_DIR.joinpath(".venv", "bin", "piper")
# ONNX voice model and its JSON config, both expected under models/.
PIPER_MODEL = BASE_DIR.joinpath("models", "zh_CN-huayan-medium.onnx")
PIPER_CONFIG = BASE_DIR.joinpath("models", "zh_CN-huayan-medium.onnx.json")
class SynthesizeRequest(BaseModel):
    """Request body accepted by the ``/synthesize`` endpoint.

    Field names are camelCase because they are the keys of the request
    payload itself.
    """

    # Text to speak; required, between 1 and 5000 characters.
    text: str = Field(..., min_length=1, max_length=5000)
    # Voice identifier. NOTE(review): not read by the code visible in this
    # module; the Piper model path is fixed at module level.
    voice: str = "default_zh_female"
    # Filesystem path the generated audio is written to; required.
    outputPath: str
    # Speaking-rate multiplier, validated to the range 0.6-1.4.
    speechRate: Optional[float] = Field(0.92, ge=0.6, le=1.4)
    # Pitch offset, validated to the range -20.0-20.0.
    # NOTE(review): not read by the code visible in this module.
    pitch: Optional[float] = Field(0.0, ge=-20.0, le=20.0)
    # Emotion preset name; selects the pause/noise settings passed to Piper.
    emotion: Optional[str] = "story"
def clamp(value: float, minimum: float, maximum: float) -> float:
    """Limit *value* to the range bounded by *minimum* and *maximum*.

    The upper bound is applied first and the lower bound second, so if the
    bounds are inverted (``minimum > maximum``) the result is *minimum*.

    Args:
        value: Number to constrain.
        minimum: Lowest value that may be returned.
        maximum: Highest value that may be returned.

    Returns:
        *value* when it lies strictly between the bounds, otherwise the bound
        it reached or exceeded.
    """
    capped = value if value < maximum else maximum
    return capped if capped > minimum else minimum
def resolve_piper_args(request: SynthesizeRequest) -> list[str]:
    """Build the Piper tuning flags for one synthesis request.

    The speech rate (0.92 when ``speechRate`` is falsy) is clamped to
    0.6-1.4 and inverted into ``--length_scale``, rounded to two decimals.
    The emotion name (``"story"`` when ``emotion`` is falsy) is lower-cased
    and selects a preset of sentence silence and noise values; unrecognised
    names use the fallback preset.

    Args:
        request: Parsed synthesis request; only ``speechRate`` and
            ``emotion`` are read here.

    Returns:
        Flag/value strings in the order ``--sentence-silence``,
        ``--length_scale``, ``--noise_scale``, ``--noise_w``.
    """
    rate = clamp(float(request.speechRate or 0.92), 0.6, 1.4)
    mood = (request.emotion or "story").lower()

    # Each preset is (sentence_silence, noise_scale, noise_w).
    gentle = (0.5, 0.58, 0.68)
    narrative = (0.48, 0.68, 0.76)
    fallback = (0.46, 0.64, 0.72)
    presets = {
        "calm": gentle,
        "soft": gentle,
        "warm": gentle,
        "story": narrative,
        "narration": narrative,
        "expressive": narrative,
    }
    sentence_silence, noise_scale, noise_w = presets.get(mood, fallback)

    options = (
        ("--sentence-silence", sentence_silence),
        ("--length_scale", round(1.0 / rate, 2)),
        ("--noise_scale", noise_scale),
        ("--noise_w", noise_w),
    )
    # Flatten the (flag, value) pairs into the flat argv form Piper receives.
    return [token for flag, setting in options for token in (flag, str(setting))]
@app.get("/health")
def health():
    """Report service status and whether the voice model files are present.

    ``modelReady`` is true only when both the ONNX model and its JSON config
    exist on disk. The Piper binary itself is not checked here.
    """
    required_files = (PIPER_MODEL, PIPER_CONFIG)
    return {
        "status": "ok",
        "engine": "piper",
        "modelReady": all(path.exists() for path in required_files),
    }
@app.post("/synthesize")
def synthesize(request: SynthesizeRequest):
    """Synthesize ``request.text`` to an audio file by running the Piper CLI.

    The text is piped to Piper on stdin and the audio is written to
    ``request.outputPath``; missing parent directories are created first.
    Any failure (missing binary or model, filesystem error, Piper exiting
    non-zero, the 180 second timeout) is reported in the response body
    rather than raised.

    Args:
        request: Validated synthesis request.

    Returns:
        On success: ``success=True``, ``audioPath`` set to the output path,
        ``durationMs=None`` and ``engine="piper"``. On failure:
        ``success=False``, ``audioPath=None``, ``durationMs=None``,
        ``engine="piper"`` and an ``errorMessage`` string.
    """
    # NOTE(review): outputPath comes straight from the request body and is
    # used unchecked, so a caller can make this process create directories and
    # write a file anywhere it has permission. Confirm the endpoint is only
    # reachable by trusted callers, or restrict the path to an allowed root.
    output = Path(request.outputPath)
    error_message = None
    try:
        if not PIPER_BIN.exists():
            raise RuntimeError(f"piper binary not found: {PIPER_BIN}")
        if not PIPER_MODEL.exists() or not PIPER_CONFIG.exists():
            raise RuntimeError("piper Chinese voice model is not installed")
        # Created inside the try block so a filesystem error (permissions,
        # read-only volume, ...) yields the structured failure response
        # instead of escaping as an unhandled exception.
        output.parent.mkdir(parents=True, exist_ok=True)
        subprocess.run(
            [
                str(PIPER_BIN),
                "--model",
                str(PIPER_MODEL),
                "--config",
                str(PIPER_CONFIG),
                "--output_file",
                str(output),
                *resolve_piper_args(request),
            ],
            input=request.text,
            text=True,
            # Pin UTF-8 so Chinese text is not encoded with whatever the
            # server's locale default happens to be; "replace" keeps stray
            # undecodable bytes in Piper's output from raising.
            encoding="utf-8",
            errors="replace",
            check=True,
            capture_output=True,
            timeout=180,
        )
    except subprocess.CalledProcessError as exc:
        # str(exc) only carries the command line and exit status; append the
        # captured stderr so the caller can see why Piper failed.
        stderr_text = (exc.stderr or "").strip()
        error_message = f"{exc}: {stderr_text}" if stderr_text else str(exc)
    except Exception as exc:
        # Endpoint boundary: every other failure is also reported in-band.
        error_message = str(exc)

    if error_message is not None:
        return {
            "success": False,
            "audioPath": None,
            "durationMs": None,
            "engine": "piper",
            "errorMessage": error_message,
        }
    return {
        "success": True,
        "audioPath": str(output),
        "durationMs": None,
        "engine": "piper",
    }