"""HTTP wrapper that exposes the Piper text-to-speech CLI through FastAPI."""

import logging
import subprocess
from pathlib import Path
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

app = FastAPI(title="Emotion Museum TTS")

BASE_DIR = Path(__file__).resolve().parent
PIPER_BIN = BASE_DIR / ".venv" / "bin" / "piper"
PIPER_MODEL = BASE_DIR / "models" / "zh_CN-huayan-medium.onnx"
PIPER_CONFIG = BASE_DIR / "models" / "zh_CN-huayan-medium.onnx.json"

# Upper bound, in seconds, on a single piper invocation.
PIPER_TIMEOUT_SECONDS = 180


class SynthesizeRequest(BaseModel):
    """Request body accepted by ``POST /synthesize``."""

    # Text to speak; must be 1-5000 characters long.
    text: str = Field(min_length=1, max_length=5000)
    # Not read anywhere in this module; the model file is fixed by PIPER_MODEL.
    voice: str = "default_zh_female"
    # Destination path of the generated audio file; used verbatim.
    outputPath: str
    # Speed multiplier; turned into piper's length scale as 1 / speechRate.
    speechRate: Optional[float] = Field(default=0.92, ge=0.6, le=1.4)
    # Validated to [-20, 20] but not forwarded to piper by this module.
    pitch: Optional[float] = Field(default=0.0, ge=-20.0, le=20.0)
    # Style preset name; see resolve_piper_args for the recognised values.
    emotion: Optional[str] = "story"


def clamp(value: float, minimum: float, maximum: float) -> float:
    """Return *value* limited to the closed interval [minimum, maximum]."""
    return max(minimum, min(maximum, value))


def resolve_piper_args(request: SynthesizeRequest) -> list[str]:
    """Translate the request's rate and emotion into piper CLI options.

    Args:
        request: The synthesis request; only ``speechRate`` and ``emotion``
            are consulted.

    Returns:
        A flat list of option names and string values for
        ``--sentence-silence``, ``--length_scale``, ``--noise_scale`` and
        ``--noise_w``, in that order.
    """
    # A missing rate falls back to the same default the model declares.
    speech_rate = clamp(float(request.speechRate or 0.92), 0.6, 1.4)
    emotion = (request.emotion or "story").lower()

    # Length scale is the inverse of the speed multiplier.
    length_scale = round(1.0 / speech_rate, 2)

    # Baseline values, used for any emotion not matched below.
    sentence_silence = 0.46
    noise_scale = 0.64
    noise_w = 0.72

    if emotion in {"calm", "soft", "warm"}:
        sentence_silence = 0.5
        noise_scale = 0.58
        noise_w = 0.68
    elif emotion in {"story", "narration", "expressive"}:
        sentence_silence = 0.48
        noise_scale = 0.68
        noise_w = 0.76

    return [
        "--sentence-silence",
        str(sentence_silence),
        "--length_scale",
        str(length_scale),
        "--noise_scale",
        str(noise_scale),
        "--noise_w",
        str(noise_w),
    ]


def _failure(message: str) -> dict:
    """Build the failure payload returned by ``/synthesize``."""
    return {
        "success": False,
        "audioPath": None,
        "durationMs": None,
        "engine": "piper",
        "errorMessage": message,
    }


@app.get("/health")
def health():
    """Report liveness and whether both piper model files exist on disk."""
    return {
        "status": "ok",
        "engine": "piper",
        "modelReady": PIPER_MODEL.exists() and PIPER_CONFIG.exists(),
    }


@app.post("/synthesize")
def synthesize(request: SynthesizeRequest):
    """Run piper on the request text and write the audio to ``outputPath``.

    The handler never raises: every failure is reported as a payload with
    ``success`` set to ``False`` and a human-readable ``errorMessage``.

    Args:
        request: Text, destination path and prosody settings.

    Returns:
        On success, a dict with ``success``, ``audioPath``, ``durationMs``
        (always ``None``) and ``engine``. On failure, the same keys plus
        ``errorMessage``, with ``audioPath`` set to ``None``.
    """
    # NOTE(security): outputPath comes from the caller and is used without
    # validation, so any location writable by this process can be targeted.
    # Restrict it to a known directory if this service is ever exposed to
    # untrusted clients.
    output = Path(request.outputPath)

    try:
        # Inside the try so filesystem errors produce the structured failure
        # payload instead of an unhandled server error.
        output.parent.mkdir(parents=True, exist_ok=True)

        if not PIPER_BIN.exists():
            return _failure(f"piper binary not found: {PIPER_BIN}")
        if not PIPER_MODEL.exists() or not PIPER_CONFIG.exists():
            return _failure("piper Chinese voice model is not installed")

        command = [
            str(PIPER_BIN),
            "--model",
            str(PIPER_MODEL),
            "--config",
            str(PIPER_CONFIG),
            "--output_file",
            str(output),
            *resolve_piper_args(request),
        ]
        subprocess.run(
            command,
            input=request.text,
            # Explicit UTF-8 keeps the text independent of the host locale;
            # "replace" stops undecodable bytes in piper's output from raising.
            encoding="utf-8",
            errors="replace",
            check=True,
            capture_output=True,
            timeout=PIPER_TIMEOUT_SECONDS,
        )
    except subprocess.CalledProcessError as exc:
        # Surface piper's own diagnostics rather than only the exit status.
        detail = (exc.stderr or "").strip()
        message = f"piper exited with status {exc.returncode}"
        if detail:
            message = f"{message}: {detail}"
        return _failure(message)
    except subprocess.TimeoutExpired:
        return _failure(f"piper timed out after {PIPER_TIMEOUT_SECONDS} seconds")
    except OSError as exc:
        return _failure(str(exc))
    except Exception as exc:  # Last-resort boundary: keep the no-raise contract.
        logger.exception("unexpected failure while running piper")
        return _failure(str(exc))

    return {
        "success": True,
        "audioPath": str(output),
        "durationMs": None,
        "engine": "piper",
    }