# happy-life-star/backend-single/tts-service/app.py

import subprocess
from pathlib import Path
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel, Field

# ASGI application object; the route decorators further down register on it.
app = FastAPI(title="Emotion Museum TTS")

# Every Piper asset is located relative to the directory holding this file.
BASE_DIR = Path(__file__).resolve().parent
PIPER_BIN = BASE_DIR.joinpath(".venv", "bin", "piper")
PIPER_MODEL = BASE_DIR.joinpath("models", "zh_CN-huayan-medium.onnx")
PIPER_CONFIG = BASE_DIR.joinpath("models", "zh_CN-huayan-medium.onnx.json")


class SynthesizeRequest(BaseModel):
    """Request body accepted by the ``/synthesize`` endpoint."""

    # Text to speak; validation requires between 1 and 5000 characters.
    text: str = Field(min_length=1, max_length=5000)
    # Voice identifier.
    # NOTE(review): no code visible in this file reads this field.
    voice: str = "default_zh_female"
    # Filesystem path the synthesized audio is written to (required).
    outputPath: str
    # Speaking-rate multiplier, validated to [0.6, 1.4]. resolve_piper_args
    # converts it to Piper's length scale as 1 / rate.
    speechRate: Optional[float] = Field(default=0.92, ge=0.6, le=1.4)
    # Pitch adjustment, validated to [-20.0, 20.0].
    # NOTE(review): no code visible in this file reads this field.
    pitch: Optional[float] = Field(default=0.0, ge=-20.0, le=20.0)
    # Style hint; resolve_piper_args uses it to pick a silence/noise preset.
    emotion: Optional[str] = "story"


def clamp(value: float, minimum: float, maximum: float) -> float:
    """Restrict ``value`` to the closed interval [``minimum``, ``maximum``].

    Args:
        value: Number to restrict.
        minimum: Lower bound of the interval.
        maximum: Upper bound of the interval.

    Returns:
        ``value`` when it lies strictly inside the bounds, otherwise the
        bound it reached or crossed.
    """
    # Apply the upper bound first, then the lower bound.
    capped = value if value < maximum else maximum
    return capped if capped > minimum else minimum


def resolve_piper_args(request: SynthesizeRequest) -> list[str]:
    """Translate the request's prosody settings into Piper CLI arguments.

    Args:
        request: Validated synthesis request; only ``speechRate`` and
            ``emotion`` are read here.

    Returns:
        A flat list of alternating flag and value strings, ready to be
        appended to the Piper command line.
    """
    rate = clamp(float(request.speechRate or 0.92), 0.6, 1.4)
    mood = (request.emotion or "story").lower()

    # Each preset is (sentence_silence, noise_scale, noise_w).
    if mood in {"calm", "soft", "warm"}:
        pause, noise, noise_width = 0.5, 0.58, 0.68
    elif mood in {"story", "narration", "expressive"}:
        pause, noise, noise_width = 0.48, 0.68, 0.76
    else:
        # Emotions outside both groups use the baseline preset.
        pause, noise, noise_width = 0.46, 0.64, 0.72

    flag_values = (
        ("--sentence-silence", pause),
        # Length scale is the inverse of the rate: faster speech -> smaller.
        ("--length_scale", round(1.0 / rate, 2)),
        ("--noise_scale", noise),
        ("--noise_w", noise_width),
    )
    return [token for flag, value in flag_values for token in (flag, str(value))]


@app.get("/health")
def health():
    # Liveness probe: always reports "ok"; modelReady is true only when both
    # the voice model file and its JSON config file exist on disk.
    required_files = (PIPER_MODEL, PIPER_CONFIG)
    model_ready = all(path.exists() for path in required_files)
    return {"status": "ok", "engine": "piper", "modelReady": model_ready}


def _synthesis_failure(message: str) -> dict:
    """Build the failure payload returned by the ``/synthesize`` endpoint."""
    return {
        "success": False,
        "audioPath": None,
        "durationMs": None,
        "engine": "piper",
        "errorMessage": message,
    }


@app.post("/synthesize")
def synthesize(request: SynthesizeRequest):
    """Synthesize ``request.text`` into an audio file using the Piper CLI.

    The text is piped to Piper on stdin and the audio is written to
    ``request.outputPath``. Failures (missing binary or model, filesystem
    errors, a non-zero Piper exit status, or the 180 second timeout) are
    reported in the response body instead of being raised.

    Args:
        request: Validated synthesis request.

    Returns:
        A dict with ``success``, ``audioPath``, ``durationMs`` (always
        ``None``) and ``engine``; on failure it additionally carries
        ``errorMessage`` and ``audioPath`` is ``None``.
    """
    # NOTE(review): outputPath is caller-supplied and used verbatim, so this
    # endpoint can create directories and write files anywhere the process
    # has permission. Confirm only trusted callers can reach it, or restrict
    # the path to an allowed directory.
    output = Path(request.outputPath)

    try:
        # Kept inside the try so filesystem errors (permissions, invalid
        # path) yield the structured failure payload like every other error.
        output.parent.mkdir(parents=True, exist_ok=True)

        if not PIPER_BIN.exists():
            raise RuntimeError(f"piper binary not found: {PIPER_BIN}")
        if not PIPER_MODEL.exists() or not PIPER_CONFIG.exists():
            raise RuntimeError("piper Chinese voice model is not installed")

        subprocess.run(
            [
                str(PIPER_BIN),
                "--model",
                str(PIPER_MODEL),
                "--config",
                str(PIPER_CONFIG),
                "--output_file",
                str(output),
                *resolve_piper_args(request),
            ],
            input=request.text,
            text=True,
            # Pin UTF-8 so Chinese text is piped correctly regardless of the
            # process locale; "replace" keeps undecodable bytes in Piper's
            # captured output from raising after the run.
            encoding="utf-8",
            errors="replace",
            check=True,
            capture_output=True,
            timeout=180,
        )
    except subprocess.CalledProcessError as exc:
        # str(exc) only carries the command and exit status; append Piper's
        # stderr so the caller can see why synthesis failed.
        stderr = (exc.stderr or "").strip()
        return _synthesis_failure(f"{exc}: {stderr}" if stderr else str(exc))
    except Exception as exc:
        # Endpoint boundary: any other failure is reported, not raised.
        return _synthesis_failure(str(exc))

    return {
        "success": True,
        "audioPath": str(output),
        "durationMs": None,
        "engine": "piper",
    }