"""HTTP speech-recognition service backed by a funasr-onnx Paraformer model.

Exposes two endpoints on a FastAPI app:

* ``GET /health``      -- static service information and whether the model
  has been loaded yet.
* ``POST /transcribe`` -- accepts one uploaded audio file and returns the
  recognized text in a JSON envelope.
"""

import importlib.machinery
import logging
import os
import sys
import tempfile
import time
import types
from pathlib import Path
from threading import Lock

from fastapi import FastAPI, File, UploadFile
from fastapi.concurrency import run_in_threadpool

logger = logging.getLogger(__name__)

app = FastAPI(title="Emotion Museum ASR")

MODEL_NAME = os.getenv("ASR_MODEL", "/data/programs/emotion-museum/asr-service/models/paraformer-zh-onnx")
# NOTE(review): DEVICE is only echoed by /health; the model below is always
# created with device_id=-1 (presumably CPU -- confirm against funasr-onnx).
DEVICE = os.getenv("ASR_DEVICE", "cpu")
WORK_DIR = Path(os.getenv("ASR_WORK_DIR", "/tmp/emotion-museum-asr"))
WORK_DIR.mkdir(parents=True, exist_ok=True)

_ENGINE = "funasr-onnx"
_UPLOAD_CHUNK_SIZE = 1024 * 1024

# Tag tokens stripped from recognized text before it is returned.
_TEXT_MARKERS = (
    "<|zh|>",
    "<|en|>",
    "<|yue|>",
    "<|ja|>",
    "<|ko|>",
    "<|nospeech|>",
    "<|withitn|>",
    "<|woitn|>",
)

_model = None
_model_lock = Lock()
# Serializes inference calls. Before inference was moved off the event loop,
# requests were implicitly processed one at a time; this keeps that guarantee.
_infer_lock = Lock()


def get_model():
    """Return the process-wide Paraformer instance, creating it on first use.

    Creation happens under ``_model_lock`` so concurrent first callers build
    the model only once.
    """
    global _model
    with _model_lock:
        if _model is None:
            # funasr-onnx imports the optional SenseVoice module from package
            # __init__, which imports torch even when we only use Paraformer.
            # This service intentionally runs the ONNX path without PyTorch.
            if "torch" not in sys.modules:
                torch_stub = types.ModuleType("torch")
                torch_stub.__spec__ = importlib.machinery.ModuleSpec("torch", loader=None)
                torch_stub.Tensor = type("Tensor", (), {})
                sys.modules["torch"] = torch_stub
            from funasr_onnx import Paraformer

            _model = Paraformer(
                MODEL_NAME,
                batch_size=1,
                device_id=-1,
                quantize=True,
                intra_op_num_threads=2,
            )
        return _model


def clean_text(text):
    """Normalize a raw recognition value into a plain, stripped string.

    Args:
        text: A string, or a list/tuple whose first element is the text.
            Empty or falsy values yield ``""``.

    Returns:
        The text with all ``_TEXT_MARKERS`` tokens removed and surrounding
        whitespace stripped.
    """
    if isinstance(text, (list, tuple)):
        text = text[0] if text else ""
    if not text:
        return ""
    if not isinstance(text, str):
        # A non-string payload would otherwise crash on .replace().
        text = str(text)
    for marker in _TEXT_MARKERS:
        text = text.replace(marker, "")
    return text.strip()


def _recognize(audio_path):
    """Run the model on one audio file; blocking, meant for a worker thread."""
    model = get_model()
    with _infer_lock:
        return model([str(audio_path)])


def _parse_result(result):
    """Extract ``(text, language)`` from whatever the model call returned."""
    first = result[0] if isinstance(result, list) and result else result
    if isinstance(first, dict):
        raw_text = first.get("preds", first.get("text", ""))
        language = first.get("language")
    else:
        raw_text = str(first or "")
        language = None
    return clean_text(raw_text), language


def _envelope(started, *, success, text, language, error_message):
    """Build the JSON response body shared by success and failure paths."""
    return {
        "success": success,
        "text": text,
        "language": language,
        "durationMs": int((time.perf_counter() - started) * 1000),
        "engine": _ENGINE,
        "model": MODEL_NAME,
        "errorMessage": error_message,
    }


@app.get("/health")
def health():
    """Report static service configuration and whether the model is loaded."""
    return {
        "status": "ok",
        "engine": _ENGINE,
        "model": MODEL_NAME,
        "device": DEVICE,
        "loaded": _model is not None,
    }


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe one uploaded audio file.

    The upload is spooled to a temporary file in ``WORK_DIR``, recognized by
    the model, and the temporary file is removed afterwards.

    Returns:
        A dict with keys ``success``, ``text``, ``language``, ``durationMs``,
        ``engine``, ``model`` and ``errorMessage``. Failures are reported in
        this envelope (``success`` false, ``errorMessage`` set) rather than
        raised.
    """
    # perf_counter is monotonic, so the duration is immune to clock changes.
    started = time.perf_counter()
    suffix = Path(file.filename or "audio.wav").suffix or ".wav"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=WORK_DIR) as tmp:
            tmp_path = Path(tmp.name)
            while chunk := await file.read(_UPLOAD_CHUNK_SIZE):
                tmp.write(chunk)
        # The file is closed (and therefore flushed) before the model opens
        # it by path. Inference is CPU-bound, so it runs in the thread pool
        # instead of stalling the event loop for every other request.
        result = await run_in_threadpool(_recognize, tmp_path)
        text, language = _parse_result(result)
        return _envelope(
            started,
            success=bool(text),
            text=text,
            language=language,
            error_message=None if text else "empty recognition result",
        )
    except Exception as exc:
        # Top-level boundary: keep the traceback in the server log, and keep
        # answering with the error envelope callers already expect.
        logger.exception("transcription failed")
        return _envelope(
            started,
            success=False,
            text="",
            language=None,
            error_message=str(exc),
        )
    finally:
        if tmp_path:
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; never mask the response.
                logger.warning("could not remove temp file %s", tmp_path, exc_info=True)