Files
peanut 89fc42819d feat: AI 场景路由、ASR 服务及前后端全链路同步
- 新增 AI 场景路由控制器和管理接口
- 新增 ASR 语音识别服务及前后端集成
- 同步 AI Runtime 客户端到 Web/小程序/Life-Script
- 完善 AI 配置测试修复和管理后台路由配置
- 新增数据库迁移脚本

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 13:25:21 +08:00

115 lines
3.5 KiB
Python

import os
import sys
import tempfile
import time
import types
import importlib.machinery
from pathlib import Path
from threading import Lock
from fastapi import FastAPI, File, UploadFile
# Lazily created recognizer instance and the lock that guards its creation.
_model = None
_model_lock = Lock()

# Runtime configuration; each value can be overridden via an environment variable.
MODEL_NAME = os.environ.get(
    "ASR_MODEL",
    "/data/programs/emotion-museum/asr-service/models/paraformer-zh-onnx",
)
DEVICE = os.environ.get("ASR_DEVICE", "cpu")
WORK_DIR = Path(os.environ.get("ASR_WORK_DIR", "/tmp/emotion-museum-asr"))

# The working directory is created as a side effect of importing this module.
WORK_DIR.mkdir(parents=True, exist_ok=True)

# HTTP application object on which the route handlers are registered.
app = FastAPI(title="Emotion Museum ASR")
def get_model():
    """Return the shared Paraformer recognizer, creating it on first use.

    The module-level lock is taken on every call, so concurrent callers
    cannot construct the model twice.

    Returns:
        The ``funasr_onnx.Paraformer`` instance built from ``MODEL_NAME``.
    """
    global _model
    with _model_lock:
        if _model is not None:
            return _model

        # funasr-onnx imports the optional SenseVoice module from its package
        # __init__, which imports torch even though only Paraformer is used.
        # This service intentionally runs the ONNX path without PyTorch, so a
        # minimal placeholder module is registered when torch is not loaded.
        if "torch" not in sys.modules:
            placeholder = types.ModuleType("torch")
            placeholder.__spec__ = importlib.machinery.ModuleSpec("torch", loader=None)
            placeholder.Tensor = type("Tensor", (), {})
            sys.modules["torch"] = placeholder

        # Imported only after the placeholder is in place.
        from funasr_onnx import Paraformer

        # NOTE(review): device_id is fixed at -1 here and the DEVICE setting is
        # not consulted -- confirm that CPU-only inference is intended.
        options = {
            "batch_size": 1,
            "device_id": -1,
            "quantize": True,
            "intra_op_num_threads": 2,
        }
        _model = Paraformer(MODEL_NAME, **options)
        return _model
def clean_text(text):
    """Normalize a raw recognition value into plain text.

    Args:
        text: A string, or a list/tuple whose first element carries the
            string. Nested containers such as ``[("hello", tokens)]`` are
            unwrapped level by level. Falsy values (``None``, ``""``, empty
            containers) are accepted.

    Returns:
        The text with every ``<|...|>`` marker listed below removed and
        surrounding whitespace stripped, or ``""`` when there is nothing to
        clean.
    """
    # Keep unwrapping until a non-sequence is reached: a single unwrap leaves
    # a nested result (e.g. a list of (text, tokens) tuples) as a tuple, which
    # has no .replace() and would raise AttributeError below.
    while isinstance(text, (list, tuple)):
        text = text[0] if text else ""
    if not text:
        return ""
    markers = (
        "<|zh|>",
        "<|en|>",
        "<|yue|>",
        "<|ja|>",
        "<|ko|>",
        "<|nospeech|>",
        "<|withitn|>",
        "<|woitn|>",
    )
    for marker in markers:
        text = text.replace(marker, "")
    return text.strip()
@app.get("/health")
def health():
    """Report service status and configuration.

    Only inspects the cached model reference; it never triggers a model load.
    """
    is_loaded = _model is not None
    payload = {
        "status": "ok",
        "engine": "funasr-onnx",
        "model": MODEL_NAME,
        "device": DEVICE,
        "loaded": is_loaded,
    }
    return payload
# Serializes inference across worker threads so at most one recognition runs
# at a time -- the same ordering the handler had when it ran the model inline
# on the event loop.
_inference_lock = Lock()


def _recognize(audio_path):
    """Load the shared model if needed and run it on one audio file.

    Blocking; intended to be called from a worker thread.
    """
    with _inference_lock:
        return get_model()([str(audio_path)])


def _transcribe_response(text, language, started, error_message):
    """Build the /transcribe JSON payload used for both success and failure."""
    return {
        "success": bool(text),
        "text": text,
        "language": language,
        "durationMs": int((time.monotonic() - started) * 1000),
        "engine": "funasr-onnx",
        "model": MODEL_NAME,
        "errorMessage": error_message,
    }


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    The upload is streamed to a temporary file under ``WORK_DIR``, recognized
    on a worker thread, and the temporary file is removed afterwards.

    Args:
        file: The uploaded audio; its filename suffix (default ``.wav``) is
            reused for the temporary file.

    Returns:
        A dict with ``success``, ``text``, ``language``, ``durationMs``,
        ``engine``, ``model`` and ``errorMessage``. Failures are reported in
        the payload (``success`` false, ``errorMessage`` set) rather than
        raised.
    """
    # Function-scope import: only this handler needs asyncio.
    import asyncio

    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    started = time.monotonic()
    suffix = Path(file.filename or "audio.wav").suffix or ".wav"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=WORK_DIR) as tmp:
            tmp_path = Path(tmp.name)
            # Copy the upload in 1 MiB chunks to bound memory use.
            while chunk := await file.read(1024 * 1024):
                tmp.write(chunk)

        # Model loading and inference are blocking, CPU-bound calls; running
        # them on the default executor keeps the event loop (and /health)
        # responsive while recognition is in progress.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, _recognize, tmp_path)

        first = result[0] if isinstance(result, list) and result else result
        if isinstance(first, dict):
            raw_text = first.get("preds", first.get("text", ""))
            language = first.get("language")
        else:
            raw_text = str(first or "")
            language = None
        text = clean_text(raw_text)
        return _transcribe_response(
            text,
            language,
            started,
            None if text else "empty recognition result",
        )
    except Exception as exc:
        # Some exceptions stringify to ""; fall back to the class name so a
        # failure never carries a blank errorMessage.
        return _transcribe_response("", None, started, str(exc) or type(exc).__name__)
    finally:
        if tmp_path is not None:
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not turn a
                # finished request into an error.
                pass