feat: add script tts backend

This commit is contained in:
2026-05-17 16:36:06 +08:00
parent 1016111d19
commit 6b426c2b68
21 changed files with 808 additions and 5 deletions
@@ -43,8 +43,12 @@ public class WebConfig implements WebMvcConfigurer {
.addPathPatterns("/**")
.excludePathPatterns(
"/auth/**",
"/analytics/events/batch",
"/tts/audio/**",
"/admin/**", // 排除管理员接口,由AdminAuthInterceptor处理
"/error",
"/analytics/events/batch",
"/tts/audio/**",
"/favicon.ico",
"/actuator/**",
"/swagger-ui/**",
@@ -44,6 +44,7 @@ public class WebMvcConfig implements WebMvcConfigurer {
"/auth/refresh-token", // 刷新token接口
"/auth/resetPassword", // 重置密码接口(免登录)
"/analytics/events/batch", // Analytics event batch endpoint
"/tts/audio/**", // Public generated TTS audio files
"/health", // 健康检查接口
"/ws/**", // WebSocket接口
"/swagger-ui/**", // Swagger UI
@@ -4,12 +4,16 @@ import com.emotion.common.Result;
import com.emotion.dto.request.analytics.AnalyticsEventBatchRequest;
import com.emotion.dto.response.analytics.AnalyticsBatchResponse;
import com.emotion.service.AnalyticsService;
import com.emotion.util.JwtUtil;
import com.emotion.util.UserContextHolder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletRequest;
import javax.validation.Valid;
@RestController
@@ -19,8 +23,33 @@ public class AnalyticsController {
@Autowired
private AnalyticsService analyticsService;
@Autowired
private JwtUtil jwtUtil;
@PostMapping("/events/batch")
public Result<AnalyticsBatchResponse> batch(@Valid @RequestBody AnalyticsEventBatchRequest request) {
return Result.success(analyticsService.ingestBatch(request));
public Result<AnalyticsBatchResponse> batch(@Valid @RequestBody AnalyticsEventBatchRequest request,
        HttpServletRequest servletRequest) {
    // Ingests one batch of analytics events. Authentication is optional here: when the request
    // carries a valid bearer token the events are attributed to that user, otherwise the batch
    // is processed without a bound user.
    //
    // The binding happens INSIDE the try block on purpose: if token parsing throws after the
    // user id has already been stored, the finally clause still clears the holder, so a
    // partially bound identity can never leak to the next request served by this thread.
    try {
        bindOptionalUser(servletRequest);
        return Result.success(analyticsService.ingestBatch(request));
    } finally {
        UserContextHolder.clear();
    }
}
/**
 * Binds the caller's identity to {@link UserContextHolder} when the request carries a valid
 * bearer token. Requests without a usable token are left anonymous; this method never rejects
 * a request on its own.
 *
 * @param request the current HTTP request; its {@code Authorization} header is inspected
 */
private void bindOptionalUser(HttpServletRequest request) {
    final String scheme = "Bearer ";
    String authHeader = request.getHeader("Authorization");
    // HTTP authentication scheme names are case-insensitive, so "bearer xyz" must be accepted too.
    if (!StringUtils.hasText(authHeader)
            || !authHeader.regionMatches(true, 0, scheme, 0, scheme.length())) {
        return;
    }
    // Tolerate extra whitespace around the credential instead of failing validation on it.
    String token = authHeader.substring(scheme.length()).trim();
    if (token.isEmpty() || !jwtUtil.validateToken(token)) {
        return;
    }
    try {
        UserContextHolder.setCurrentUserId(jwtUtil.getUserIdFromToken(token));
        UserContextHolder.setCurrentUsername(jwtUtil.getUsernameFromToken(token));
        UserContextHolder.setCurrentToken(token);
    } catch (RuntimeException e) {
        // Never leave a half-populated identity behind if a claim cannot be read.
        UserContextHolder.clear();
        throw e;
    }
}
}
@@ -0,0 +1,81 @@
package com.emotion.controller;
import com.emotion.common.Result;
import com.emotion.dto.request.tts.TtsTaskCreateRequest;
import com.emotion.dto.response.tts.TtsTaskResponse;
import com.emotion.service.TtsTaskService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.CacheControl;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import javax.validation.Valid;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;
@RestController
@RequestMapping("/tts")
public class TtsController {
private final TtsTaskService ttsTaskService;
@Value("${emotion.tts.output-dir:/data/uploads/emotion-museum/tts}")
private String outputDir;
public TtsController(TtsTaskService ttsTaskService) {
this.ttsTaskService = ttsTaskService;
}
@PostMapping("/tasks")
public Result<TtsTaskResponse> create(@Valid @RequestBody TtsTaskCreateRequest request) {
try {
return Result.success(ttsTaskService.createOrReuse(request));
} catch (IllegalArgumentException | IllegalStateException e) {
return Result.badRequest(e.getMessage());
}
}
@GetMapping("/tasks/{id}")
public Result<TtsTaskResponse> detail(@PathVariable String id) {
TtsTaskResponse response = ttsTaskService.getTask(id);
return response == null ? Result.notFound("TTS task not found") : Result.success(response);
}
@GetMapping("/tasks/by-source")
public Result<TtsTaskResponse> bySource(@RequestParam String sourceType,
@RequestParam String sourceId,
@RequestParam(required = false) String voice) {
return Result.success(ttsTaskService.getBySource(sourceType, sourceId, voice));
}
@GetMapping("/audio/{filename:.+}")
public ResponseEntity<Resource> audio(@PathVariable String filename) {
if (filename.contains("..") || filename.contains("/") || filename.contains("\\")) {
return ResponseEntity.badRequest().build();
}
Path path = Paths.get(outputDir).resolve(filename).normalize();
FileSystemResource resource = new FileSystemResource(path);
if (!resource.exists() || !resource.isReadable()) {
return ResponseEntity.notFound().build();
}
MediaType mediaType = filename.endsWith(".wav")
? MediaType.valueOf("audio/wav")
: MediaType.valueOf("audio/mpeg");
return ResponseEntity.ok()
.contentType(mediaType)
.cacheControl(CacheControl.maxAge(30, TimeUnit.DAYS).cachePublic())
.body(resource);
}
}
@@ -0,0 +1,21 @@
package com.emotion.dto.request.tts;
import lombok.Data;
import javax.validation.constraints.NotBlank;
import javax.validation.constraints.Size;
/**
 * Request body for creating (or reusing) a text-to-speech task.
 */
@Data
public class TtsTaskCreateRequest {
/** Kind of content to read aloud; required, at most 50 characters. */
@NotBlank
@Size(max = 50)
private String sourceType;
/** Id of the source record whose text is synthesized; required, at most 64 characters. */
@NotBlank
@Size(max = 64)
private String sourceId;
/** Optional voice name, at most 64 characters. */
@Size(max = 64)
private String voice;
}
@@ -0,0 +1,18 @@
package com.emotion.dto.response.tts;
import lombok.Builder;
import lombok.Data;
/**
 * API view of a text-to-speech task.
 */
@Data
@Builder
public class TtsTaskResponse {
/** Task id. */
private String id;
/** Kind of content the audio was generated from. */
private String sourceType;
/** Id of the source record the audio was generated from. */
private String sourceId;
/** Current task status. */
private String status;
/** Voice used for synthesis. */
private String voice;
/** URL of the generated audio; may be null. */
private String audioUrl;
/** Audio duration in milliseconds; may be null. */
private Long durationMs;
/** Failure description; may be null. */
private String errorMessage;
}
@@ -0,0 +1,55 @@
package com.emotion.entity;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.emotion.common.BaseEntity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;
/**
 * Persistent text-to-speech task, mapped to table {@code t_tts_task}.
 */
@Data
@EqualsAndHashCode(callSuper = true)
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@TableName("t_tts_task")
public class TtsTask extends BaseEntity {
/** Id of the user the task belongs to. */
@TableField("user_id")
private String userId;
/** Kind of content the audio is generated from. */
@TableField("source_type")
private String sourceType;
/** Id of the source record the audio is generated from. */
@TableField("source_id")
private String sourceId;
/** Hash identifying the synthesized text. */
@TableField("text_hash")
private String textHash;
/** Length of the synthesized text. */
@TableField("text_length")
private Integer textLength;
/** Voice used for synthesis. */
@TableField("voice")
private String voice;
/** Current task status. */
@TableField("status")
private String status;
/** URL under which the audio is exposed. */
@TableField("audio_url")
private String audioUrl;
/** Path of the audio file. */
@TableField("audio_path")
private String audioPath;
/** Audio duration in milliseconds. */
@TableField("duration_ms")
private Long durationMs;
/** Failure description, if any. */
@TableField("error_message")
private String errorMessage;
/** Number of times this task has been requested. */
@TableField("request_count")
private Integer requestCount;
}
@@ -0,0 +1,9 @@
package com.emotion.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.emotion.entity.TtsTask;
import org.apache.ibatis.annotations.Mapper;
/**
 * MyBatis-Plus mapper for {@link TtsTask}; declares no statements beyond those inherited from
 * {@link BaseMapper}.
 */
@Mapper
public interface TtsTaskMapper extends BaseMapper<TtsTask> {
}
@@ -0,0 +1,36 @@
package com.emotion.service;
/**
 * Client-side abstraction of a speech-synthesis engine.
 */
public interface TtsEngineClient {

    /**
     * Synthesizes speech for the given text.
     *
     * @param text       text to read aloud
     * @param voice      voice name to use
     * @param outputPath path the audio should be written to
     * @return the outcome of the synthesis call
     */
    TtsEngineResult synthesize(String text, String voice, String outputPath);

    /**
     * Immutable outcome of one synthesis call: a success flag plus the audio path, duration and
     * error message exactly as they were passed to the constructor.
     */
    class TtsEngineResult {

        private final boolean succeeded;
        private final String path;
        private final Long duration;
        private final String error;

        /**
         * @param success      whether the synthesis succeeded
         * @param audioPath    path of the produced audio, may be {@code null}
         * @param durationMs   audio duration in milliseconds, may be {@code null}
         * @param errorMessage failure description, may be {@code null}
         */
        public TtsEngineResult(boolean success, String audioPath, Long durationMs, String errorMessage) {
            this.succeeded = success;
            this.path = audioPath;
            this.duration = durationMs;
            this.error = errorMessage;
        }

        /** @return {@code true} when the synthesis succeeded */
        public boolean isSuccess() {
            return succeeded;
        }

        /** @return path of the produced audio, or {@code null} */
        public String getAudioPath() {
            return path;
        }

        /** @return audio duration in milliseconds, or {@code null} */
        public Long getDurationMs() {
            return duration;
        }

        /** @return failure description, or {@code null} */
        public String getErrorMessage() {
            return error;
        }
    }
}
@@ -0,0 +1,15 @@
package com.emotion.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.emotion.dto.request.tts.TtsTaskCreateRequest;
import com.emotion.dto.response.tts.TtsTaskResponse;
import com.emotion.entity.TtsTask;
/**
 * Service contract for creating and querying text-to-speech tasks.
 */
public interface TtsTaskService extends IService<TtsTask> {
/**
 * Creates a task for the requested source, or returns an existing one.
 *
 * @param request creation request (source type, source id, optional voice)
 * @return the resulting task
 */
TtsTaskResponse createOrReuse(TtsTaskCreateRequest request);
/**
 * Looks up a task by id.
 *
 * @param id task id
 * @return the task; callers in this change treat {@code null} as "not found"
 */
TtsTaskResponse getTask(String id);
/**
 * Looks up a task by its source and voice.
 *
 * @param sourceType kind of source content
 * @param sourceId   id of the source record
 * @param voice      voice name, may be {@code null}
 * @return the matching task, if any
 */
TtsTaskResponse getBySource(String sourceType, String sourceId, String voice);
}
@@ -0,0 +1,46 @@
package com.emotion.service.impl;
import com.emotion.service.TtsEngineClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import java.util.Map;
/**
 * {@link TtsEngineClient} that calls the synthesis engine over HTTP.
 *
 * <p>It POSTs {@code {text, voice, outputPath}} to {@code <engine-url>/synthesize} and reads
 * {@code success}, {@code audioPath}, {@code durationMs} and {@code errorMessage} from the JSON
 * response. Every failure, including transport errors, is reported through the returned
 * {@link TtsEngineResult}; this client never throws.
 */
@Service
public class HttpTtsEngineClient implements TtsEngineClient {

    private final RestTemplate restTemplate;

    /** Base URL of the synthesis engine. */
    @Value("${emotion.tts.engine-url:http://127.0.0.1:19110}")
    private String engineUrl;

    public HttpTtsEngineClient(RestTemplate restTemplate) {
        this.restTemplate = restTemplate;
    }

    /**
     * Requests synthesis from the engine.
     *
     * @param text       text to read aloud
     * @param voice      voice name
     * @param outputPath path the engine should write the audio to
     * @return a successful result with the engine-reported path (or {@code null} when the engine
     *         did not report one) and duration, or a failed result with a non-null message
     */
    @Override
    @SuppressWarnings("rawtypes") // RestTemplate can only be given the raw Map.class token
    public TtsEngineResult synthesize(String text, String voice, String outputPath) {
        // Map.of rejects null values with a message-less NullPointerException; fail with a
        // readable reason instead.
        if (text == null || voice == null || outputPath == null) {
            return new TtsEngineResult(false, null, null, "text, voice and outputPath are required");
        }
        try {
            Map<String, Object> body = Map.of(
                    "text", text,
                    "voice", voice,
                    "outputPath", outputPath
            );
            ResponseEntity<Map> response = restTemplate.postForEntity(synthesizeUrl(), body, Map.class);
            Map<?, ?> data = response.getBody();
            if (data == null) {
                return new TtsEngineResult(false, null, null, "empty response");
            }
            if (!Boolean.TRUE.equals(data.get("success"))) {
                String message = textOrNull(data.get("errorMessage"));
                return new TtsEngineResult(false, null, null,
                        message != null ? message : "TTS engine reported failure");
            }
            Object duration = data.get("durationMs");
            Long durationMs = duration instanceof Number
                    ? ((Number) duration).longValue()
                    : null;
            // A missing audioPath must stay null: String.valueOf(null) would yield the literal
            // text "null", which callers would then treat as a real path.
            return new TtsEngineResult(true, textOrNull(data.get("audioPath")), durationMs, null);
        } catch (Exception e) {
            // Boundary catch: the contract of this client is "report, don't throw".
            String message = e.getMessage();
            return new TtsEngineResult(false, null, null,
                    message != null ? message : e.getClass().getSimpleName());
        }
    }

    /** Builds the endpoint URL, tolerating a trailing slash in the configured base URL. */
    private String synthesizeUrl() {
        return engineUrl.endsWith("/") ? engineUrl + "synthesize" : engineUrl + "/synthesize";
    }

    /** Converts a JSON value to text while preserving absence as {@code null}. */
    private static String textOrNull(Object value) {
        return value == null ? null : String.valueOf(value);
    }
}
@@ -0,0 +1,291 @@
package com.emotion.service.impl;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.emotion.dto.request.tts.TtsTaskCreateRequest;
import com.emotion.dto.response.tts.TtsTaskResponse;
import com.emotion.entity.EpicScript;
import com.emotion.entity.TtsTask;
import com.emotion.mapper.EpicScriptMapper;
import com.emotion.mapper.TtsTaskMapper;
import com.emotion.service.TtsEngineClient;
import com.emotion.service.TtsTaskService;
import com.emotion.util.UserContextHolder;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.DigestUtils;
import org.springframework.util.StringUtils;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
/**
 * Default {@link TtsTaskService}: turns the text of a user's epic script into speech.
 *
 * <p>Flow of {@link #createOrReuse}: load and clean the source text, hash it together with the
 * voice, then (1) reuse the caller's own task for that hash, (2) otherwise copy the audio of any
 * successful task with the same hash and voice, (3) otherwise store a pending task and run the
 * synthesis asynchronously on the injected executor.
 */
@Slf4j
@Service
public class TtsTaskServiceImpl extends ServiceImpl<TtsTaskMapper, TtsTask> implements TtsTaskService {

    private static final String SOURCE_TYPE_EPIC_SCRIPT = "epic_script";
    private static final String STATUS_PENDING = "pending";
    private static final String STATUS_PROCESSING = "processing";
    private static final String STATUS_SUCCESS = "success";
    private static final String STATUS_FAILED = "failed";

    private final EpicScriptMapper epicScriptMapper;
    private final TtsEngineClient ttsEngineClient;
    private final Executor taskExecutor;

    @Value("${emotion.tts.enabled:true}")
    private boolean enabled;

    @Value("${emotion.tts.output-dir:/data/uploads/emotion-museum/tts}")
    private String outputDir;

    // NOTE(review): this fallback differs from the "/tts/audio" prefix used by the application
    // YAML files in this change - confirm the default is intended.
    @Value("${emotion.tts.public-url-prefix:/uploads/emotion-museum/tts}")
    private String publicUrlPrefix;

    @Value("${emotion.tts.max-text-length:5000}")
    private int maxTextLength;

    @Value("${emotion.tts.default-voice:default_zh_female}")
    private String defaultVoice;

    public TtsTaskServiceImpl(EpicScriptMapper epicScriptMapper,
                              TtsEngineClient ttsEngineClient,
                              @Qualifier("taskExecutor") Executor taskExecutor) {
        this.epicScriptMapper = epicScriptMapper;
        this.ttsEngineClient = ttsEngineClient;
        this.taskExecutor = taskExecutor;
    }

    /**
     * Creates a synthesis task for the requested source or reuses an existing one.
     *
     * @param request source type, source id and optional voice
     * @return the reused, cache-backed or newly queued task
     * @throws IllegalStateException    if TTS is disabled
     * @throws IllegalArgumentException if no user is logged in, the source type is unsupported,
     *                                  the script is missing or not owned by the caller, or the
     *                                  cleaned text is empty
     */
    @Override
    public TtsTaskResponse createOrReuse(TtsTaskCreateRequest request) {
        if (!enabled) {
            throw new IllegalStateException("TTS service is disabled");
        }
        String userId = currentUserId();
        String sourceType = normalizeSourceType(request.getSourceType());
        String sourceId = request.getSourceId().trim();
        String voice = resolveVoice(request.getVoice());
        String cleaned = cleanText(loadSourceText(userId, sourceType, sourceId));
        if (!StringUtils.hasText(cleaned)) {
            throw new IllegalArgumentException("Source text is empty");
        }
        cleaned = truncate(cleaned, maxTextLength);
        String hash = DigestUtils.md5DigestAsHex((voice + "\n" + cleaned).getBytes(StandardCharsets.UTF_8));

        TtsTask owned = findOwnedTask(userId, sourceType, sourceId, voice, hash);
        if (owned != null) {
            incrementRequestCount(owned);
            // A failed task must not be returned forever: asking again is the retry signal.
            if (STATUS_FAILED.equals(owned.getStatus())) {
                retryFailedTask(owned, cleaned, voice);
            }
            return toResponse(owned);
        }

        TtsTask cachedSuccess = findSuccessfulCache(voice, hash);
        if (cachedSuccess != null && StringUtils.hasText(cachedSuccess.getAudioUrl())) {
            TtsTask task = buildTask(userId, sourceType, sourceId, voice, hash, cleaned.length());
            task.setStatus(STATUS_SUCCESS);
            task.setAudioPath(cachedSuccess.getAudioPath());
            task.setAudioUrl(cachedSuccess.getAudioUrl());
            task.setDurationMs(cachedSuccess.getDurationMs());
            save(task);
            incrementRequestCount(cachedSuccess);
            return toResponse(task);
        }

        TtsTask task = buildTask(userId, sourceType, sourceId, voice, hash, cleaned.length());
        save(task);
        submit(task, cleaned, voice);
        return toResponse(task);
    }

    /**
     * Returns the task with the given id if it belongs to the current user.
     *
     * @return the task, or {@code null} when it does not exist or is owned by someone else
     * @throws IllegalArgumentException if no user is logged in
     */
    @Override
    public TtsTaskResponse getTask(String id) {
        String userId = currentUserId();
        TtsTask task = getById(id);
        if (task == null || !userId.equals(task.getUserId())) {
            return null;
        }
        return toResponse(task);
    }

    /**
     * Returns the current user's most recently created task for the given source and voice.
     *
     * @return the task, or {@code null} when there is none
     * @throws IllegalArgumentException if no user is logged in
     */
    @Override
    public TtsTaskResponse getBySource(String sourceType, String sourceId, String voice) {
        String userId = currentUserId();
        // Tasks are stored with a trimmed source id (see createOrReuse), so query the same way.
        String normalizedSourceId = sourceId == null ? null : sourceId.trim();
        TtsTask task = getOne(new LambdaQueryWrapper<TtsTask>()
                .eq(TtsTask::getUserId, userId)
                .eq(TtsTask::getSourceType, normalizeSourceType(sourceType))
                .eq(TtsTask::getSourceId, normalizedSourceId)
                .eq(TtsTask::getVoice, resolveVoice(voice))
                .eq(TtsTask::getIsDeleted, 0)
                .orderByDesc(TtsTask::getCreateTime)
                .last("LIMIT 1"));
        return task == null ? null : toResponse(task);
    }

    /**
     * Queues the synthesis of {@code task} on the executor. If the executor refuses the work the
     * task is marked failed right away instead of being left in {@code pending} forever.
     */
    private void submit(TtsTask task, String text, String voice) {
        String taskId = task.getId();
        String outputPath = task.getAudioPath();
        try {
            CompletableFuture.runAsync(() -> process(taskId, text, voice, outputPath), taskExecutor);
        } catch (java.util.concurrent.RejectedExecutionException e) {
            log.warn("TTS task rejected by executor, taskId={}", taskId, e);
            task.setStatus(STATUS_FAILED);
            task.setErrorMessage("TTS worker is busy, please retry later");
            updateById(task);
        }
    }

    /**
     * Re-queues a failed task. The status flip is a conditional update, so when several requests
     * race only the one that actually moved the row from failed to pending starts a synthesis.
     */
    private void retryFailedTask(TtsTask task, String text, String voice) {
        boolean claimed = lambdaUpdate()
                .eq(TtsTask::getId, task.getId())
                .eq(TtsTask::getStatus, STATUS_FAILED)
                .set(TtsTask::getStatus, STATUS_PENDING)
                .set(TtsTask::getErrorMessage, null)
                .update();
        if (!claimed) {
            return;
        }
        task.setStatus(STATUS_PENDING);
        task.setErrorMessage(null);
        submit(task, text, voice);
    }

    /** Runs one synthesis on a worker thread and records its outcome on the task row. */
    private void process(String taskId, String text, String voice, String outputPath) {
        try {
            TtsTask task = getById(taskId);
            if (task == null) {
                return;
            }
            task.setStatus(STATUS_PROCESSING);
            task.setErrorMessage(null);
            updateById(task);

            TtsEngineClient.TtsEngineResult result = ttsEngineClient.synthesize(text, voice, outputPath);
            // Reload: the row may have changed (or disappeared) while the engine was working.
            task = getById(taskId);
            if (task == null) {
                return;
            }
            if (result.isSuccess()) {
                task.setStatus(STATUS_SUCCESS);
                task.setDurationMs(result.getDurationMs());
                if (StringUtils.hasText(result.getAudioPath())) {
                    task.setAudioPath(result.getAudioPath());
                }
                task.setErrorMessage(null);
            } else {
                task.setStatus(STATUS_FAILED);
                task.setErrorMessage(limitError(result.getErrorMessage()));
            }
            updateById(task);
        } catch (Exception e) {
            log.warn("TTS task processing failed, taskId={}", taskId, e);
            markFailed(taskId, e.getMessage());
        }
    }

    /**
     * Best-effort failure recording. It runs inside an async task where nobody observes a thrown
     * exception, so a persistence error here is logged rather than propagated.
     */
    private void markFailed(String taskId, String message) {
        try {
            TtsTask task = getById(taskId);
            if (task != null) {
                task.setStatus(STATUS_FAILED);
                task.setErrorMessage(limitError(message));
                updateById(task);
            }
        } catch (Exception e) {
            log.error("Could not record TTS failure, taskId={}", taskId, e);
        }
    }

    /** Builds a pending task whose audio file is named after the text hash. */
    private TtsTask buildTask(String userId, String sourceType, String sourceId, String voice, String hash, int textLength) {
        String filename = hash + ".mp3";
        return TtsTask.builder()
                .userId(userId)
                .sourceType(sourceType)
                .sourceId(sourceId)
                .textHash(hash)
                .textLength(textLength)
                .voice(voice)
                .status(STATUS_PENDING)
                .audioPath(joinPath(outputDir, filename))
                .audioUrl(joinPath(publicUrlPrefix, filename))
                .requestCount(1)
                .build();
    }

    /** Latest task of this user for exactly this source, voice and text hash. */
    private TtsTask findOwnedTask(String userId, String sourceType, String sourceId, String voice, String hash) {
        return getOne(new LambdaQueryWrapper<TtsTask>()
                .eq(TtsTask::getUserId, userId)
                .eq(TtsTask::getSourceType, sourceType)
                .eq(TtsTask::getSourceId, sourceId)
                .eq(TtsTask::getVoice, voice)
                .eq(TtsTask::getTextHash, hash)
                .eq(TtsTask::getIsDeleted, 0)
                .orderByDesc(TtsTask::getCreateTime)
                .last("LIMIT 1"));
    }

    /** Latest successful task of any user with the same voice and text hash. */
    private TtsTask findSuccessfulCache(String voice, String hash) {
        return getOne(new LambdaQueryWrapper<TtsTask>()
                .eq(TtsTask::getTextHash, hash)
                .eq(TtsTask::getVoice, voice)
                .eq(TtsTask::getStatus, STATUS_SUCCESS)
                .eq(TtsTask::getIsDeleted, 0)
                .orderByDesc(TtsTask::getCreateTime)
                .last("LIMIT 1"));
    }

    /**
     * Bumps the request counter with a single-column SQL increment. Writing the whole entity back
     * (as {@code updateById} does) could overwrite a status the async worker stored after this
     * entity was loaded, e.g. resetting a finished task to {@code processing}.
     */
    private void incrementRequestCount(TtsTask task) {
        lambdaUpdate()
                .eq(TtsTask::getId, task.getId())
                .setSql("request_count = COALESCE(request_count, 0) + 1")
                .update();
        // Keep the in-memory copy roughly in step with the row.
        task.setRequestCount((task.getRequestCount() == null ? 0 : task.getRequestCount()) + 1);
    }

    /**
     * Concatenates the readable parts of the caller's script.
     *
     * @throws IllegalArgumentException for an unsupported source type, or when the script does
     *                                  not exist or belongs to another user
     */
    private String loadSourceText(String userId, String sourceType, String sourceId) {
        if (!SOURCE_TYPE_EPIC_SCRIPT.equals(sourceType)) {
            throw new IllegalArgumentException("Unsupported sourceType");
        }
        EpicScript script = epicScriptMapper.selectById(sourceId);
        if (script == null || !userId.equals(script.getUserId())) {
            throw new IllegalArgumentException("Script not found");
        }
        StringBuilder text = new StringBuilder();
        append(text, script.getTitle());
        append(text, script.getPlotIntro());
        append(text, script.getPlotTurning());
        append(text, script.getPlotClimax());
        append(text, script.getPlotEnding());
        Map<String, Object> plotJson = script.getPlotJson();
        if (plotJson != null && plotJson.get("fullContent") != null) {
            append(text, String.valueOf(plotJson.get("fullContent")));
        }
        return text.toString();
    }

    /**
     * Strips the characters {@code # > * _ ` -} and collapses every whitespace run into a single
     * space.
     *
     * @param text raw text, may be {@code null}
     * @return the cleaned, trimmed text; the empty string for {@code null}
     */
    public static String cleanText(String text) {
        if (text == null) {
            return "";
        }
        return text.replaceAll("[#>*_`\\-]", "")
                .replaceAll("\\s+", " ")
                .trim();
    }

    /**
     * Cuts {@code text} to at most {@code maxLength} UTF-16 units without splitting a surrogate
     * pair; a dangling high surrogate would be encoded as '?' when the text is hashed as UTF-8.
     */
    private static String truncate(String text, int maxLength) {
        if (text.length() <= maxLength) {
            return text;
        }
        int end = maxLength;
        if (end > 0
                && Character.isHighSurrogate(text.charAt(end - 1))
                && Character.isLowSurrogate(text.charAt(end))) {
            end--;
        }
        return text.substring(0, end);
    }

    /** Maps an entity to its API view; the audio URL is exposed only for successful tasks. */
    private TtsTaskResponse toResponse(TtsTask task) {
        return TtsTaskResponse.builder()
                .id(task.getId())
                .sourceType(task.getSourceType())
                .sourceId(task.getSourceId())
                .status(task.getStatus())
                .voice(task.getVoice())
                .audioUrl(STATUS_SUCCESS.equals(task.getStatus()) ? task.getAudioUrl() : null)
                .durationMs(task.getDurationMs())
                .errorMessage(task.getErrorMessage())
                .build();
    }

    /** @throws IllegalArgumentException if no user id is bound to the current context */
    private String currentUserId() {
        String userId = UserContextHolder.getCurrentUserId();
        if (!StringUtils.hasText(userId)) {
            throw new IllegalArgumentException("User not logged in");
        }
        return userId;
    }

    /** Trims the source type; a blank value falls back to the epic-script type. */
    private String normalizeSourceType(String sourceType) {
        return StringUtils.hasText(sourceType) ? sourceType.trim() : SOURCE_TYPE_EPIC_SCRIPT;
    }

    /** Trims the voice; a blank value falls back to the configured default voice. */
    private String resolveVoice(String voice) {
        return StringUtils.hasText(voice) ? voice.trim() : defaultVoice;
    }

    /** Appends a non-blank section followed by a blank line. */
    private static void append(StringBuilder text, String value) {
        if (StringUtils.hasText(value)) {
            text.append(value).append("\n\n");
        }
    }

    /** Joins a prefix and a file name with exactly one slash between them. */
    private static String joinPath(String prefix, String filename) {
        if (prefix.endsWith("/")) {
            return prefix + filename;
        }
        return prefix + "/" + filename;
    }

    /** Caps an error message at 1000 characters and substitutes a default for {@code null}. */
    private static String limitError(String message) {
        if (message == null) {
            return "TTS synthesis failed";
        }
        return message.length() > 1000 ? message.substring(0, 1000) : message;
    }
}
@@ -55,6 +55,15 @@ emotion:
# 文件上传路径 - 生产环境
upload:
path: /data/uploads/emotion-museum
# Text-to-speech config
tts:
enabled: true
engine-url: http://127.0.0.1:19110
output-dir: /data/uploads/emotion-museum/tts
public-url-prefix: /tts/audio
max-text-length: 5000
default-voice: default_zh_female
# 生产模式配置
prod:
@@ -93,6 +93,15 @@ emotion:
max-file-size: 10MB
allowed-types: jpg,jpeg,png,gif,pdf,doc,docx
# Text-to-speech config
tts:
enabled: true
engine-url: http://127.0.0.1:19110
output-dir: /data/uploads/emotion-museum/tts
public-url-prefix: /tts/audio
max-text-length: 5000
default-voice: default_zh_female
# 安全配置
security:
ignore-urls: