diff --git a/backend-single/src/main/java/com/emotion/config/WebConfig.java b/backend-single/src/main/java/com/emotion/config/WebConfig.java index 9377052..e879a5b 100644 --- a/backend-single/src/main/java/com/emotion/config/WebConfig.java +++ b/backend-single/src/main/java/com/emotion/config/WebConfig.java @@ -43,8 +43,12 @@ public class WebConfig implements WebMvcConfigurer { .addPathPatterns("/**") .excludePathPatterns( "/auth/**", + "/analytics/events/batch", + "/tts/audio/**", "/admin/**", // 排除管理员接口,由AdminAuthInterceptor处理 "/error", + "/analytics/events/batch", + "/tts/audio/**", "/favicon.ico", "/actuator/**", "/swagger-ui/**", diff --git a/backend-single/src/main/java/com/emotion/config/WebMvcConfig.java b/backend-single/src/main/java/com/emotion/config/WebMvcConfig.java index e7d42f6..c3537c6 100644 --- a/backend-single/src/main/java/com/emotion/config/WebMvcConfig.java +++ b/backend-single/src/main/java/com/emotion/config/WebMvcConfig.java @@ -44,6 +44,7 @@ public class WebMvcConfig implements WebMvcConfigurer { "/auth/refresh-token", // 刷新token接口 "/auth/resetPassword", // 重置密码接口(免登录) "/analytics/events/batch", // Analytics event batch endpoint + "/tts/audio/**", // Public generated TTS audio files "/health", // 健康检查接口 "/ws/**", // WebSocket接口 "/swagger-ui/**", // Swagger UI diff --git a/backend-single/src/main/java/com/emotion/controller/AnalyticsController.java b/backend-single/src/main/java/com/emotion/controller/AnalyticsController.java index 8dcc377..ac6c199 100644 --- a/backend-single/src/main/java/com/emotion/controller/AnalyticsController.java +++ b/backend-single/src/main/java/com/emotion/controller/AnalyticsController.java @@ -4,12 +4,16 @@ import com.emotion.common.Result; import com.emotion.dto.request.analytics.AnalyticsEventBatchRequest; import com.emotion.dto.response.analytics.AnalyticsBatchResponse; import com.emotion.service.AnalyticsService; +import com.emotion.util.JwtUtil; +import com.emotion.util.UserContextHolder; import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.util.StringUtils; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; +import javax.servlet.http.HttpServletRequest; import javax.validation.Valid; @RestController @@ -19,8 +23,33 @@ public class AnalyticsController { @Autowired private AnalyticsService analyticsService; + @Autowired + private JwtUtil jwtUtil; + @PostMapping("/events/batch") - public Result batch(@Valid @RequestBody AnalyticsEventBatchRequest request) { - return Result.success(analyticsService.ingestBatch(request)); + public Result batch(@Valid @RequestBody AnalyticsEventBatchRequest request, + HttpServletRequest servletRequest) { + bindOptionalUser(servletRequest); + try { + return Result.success(analyticsService.ingestBatch(request)); + } finally { + UserContextHolder.clear(); + } + } + + private void bindOptionalUser(HttpServletRequest request) { + String authHeader = request.getHeader("Authorization"); + if (!StringUtils.hasText(authHeader) || !authHeader.startsWith("Bearer ")) { + return; + } + + String token = authHeader.substring(7); + if (!jwtUtil.validateToken(token)) { + return; + } + + UserContextHolder.setCurrentUserId(jwtUtil.getUserIdFromToken(token)); + UserContextHolder.setCurrentUsername(jwtUtil.getUsernameFromToken(token)); + UserContextHolder.setCurrentToken(token); } } diff --git a/backend-single/src/main/java/com/emotion/controller/TtsController.java b/backend-single/src/main/java/com/emotion/controller/TtsController.java new file mode 100644 index 0000000..c399f7c --- /dev/null +++ b/backend-single/src/main/java/com/emotion/controller/TtsController.java @@ -0,0 +1,81 @@ +package com.emotion.controller; + +import com.emotion.common.Result; +import com.emotion.dto.request.tts.TtsTaskCreateRequest; +import 
// ---- backend-single/src/main/java/com/emotion/controller/TtsController.java ----
package com.emotion.controller;

import com.emotion.common.Result;
import com.emotion.dto.request.tts.TtsTaskCreateRequest;
import com.emotion.dto.response.tts.TtsTaskResponse;
import com.emotion.service.TtsTaskService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.CacheControl;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import javax.validation.Valid;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;

/**
 * REST endpoints for text-to-speech tasks and for streaming the generated audio files.
 *
 * <p>The task endpoints rely on the logged-in user resolved by the service layer. The
 * {@code /tts/audio/**} path is listed in the interceptor exclusions, so audio files are served
 * without authentication.
 */
@RestController
@RequestMapping("/tts")
public class TtsController {

    private static final MediaType AUDIO_WAV = MediaType.valueOf("audio/wav");
    private static final MediaType AUDIO_MPEG = MediaType.valueOf("audio/mpeg");

    private final TtsTaskService ttsTaskService;

    /** Directory the audio files are read from; must match the directory the service writes to. */
    @Value("${emotion.tts.output-dir:/data/uploads/emotion-museum/tts}")
    private String outputDir;

    public TtsController(TtsTaskService ttsTaskService) {
        this.ttsTaskService = ttsTaskService;
    }

    /**
     * Creates a synthesis task for the given source, or returns the one that already exists.
     *
     * @param request validated source reference and optional voice
     * @return the task, or a bad-request result carrying the service's validation message
     */
    @PostMapping("/tasks")
    public Result<TtsTaskResponse> create(@Valid @RequestBody TtsTaskCreateRequest request) {
        try {
            return Result.success(ttsTaskService.createOrReuse(request));
        } catch (IllegalArgumentException | IllegalStateException e) {
            return Result.badRequest(e.getMessage());
        }
    }

    /**
     * Returns one task owned by the current user.
     *
     * @param id task id
     * @return the task, a not-found result when it is missing or owned by someone else, or a
     *         bad-request result when the service rejects the call (e.g. no logged-in user)
     */
    @GetMapping("/tasks/{id}")
    public Result<TtsTaskResponse> detail(@PathVariable String id) {
        try {
            TtsTaskResponse response = ttsTaskService.getTask(id);
            if (response == null) {
                return Result.notFound("TTS task not found");
            }
            return Result.success(response);
        } catch (IllegalArgumentException e) {
            // Same mapping as create(): the service signals "User not logged in" this way.
            return Result.badRequest(e.getMessage());
        }
    }

    /**
     * Looks up the current user's latest task for a source and voice.
     *
     * @param sourceType source type, e.g. {@code epic_script}
     * @param sourceId id of the source content
     * @param voice optional voice id; the service applies its default when absent
     * @return a success result whose payload is {@code null} when no task exists yet, or a
     *         bad-request result when the service rejects the call
     */
    @GetMapping("/tasks/by-source")
    public Result<TtsTaskResponse> bySource(@RequestParam String sourceType,
                                            @RequestParam String sourceId,
                                            @RequestParam(required = false) String voice) {
        try {
            return Result.success(ttsTaskService.getBySource(sourceType, sourceId, voice));
        } catch (IllegalArgumentException e) {
            return Result.badRequest(e.getMessage());
        }
    }

    /**
     * Streams a generated audio file from the output directory.
     *
     * @param filename bare file name; anything containing {@code ..} or a path separator is rejected
     * @return 200 with the file, 400 for an unsafe or unparsable name, 404 when no regular readable
     *         file of that name exists
     */
    @GetMapping("/audio/{filename:.+}")
    public ResponseEntity<Resource> audio(@PathVariable String filename) {
        if (filename.contains("..") || filename.contains("/") || filename.contains("\\")) {
            return ResponseEntity.badRequest().build();
        }

        Path baseDir;
        Path path;
        try {
            baseDir = Paths.get(outputDir).toAbsolutePath().normalize();
            path = baseDir.resolve(filename).normalize();
        } catch (InvalidPathException e) {
            return ResponseEntity.badRequest().build();
        }
        // Defense in depth: resolve() can leave the base directory for inputs the character filter
        // does not cover (e.g. drive-relative names on Windows).
        if (!path.startsWith(baseDir)) {
            return ResponseEntity.badRequest().build();
        }
        // isRegularFile rather than exists(): a directory (e.g. filename ".") must not be served.
        if (!Files.isRegularFile(path) || !Files.isReadable(path)) {
            return ResponseEntity.notFound().build();
        }

        MediaType mediaType = filename.endsWith(".wav") ? AUDIO_WAV : AUDIO_MPEG;
        return ResponseEntity.ok()
                .contentType(mediaType)
                .cacheControl(CacheControl.maxAge(30, TimeUnit.DAYS).cachePublic())
                .body(new FileSystemResource(path));
    }
}

// ---- backend-single/src/main/java/com/emotion/dto/request/tts/TtsTaskCreateRequest.java ----
package com.emotion.dto.request.tts;

import lombok.Data;

import javax.validation.constraints.NotBlank;
import javax.validation.constraints.Size;

/** Request body of {@code POST /tts/tasks}: which content to read aloud and with which voice. */
@Data
public class TtsTaskCreateRequest {

    /** Kind of source content, e.g. {@code epic_script}. */
    @NotBlank
    @Size(max = 50)
    private String sourceType;

    /** Id of the source content within its type. */
    @NotBlank
    @Size(max = 64)
    private String sourceId;

    /** Optional voice id; the service falls back to its configured default when blank. */
    @Size(max = 64)
    private String voice;
}
// ---- backend-single/src/main/java/com/emotion/dto/response/tts/TtsTaskResponse.java ----
package com.emotion.dto.response.tts;

import lombok.Builder;
import lombok.Data;

/** Client-facing view of a text-to-speech task. */
@Data
@Builder
public class TtsTaskResponse {

    // Task id.
    private String id;
    // Kind of source content the audio was generated from, e.g. "epic_script".
    private String sourceType;
    // Id of the source content.
    private String sourceId;
    // One of: pending, processing, success, failed.
    private String status;
    // Voice id used for synthesis.
    private String voice;
    // Playback URL; the service only fills this in once the task has succeeded.
    private String audioUrl;
    // Audio duration in milliseconds, when the engine reports one.
    private Long durationMs;
    // Failure reason, when the task failed.
    private String errorMessage;
}

// ---- backend-single/src/main/java/com/emotion/entity/TtsTask.java ----
package com.emotion.entity;

import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.emotion.common.BaseEntity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

/**
 * Row of {@code t_tts_task}: one synthesis request by one user for one source and voice.
 * Id, audit columns and the logic-delete flag come from {@link BaseEntity}.
 */
@Data
@EqualsAndHashCode(callSuper = true)
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@TableName("t_tts_task")
public class TtsTask extends BaseEntity {

    // Owner of the task.
    @TableField("user_id")
    private String userId;

    // Kind of source content, e.g. "epic_script".
    @TableField("source_type")
    private String sourceType;

    // Id of the source content.
    @TableField("source_id")
    private String sourceId;

    // Hash over voice + cleaned text; also used as the audio file's base name.
    @TableField("text_hash")
    private String textHash;

    // Length of the cleaned (and possibly truncated) text.
    @TableField("text_length")
    private Integer textLength;

    // Voice id used for synthesis.
    @TableField("voice")
    private String voice;

    // One of: pending, processing, success, failed.
    @TableField("status")
    private String status;

    // Public URL of the generated audio.
    @TableField("audio_url")
    private String audioUrl;

    // Server-side path of the generated audio.
    @TableField("audio_path")
    private String audioPath;

    // Audio duration in milliseconds, when known.
    @TableField("duration_ms")
    private Long durationMs;

    // Failure reason for failed tasks.
    @TableField("error_message")
    private String errorMessage;

    // How many times this task has been requested; starts at 1.
    @TableField("request_count")
    private Integer requestCount;
}
a/backend-single/src/main/java/com/emotion/mapper/TtsTaskMapper.java b/backend-single/src/main/java/com/emotion/mapper/TtsTaskMapper.java new file mode 100644 index 0000000..de57ad9 --- /dev/null +++ b/backend-single/src/main/java/com/emotion/mapper/TtsTaskMapper.java @@ -0,0 +1,9 @@ +package com.emotion.mapper; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.emotion.entity.TtsTask; +import org.apache.ibatis.annotations.Mapper; + +@Mapper +public interface TtsTaskMapper extends BaseMapper { +} diff --git a/backend-single/src/main/java/com/emotion/service/TtsEngineClient.java b/backend-single/src/main/java/com/emotion/service/TtsEngineClient.java new file mode 100644 index 0000000..f1dc82a --- /dev/null +++ b/backend-single/src/main/java/com/emotion/service/TtsEngineClient.java @@ -0,0 +1,36 @@ +package com.emotion.service; + +public interface TtsEngineClient { + + TtsEngineResult synthesize(String text, String voice, String outputPath); + + class TtsEngineResult { + private final boolean success; + private final String audioPath; + private final Long durationMs; + private final String errorMessage; + + public TtsEngineResult(boolean success, String audioPath, Long durationMs, String errorMessage) { + this.success = success; + this.audioPath = audioPath; + this.durationMs = durationMs; + this.errorMessage = errorMessage; + } + + public boolean isSuccess() { + return success; + } + + public String getAudioPath() { + return audioPath; + } + + public Long getDurationMs() { + return durationMs; + } + + public String getErrorMessage() { + return errorMessage; + } + } +} diff --git a/backend-single/src/main/java/com/emotion/service/TtsTaskService.java b/backend-single/src/main/java/com/emotion/service/TtsTaskService.java new file mode 100644 index 0000000..3f629cc --- /dev/null +++ b/backend-single/src/main/java/com/emotion/service/TtsTaskService.java @@ -0,0 +1,15 @@ +package com.emotion.service; + +import 
// ---- backend-single/src/main/java/com/emotion/service/TtsTaskService.java ----
package com.emotion.service;

import com.baomidou.mybatisplus.extension.service.IService;
import com.emotion.dto.request.tts.TtsTaskCreateRequest;
import com.emotion.dto.response.tts.TtsTaskResponse;
import com.emotion.entity.TtsTask;

/** Text-to-speech task operations; every method acts on behalf of the current user. */
public interface TtsTaskService extends IService<TtsTask> {

    /**
     * Returns the caller's task for the requested source and voice, creating one when needed.
     *
     * @throws IllegalStateException when TTS is disabled
     * @throws IllegalArgumentException when nobody is logged in, the source is unsupported or
     *         missing, or the source has no text
     */
    TtsTaskResponse createOrReuse(TtsTaskCreateRequest request);

    /** Returns the caller's task with this id, or {@code null} when missing or owned by another user. */
    TtsTaskResponse getTask(String id);

    /** Returns the caller's most recent task for the source and voice, or {@code null} when none exists. */
    TtsTaskResponse getBySource(String sourceType, String sourceId, String voice);
}

// ---- backend-single/src/main/java/com/emotion/service/impl/HttpTtsEngineClient.java ----
package com.emotion.service.impl;

import com.emotion.service.TtsEngineClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import java.util.Map;

/** {@link TtsEngineClient} that posts JSON to the engine's {@code /synthesize} endpoint. */
@Service
public class HttpTtsEngineClient implements TtsEngineClient {

    private final RestTemplate restTemplate;

    @Value("${emotion.tts.engine-url:http://127.0.0.1:19110}")
    private String engineUrl;

    public HttpTtsEngineClient(RestTemplate restTemplate) {
        this.restTemplate = restTemplate;
    }

    /**
     * Calls the engine and translates its JSON reply into a {@link TtsEngineResult}.
     *
     * <p>This method never throws: transport errors, an empty body, or a reply without
     * {@code success == true} all come back as a failed result.
     */
    @Override
    public TtsEngineResult synthesize(String text, String voice, String outputPath) {
        try {
            Map<String, Object> body = Map.of(
                    "text", text,
                    "voice", voice,
                    "outputPath", outputPath
            );
            ResponseEntity<Map> response =
                    restTemplate.postForEntity(engineUrl + "/synthesize", body, Map.class);
            Map<?, ?> data = response.getBody();
            if (data == null) {
                return new TtsEngineResult(false, null, null, "empty response");
            }
            if (!Boolean.TRUE.equals(data.get("success"))) {
                return new TtsEngineResult(false, null, null, textOrNull(data.get("errorMessage")));
            }
            Object duration = data.get("durationMs");
            Long durationMs = duration instanceof Number ? ((Number) duration).longValue() : null;
            return new TtsEngineResult(true, textOrNull(data.get("audioPath")), durationMs, null);
        } catch (Exception e) {
            // Boundary with an external process: report every failure as a result so the caller
            // can record it on the task instead of unwinding the worker thread.
            return new TtsEngineResult(false, null, null, e.getMessage());
        }
    }

    /**
     * Converts a JSON value to text while keeping absence as {@code null}. String.valueOf(null)
     * would yield the literal "null", which downstream code would store as a real path or message.
     */
    private static String textOrNull(Object value) {
        return value == null ? null : value.toString();
    }
}

// ---- backend-single/src/main/java/com/emotion/service/impl/TtsTaskServiceImpl.java ----
package com.emotion.service.impl;

import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.emotion.dto.request.tts.TtsTaskCreateRequest;
import com.emotion.dto.response.tts.TtsTaskResponse;
import com.emotion.entity.EpicScript;
import com.emotion.entity.TtsTask;
import com.emotion.mapper.EpicScriptMapper;
import com.emotion.mapper.TtsTaskMapper;
import com.emotion.service.TtsEngineClient;
import com.emotion.service.TtsTaskService;
import com.emotion.util.UserContextHolder;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.DigestUtils;
import org.springframework.util.StringUtils;

import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.RejectedExecutionException;
import java.util.regex.Pattern;

/**
 * Creates, de-duplicates and runs text-to-speech tasks.
 *
 * <p>A task is keyed by user, source, voice and a hash of voice + cleaned text. A request first
 * reuses the caller's own task, then any user's successful task with the same hash (the audio is
 * identical), and only then synthesizes on the background executor.
 *
 * <p>State changes made after the initial insert use column-level updates. Whole-entity updates
 * from the request thread and the worker thread could otherwise overwrite each other's status.
 */
@Slf4j
@Service
public class TtsTaskServiceImpl extends ServiceImpl<TtsTaskMapper, TtsTask> implements TtsTaskService {

    private static final String SOURCE_TYPE_EPIC_SCRIPT = "epic_script";
    private static final String STATUS_PENDING = "pending";
    private static final String STATUS_PROCESSING = "processing";
    private static final String STATUS_SUCCESS = "success";
    private static final String STATUS_FAILED = "failed";

    private static final Pattern MARKDOWN_MARKERS = Pattern.compile("[#>*_`\\-]");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    private final EpicScriptMapper epicScriptMapper;
    private final TtsEngineClient ttsEngineClient;
    private final Executor taskExecutor;

    @Value("${emotion.tts.enabled:true}")
    private boolean enabled;

    @Value("${emotion.tts.output-dir:/data/uploads/emotion-museum/tts}")
    private String outputDir;

    // Default aligned with the /tts/audio/{filename} endpoint and with both YAML files; the former
    // default pointed at a path this change adds no handler for.
    @Value("${emotion.tts.public-url-prefix:/tts/audio}")
    private String publicUrlPrefix;

    @Value("${emotion.tts.max-text-length:5000}")
    private int maxTextLength;

    @Value("${emotion.tts.default-voice:default_zh_female}")
    private String defaultVoice;

    public TtsTaskServiceImpl(EpicScriptMapper epicScriptMapper,
                              TtsEngineClient ttsEngineClient,
                              @Qualifier("taskExecutor") Executor taskExecutor) {
        this.epicScriptMapper = epicScriptMapper;
        this.ttsEngineClient = ttsEngineClient;
        this.taskExecutor = taskExecutor;
    }

    /**
     * {@inheritDoc}
     *
     * <p>A previously failed task of the caller is re-queued instead of being returned as a
     * permanent failure; without that, the same text and voice could never be synthesized again.
     */
    @Override
    public TtsTaskResponse createOrReuse(TtsTaskCreateRequest request) {
        if (!enabled) {
            throw new IllegalStateException("TTS service is disabled");
        }

        String userId = currentUserId();
        String sourceType = normalizeSourceType(request.getSourceType());
        String sourceId = request.getSourceId().trim();
        String voice = resolveVoice(request.getVoice());
        String cleaned = cleanText(loadSourceText(userId, sourceType, sourceId));
        if (!StringUtils.hasText(cleaned)) {
            throw new IllegalArgumentException("Source text is empty");
        }
        String text = truncate(cleaned, maxTextLength);

        String hash = DigestUtils.md5DigestAsHex((voice + "\n" + text).getBytes(StandardCharsets.UTF_8));
        TtsTask owned = findOwnedTask(userId, sourceType, sourceId, voice, hash);
        if (owned != null) {
            incrementRequestCount(owned);
            if (STATUS_FAILED.equals(owned.getStatus())) {
                owned = retryFailed(owned, text, voice);
            }
            return toResponse(owned);
        }

        TtsTask cachedSuccess = findSuccessfulCache(voice, hash);
        if (cachedSuccess != null && StringUtils.hasText(cachedSuccess.getAudioUrl())) {
            TtsTask task = buildTask(userId, sourceType, sourceId, voice, hash, text.length());
            task.setStatus(STATUS_SUCCESS);
            task.setAudioPath(cachedSuccess.getAudioPath());
            task.setAudioUrl(cachedSuccess.getAudioUrl());
            task.setDurationMs(cachedSuccess.getDurationMs());
            save(task);
            incrementRequestCount(cachedSuccess);
            return toResponse(task);
        }

        TtsTask task = buildTask(userId, sourceType, sourceId, voice, hash, text.length());
        save(task);
        submit(task, text, voice);
        return toResponse(task);
    }

    @Override
    public TtsTaskResponse getTask(String id) {
        String userId = currentUserId();
        TtsTask task = getById(id);
        if (task == null || !userId.equals(task.getUserId())) {
            return null;
        }
        return toResponse(task);
    }

    @Override
    public TtsTaskResponse getBySource(String sourceType, String sourceId, String voice) {
        String userId = currentUserId();
        // Trim like createOrReuse does, so a lookup finds what a create stored.
        String trimmedSourceId = sourceId == null ? null : sourceId.trim();
        TtsTask task = getOne(new LambdaQueryWrapper<TtsTask>()
                .eq(TtsTask::getUserId, userId)
                .eq(TtsTask::getSourceType, normalizeSourceType(sourceType))
                .eq(TtsTask::getSourceId, trimmedSourceId)
                .eq(TtsTask::getVoice, resolveVoice(voice))
                .eq(TtsTask::getIsDeleted, 0)
                .orderByDesc(TtsTask::getCreateTime)
                .last("LIMIT 1"));
        return task == null ? null : toResponse(task);
    }

    /**
     * Hands the task to the background executor. If the executor rejects it, the task is marked
     * failed right away so it does not sit in {@code pending} forever.
     */
    private void submit(TtsTask task, String text, String voice) {
        String taskId = task.getId();
        String outputPath = task.getAudioPath();
        try {
            CompletableFuture.runAsync(() -> process(taskId, text, voice, outputPath), taskExecutor);
        } catch (RejectedExecutionException e) {
            log.warn("TTS executor rejected task, taskId={}", taskId, e);
            String message = "TTS queue is full, please retry later";
            markFailed(taskId, message);
            task.setStatus(STATUS_FAILED);
            task.setErrorMessage(message);
        }
    }

    /**
     * Moves a failed task back to {@code pending} and submits it again. The status check is part
     * of the UPDATE, so of several concurrent callers only one re-submits.
     *
     * @return the task state to report to the caller
     */
    private TtsTask retryFailed(TtsTask task, String text, String voice) {
        boolean claimed = lambdaUpdate()
                .eq(TtsTask::getId, task.getId())
                .eq(TtsTask::getStatus, STATUS_FAILED)
                .set(TtsTask::getStatus, STATUS_PENDING)
                .set(TtsTask::getErrorMessage, null)
                .update();
        if (!claimed) {
            // Another request re-queued it first; report whatever state it is in now.
            TtsTask current = getById(task.getId());
            return current != null ? current : task;
        }
        task.setStatus(STATUS_PENDING);
        task.setErrorMessage(null);
        submit(task, text, voice);
        return task;
    }

    /** Worker body: marks the task processing, calls the engine, then records the outcome. */
    private void process(String taskId, String text, String voice, String outputPath) {
        try {
            boolean started = lambdaUpdate()
                    .eq(TtsTask::getId, taskId)
                    .set(TtsTask::getStatus, STATUS_PROCESSING)
                    .set(TtsTask::getErrorMessage, null)
                    .update();
            if (!started) {
                // The row no longer exists (or is logically deleted); nothing to synthesize for.
                return;
            }

            TtsEngineClient.TtsEngineResult result = ttsEngineClient.synthesize(text, voice, outputPath);
            if (!result.isSuccess()) {
                markFailed(taskId, result.getErrorMessage());
                return;
            }
            lambdaUpdate()
                    .eq(TtsTask::getId, taskId)
                    .set(TtsTask::getStatus, STATUS_SUCCESS)
                    .set(TtsTask::getErrorMessage, null)
                    .set(result.getDurationMs() != null, TtsTask::getDurationMs, result.getDurationMs())
                    .set(StringUtils.hasText(result.getAudioPath()), TtsTask::getAudioPath, result.getAudioPath())
                    .update();
        } catch (Exception e) {
            log.warn("TTS task processing failed, taskId={}", taskId, e);
            markFailed(taskId, e.getMessage());
        }
    }

    /** Records a failure without touching any other column. */
    private void markFailed(String taskId, String message) {
        lambdaUpdate()
                .eq(TtsTask::getId, taskId)
                .set(TtsTask::getStatus, STATUS_FAILED)
                .set(TtsTask::getErrorMessage, limitError(message))
                .update();
    }

    private TtsTask buildTask(String userId, String sourceType, String sourceId, String voice, String hash, int textLength) {
        // The hash doubles as the file name, so identical voice + text always map to one file.
        String filename = hash + ".mp3";
        return TtsTask.builder()
                .userId(userId)
                .sourceType(sourceType)
                .sourceId(sourceId)
                .textHash(hash)
                .textLength(textLength)
                .voice(voice)
                .status(STATUS_PENDING)
                .audioPath(joinPath(outputDir, filename))
                .audioUrl(joinPath(publicUrlPrefix, filename))
                .requestCount(1)
                .build();
    }

    private TtsTask findOwnedTask(String userId, String sourceType, String sourceId, String voice, String hash) {
        return getOne(new LambdaQueryWrapper<TtsTask>()
                .eq(TtsTask::getUserId, userId)
                .eq(TtsTask::getSourceType, sourceType)
                .eq(TtsTask::getSourceId, sourceId)
                .eq(TtsTask::getVoice, voice)
                .eq(TtsTask::getTextHash, hash)
                .eq(TtsTask::getIsDeleted, 0)
                .orderByDesc(TtsTask::getCreateTime)
                .last("LIMIT 1"));
    }

    private TtsTask findSuccessfulCache(String voice, String hash) {
        return getOne(new LambdaQueryWrapper<TtsTask>()
                .eq(TtsTask::getTextHash, hash)
                .eq(TtsTask::getVoice, voice)
                .eq(TtsTask::getStatus, STATUS_SUCCESS)
                .eq(TtsTask::getIsDeleted, 0)
                .orderByDesc(TtsTask::getCreateTime)
                .last("LIMIT 1"));
    }

    /**
     * Bumps the counter in SQL. Writing the whole entity back could overwrite a status the worker
     * thread set after this request loaded the row, and would lose concurrent increments.
     */
    private void incrementRequestCount(TtsTask task) {
        lambdaUpdate()
                .eq(TtsTask::getId, task.getId())
                .setSql("request_count = COALESCE(request_count, 0) + 1")
                .update();
    }

    /**
     * Concatenates the readable parts of the caller's script.
     *
     * @throws IllegalArgumentException for an unsupported source type, or when the script is
     *         missing or belongs to another user
     */
    private String loadSourceText(String userId, String sourceType, String sourceId) {
        if (!SOURCE_TYPE_EPIC_SCRIPT.equals(sourceType)) {
            throw new IllegalArgumentException("Unsupported sourceType");
        }
        EpicScript script = epicScriptMapper.selectById(sourceId);
        if (script == null || !userId.equals(script.getUserId())) {
            throw new IllegalArgumentException("Script not found");
        }

        StringBuilder text = new StringBuilder();
        append(text, script.getTitle());
        append(text, script.getPlotIntro());
        append(text, script.getPlotTurning());
        append(text, script.getPlotClimax());
        append(text, script.getPlotEnding());
        Map<?, ?> plotJson = script.getPlotJson();
        if (plotJson != null && plotJson.get("fullContent") != null) {
            append(text, String.valueOf(plotJson.get("fullContent")));
        }
        return text.toString();
    }

    /**
     * Removes Markdown marker characters ({@code # > * _ ` -}) and collapses whitespace runs into
     * single spaces.
     *
     * @param text raw text; may be {@code null}
     * @return the cleaned text, or an empty string for {@code null}
     */
    public static String cleanText(String text) {
        if (text == null) {
            return "";
        }
        String withoutMarkers = MARKDOWN_MARKERS.matcher(text).replaceAll("");
        return WHITESPACE_RUN.matcher(withoutMarkers).replaceAll(" ").trim();
    }

    /** Cuts {@code text} to at most {@code maxLength} chars without splitting a surrogate pair. */
    private static String truncate(String text, int maxLength) {
        if (text.length() <= maxLength) {
            return text;
        }
        int end = maxLength;
        if (end > 0 && Character.isHighSurrogate(text.charAt(end - 1))) {
            end--;
        }
        return text.substring(0, end);
    }

    private TtsTaskResponse toResponse(TtsTask task) {
        // The URL is only exposed once the file is known to exist.
        String audioUrl = STATUS_SUCCESS.equals(task.getStatus()) ? task.getAudioUrl() : null;
        return TtsTaskResponse.builder()
                .id(task.getId())
                .sourceType(task.getSourceType())
                .sourceId(task.getSourceId())
                .status(task.getStatus())
                .voice(task.getVoice())
                .audioUrl(audioUrl)
                .durationMs(task.getDurationMs())
                .errorMessage(task.getErrorMessage())
                .build();
    }

    private String currentUserId() {
        String userId = UserContextHolder.getCurrentUserId();
        if (!StringUtils.hasText(userId)) {
            throw new IllegalArgumentException("User not logged in");
        }
        return userId;
    }

    private String normalizeSourceType(String sourceType) {
        return StringUtils.hasText(sourceType) ? sourceType.trim() : SOURCE_TYPE_EPIC_SCRIPT;
    }

    private String resolveVoice(String voice) {
        return StringUtils.hasText(voice) ? voice.trim() : defaultVoice;
    }

    private static void append(StringBuilder text, String value) {
        if (StringUtils.hasText(value)) {
            text.append(value).append("\n\n");
        }
    }

    private static String joinPath(String prefix, String filename) {
        if (prefix.endsWith("/")) {
            return prefix + filename;
        }
        return prefix + "/" + filename;
    }

    /** Caps a message at the 1000 characters the {@code error_message} column can hold. */
    private static String limitError(String message) {
        if (message == null) {
            return "TTS synthesis failed";
        }
        return message.length() > 1000 ? message.substring(0, 1000) : message;
    }
}

// ---- backend-single/src/main/resources/application.yml and application-prod.yml ----
// Both files gain the same block under `emotion:` in this change:
//
//   # Text-to-speech config
//   tts:
//     enabled: true
//     engine-url: http://127.0.0.1:19110
//     output-dir: /data/uploads/emotion-museum/tts
//     public-url-prefix: /tts/audio
//     max-text-length: 5000
//     default-voice: default_zh_female
TtsTaskServiceTest { + + @Test + @DisplayName("cleanText strips markdown and normalizes whitespace") + void cleanTextStripsMarkdownAndNormalizesWhitespace() { + String cleaned = TtsTaskServiceImpl.cleanText("# Title\n\n> **hello** `world` - ok"); + + assertEquals("Title hello world ok", cleaned); + } + + @Test + @DisplayName("cleanText returns empty string for null input") + void cleanTextReturnsEmptyForNull() { + assertEquals("", TtsTaskServiceImpl.cleanText(null)); + } + + @Test + @DisplayName("TtsEngineResult exposes synthesis result fields") + void ttsEngineResultExposesFields() { + TtsEngineClient.TtsEngineResult result = + new TtsEngineClient.TtsEngineResult(true, "/tmp/a.mp3", 1200L, null); + + assertTrue(result.isSuccess()); + assertEquals("/tmp/a.mp3", result.getAudioPath()); + assertEquals(1200L, result.getDurationMs()); + assertNull(result.getErrorMessage()); + assertFalse(new TtsEngineClient.TtsEngineResult(false, null, null, "boom").isSuccess()); + } +} diff --git a/backend-single/tts-service/README.md b/backend-single/tts-service/README.md new file mode 100644 index 0000000..34d486c --- /dev/null +++ b/backend-single/tts-service/README.md @@ -0,0 +1,19 @@ +# Emotion Museum TTS Service + +Install on `101.200.208.45`: + +```bash +cd /data/programs/emotion-museum/tts-service +python3 -m venv .venv +. .venv/bin/activate +pip install -r requirements.txt + +git clone https://github.com/myshell-ai/MeloTTS.git /data/programs/MeloTTS +cd /data/programs/MeloTTS +/data/programs/emotion-museum/tts-service/.venv/bin/pip install -e . 
# ---- backend-single/tts-service/app.py ----
"""Emotion Museum TTS sidecar: a small FastAPI wrapper around MeloTTS.

Run with ``uvicorn app:app --host 127.0.0.1 --port 19110`` and probe it with
``curl http://127.0.0.1:19110/health``. The backend posts to ``/synthesize``
and reads the ``success`` / ``audioPath`` / ``durationMs`` / ``errorMessage``
fields of the reply.
"""
from pathlib import Path
from threading import Lock

from fastapi import FastAPI
from pydantic import BaseModel, Field

app = FastAPI(title="Emotion Museum TTS")

# Lazily created model and its speaker table, shared by all requests.
_model = None
_speaker_ids = None
_model_lock = Lock()

# Key of the speaker used for every request.
_SPEAKER_KEY = "ZH"


class SynthesizeRequest(BaseModel):
    """Body of ``POST /synthesize``."""

    text: str = Field(min_length=1, max_length=5000)
    # Accepted for API compatibility; synthesis currently always uses _SPEAKER_KEY.
    voice: str = "default_zh_female"
    # Path the audio is written to. The caller is trusted (the service is meant
    # to listen on 127.0.0.1 only); do not expose this endpoint publicly.
    outputPath: str


def get_model():
    """Return ``(model, speaker_ids)``, loading MeloTTS on first use.

    The lock makes sure concurrent first requests load the model only once.
    """
    global _model, _speaker_ids
    with _model_lock:
        if _model is None:
            # Imported here so the app can start and answer /health before the
            # heavy dependency is loaded.
            from melo.api import TTS

            _model = TTS(language="ZH", device="cpu")
            _speaker_ids = _model.hps.data.spk2id
        return _model, _speaker_ids


def _failure(message):
    """Build the failure payload in the shape the backend client expects."""
    return {
        "success": False,
        "audioPath": None,
        "durationMs": None,
        "engine": "melotts",
        "errorMessage": message,
    }


@app.get("/health")
def health():
    """Liveness probe; does not load the model."""
    return {"status": "ok"}


@app.post("/synthesize")
def synthesize(request: SynthesizeRequest):
    """Synthesize ``request.text`` into the file at ``request.outputPath``.

    Every failure, including one while preparing the output directory, is
    reported as ``success: false`` with a message rather than as an HTTP error.
    """
    try:
        output = Path(request.outputPath)
        # Inside the try so a permission or disk error also yields the
        # structured failure payload.
        output.parent.mkdir(parents=True, exist_ok=True)

        model, speaker_ids = get_model()
        # Index instead of .get(): a missing speaker fails here with a clear
        # error rather than passing None into the model. Indexing is also the
        # form the MeloTTS usage example uses (NOTE(review): confirm whether
        # spk2id supports .get() at all).
        speaker_id = speaker_ids[_SPEAKER_KEY]
        model.tts_to_file(request.text, speaker_id, str(output), speed=1.0)
    except Exception as exc:
        # Some exceptions stringify to ""; fall back to the type name so the
        # backend always has something to store.
        return _failure(str(exc) or type(exc).__name__)

    return {
        "success": True,
        "audioPath": str(output),
        "durationMs": None,
        "engine": "melotts",
    }
+[Service] +Type=simple +WorkingDirectory=/data/programs/emotion-museum/tts-service +ExecStart=/data/programs/emotion-museum/tts-service/.venv/bin/uvicorn app:app --host 127.0.0.1 --port 19110 +Restart=always +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/backend-single/tts-service/requirements.txt b/backend-single/tts-service/requirements.txt new file mode 100644 index 0000000..a6d2796 --- /dev/null +++ b/backend-single/tts-service/requirements.txt @@ -0,0 +1,3 @@ +fastapi==0.111.0 +uvicorn[standard]==0.30.1 +pydantic==2.7.4 diff --git a/mini-program/src/services/tts.js b/mini-program/src/services/tts.js index 87e68a4..a63e836 100644 --- a/mini-program/src/services/tts.js +++ b/mini-program/src/services/tts.js @@ -1,18 +1,37 @@ import { get, post } from './request.js' +import { getEnvValue } from '../config/env.js' const DEFAULT_SOURCE_TYPE = 'epic_script' const DEFAULT_VOICE = 'default_zh_female' +const normalizeAudioUrl = (task) => { + if (!task?.audioUrl || /^https?:\/\//.test(task.audioUrl)) { + return task + } + return { + ...task, + audioUrl: `${getEnvValue('API_BASE_URL')}${task.audioUrl.startsWith('/') ? 
task.audioUrl : `/${task.audioUrl}`}` + } +} + +const normalizeResponse = (response) => { + if (!response?.data) return response + return { + ...response, + data: normalizeAudioUrl(response.data) + } +} + export const createTtsTask = ({ sourceType = DEFAULT_SOURCE_TYPE, sourceId, voice = DEFAULT_VOICE }) => { - return post('/tts/tasks', { sourceType, sourceId, voice }) + return post('/tts/tasks', { sourceType, sourceId, voice }).then(normalizeResponse) } export const getTtsTask = (id) => { - return get(`/tts/tasks/${id}`) + return get(`/tts/tasks/${id}`).then(normalizeResponse) } export const getTtsTaskBySource = ({ @@ -20,7 +39,7 @@ export const getTtsTaskBySource = ({ sourceId, voice = DEFAULT_VOICE }) => { - return get('/tts/tasks/by-source', { sourceType, sourceId, voice }) + return get('/tts/tasks/by-source', { sourceType, sourceId, voice }).then(normalizeResponse) } export default { diff --git a/sql/2026-05-17-tts-task.sql b/sql/2026-05-17-tts-task.sql new file mode 100644 index 0000000..325ea1f --- /dev/null +++ b/sql/2026-05-17-tts-task.sql @@ -0,0 +1,26 @@ +CREATE TABLE IF NOT EXISTS t_tts_task ( + id VARCHAR(64) PRIMARY KEY COMMENT 'Primary key', + user_id VARCHAR(64) NOT NULL COMMENT 'Owner user id', + source_type VARCHAR(50) NOT NULL COMMENT 'Source type, for example epic_script', + source_id VARCHAR(64) NOT NULL COMMENT 'Source content id', + text_hash VARCHAR(128) NOT NULL COMMENT 'Hash of cleaned text and voice', + text_length INT NOT NULL COMMENT 'Cleaned text length', + voice VARCHAR(64) NOT NULL DEFAULT 'default_zh_female' COMMENT 'Voice id', + status VARCHAR(20) NOT NULL DEFAULT 'pending' COMMENT 'pending, processing, success, failed', + audio_url VARCHAR(500) NULL COMMENT 'Public audio URL', + audio_path VARCHAR(500) NULL COMMENT 'Server audio path', + duration_ms BIGINT NULL COMMENT 'Audio duration', + error_message VARCHAR(1000) NULL COMMENT 'Failure message', + request_count INT NOT NULL DEFAULT 1 COMMENT 'Cache hit request count', + 
create_by VARCHAR(64) NULL COMMENT 'Creator', + create_time DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT 'Create time', + update_by VARCHAR(64) NULL COMMENT 'Updater', + update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update time', + is_deleted TINYINT DEFAULT 0 COMMENT 'Logic delete flag', + remarks VARCHAR(500) NULL COMMENT 'Remarks' +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='Text-to-speech task table'; + +CREATE INDEX idx_tts_task_user_source ON t_tts_task (user_id, source_type, source_id); +CREATE INDEX idx_tts_task_text_hash ON t_tts_task (text_hash); +CREATE INDEX idx_tts_task_status ON t_tts_task (status); +CREATE INDEX idx_tts_task_create_time ON t_tts_task (create_time);