Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ public JsonObject getInput() {
jsonObject.addProperty(ApiKeywords.VOICE, voice.getValue());
}

if (parameters != null && !parameters.isEmpty() && parameters.containsKey(ApiKeywords.VOICE)) {
jsonObject.addProperty(ApiKeywords.VOICE, (String) parameters.get(ApiKeywords.VOICE));
}
Comment on lines +185 to +187

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current logic allows the voice parameter from the generic parameters map to overwrite the value from the dedicated voice field if both are present. This can lead to unexpected behavior. The dedicated voice field should have precedence. To fix this, you can check if the voice property has already been set on the jsonObject before adding it from the parameters map.

Suggested change
if (parameters != null && !parameters.isEmpty() && parameters.containsKey(ApiKeywords.VOICE)) {
jsonObject.addProperty(ApiKeywords.VOICE, (String) parameters.get(ApiKeywords.VOICE));
}
if (parameters != null && !parameters.isEmpty() && parameters.containsKey(ApiKeywords.VOICE) && !jsonObject.has(ApiKeywords.VOICE)) {
jsonObject.addProperty(ApiKeywords.VOICE, (String) parameters.get(ApiKeywords.VOICE));
}


if (languageType != null) {
jsonObject.addProperty(ApiKeywords.LANGUAGE_TYPE, languageType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ public class SpeechSynthesisApiKeywords {
public static final String TEXT_TYPE = "text_type";

public static final String FORMAT = "format";

public static final String BIT_RATE = "bit_rate";

public static final String VOICE = "voice";

public static final String SAMPLE_RATE = "sample_rate";
Expand All @@ -21,6 +23,12 @@ public class SpeechSynthesisApiKeywords {

public static final String PHONEME_TIMESTAMP = "phoneme_timestamp_enabled";

public static final String HOT_FIX = "hot_fix";

public static final String PRONUNCIATION = "pronunciation";

public static final String REPLACE = "replace";

public static final String SENTENCE = "sentence";

public static final String WORDS = "words";
Expand Down
62 changes: 62 additions & 0 deletions src/main/java/com/alibaba/dashscope/audio/ttsv2/ParamHotFix.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) Alibaba, Inc. and its affiliates.

package com.alibaba.dashscope.audio.ttsv2;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Data;

/**
 * Hot fix configuration for speech synthesis, including pronunciation and replace rules.
 *
 * <p>The hand-written {@link #getPronunciation()} and {@link #getReplace()} take the place of the
 * getters that {@code @Data} would otherwise generate for the two fields. They do not return the
 * raw rule lists; they return the rules converted into a list of single-entry maps.
 */
@Data
public class ParamHotFix {

  /** Pronunciation rules to customize specific words. */
  private List<PronunciationItem> pronunciation;

  /** Replace rules to replace specific words with others. */
  private List<ReplaceItem> replace;

  /**
   * Converts the pronunciation rules into a list of single-entry maps ({@code text -> pinyin}).
   *
   * <p>{@code null} entries in the rule list are skipped rather than triggering a
   * {@link NullPointerException}.
   *
   * @return one map per non-null rule, in rule order, or {@code null} when the rule list is
   *     {@code null}, empty, or contains only {@code null} entries
   */
  public ArrayList<Object> getPronunciation() {
    if (pronunciation == null || pronunciation.isEmpty()) {
      return null;
    }
    ArrayList<Object> pronunciationList = new ArrayList<>(pronunciation.size());
    for (PronunciationItem item : pronunciation) {
      if (item == null) {
        // A null rule carries no information; skip it instead of failing the whole request.
        continue;
      }
      pronunciationList.add(singleEntry(item.getText(), item.getPinyin()));
    }
    // Keep the "no rules -> null" contract even when every entry was null.
    return pronunciationList.isEmpty() ? null : pronunciationList;
  }

  /**
   * Converts the replace rules into a list of single-entry maps ({@code text -> replacement}).
   *
   * <p>{@code null} entries in the rule list are skipped rather than triggering a
   * {@link NullPointerException}.
   *
   * @return one map per non-null rule, in rule order, or {@code null} when the rule list is
   *     {@code null}, empty, or contains only {@code null} entries
   */
  public ArrayList<Object> getReplace() {
    if (replace == null || replace.isEmpty()) {
      return null;
    }
    ArrayList<Object> replaceList = new ArrayList<>(replace.size());
    for (ReplaceItem item : replace) {
      if (item == null) {
        // A null rule carries no information; skip it instead of failing the whole request.
        continue;
      }
      replaceList.add(singleEntry(item.getText(), item.getReplacement()));
    }
    // Keep the "no rules -> null" contract even when every entry was null.
    return replaceList.isEmpty() ? null : replaceList;
  }

  /** Builds a mutable map holding exactly one {@code key -> value} entry. */
  private static HashMap<String, String> singleEntry(String key, String value) {
    HashMap<String, String> entry = new HashMap<>();
    entry.put(key, value);
    return entry;
  }

  /** A single pronunciation rule: the text to match and the pinyin to use for it. */
  @Data
  @AllArgsConstructor
  public static class PronunciationItem {
    private String text;
    private String pinyin;
  }

  /** A single replace rule: the text to match and the text that replaces it. */
  @Data
  @AllArgsConstructor
  public static class ReplaceItem {
    private String text;
    private String replacement;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ public class SpeechSynthesisParam extends FullDuplexServiceParam {
@Builder.Default private List<String> languageHints = null;
/** synthesis style */
@Builder.Default private int style = 0;
/** Hot fix configuration for pronunciation and replace rules. */
@Builder.Default private ParamHotFix hotFix = null;


@Override
public Map<String, Object> getParameters() {
Expand Down Expand Up @@ -83,6 +86,20 @@ public Map<String, Object> getParameters() {
if (getStyle() != 0) {
params.put(SpeechSynthesisApiKeywords.STYLE, getStyle());
}
// Add hot fix parameters if present
if (getHotFix() != null) {
Map<String, Object> hotFixParams = new HashMap<>();
if (getHotFix().getPronunciation() != null && !getHotFix().getPronunciation().isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.PRONUNCIATION, getHotFix().getPronunciation());
}
if (getHotFix().getReplace() != null && !getHotFix().getReplace().isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.REPLACE, getHotFix().getReplace());
}
if (!hotFixParams.isEmpty()) {
params.put(SpeechSynthesisApiKeywords.HOT_FIX, hotFixParams);
}
}
Comment on lines +90 to +101

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This block has a performance issue: getHotFix().getPronunciation() and getHotFix().getReplace() are called multiple times. Since these methods create new collections on each invocation, this is inefficient. You should call them once and store the results in local variables.

Additionally, the method names getPronunciation and getReplace in ParamHotFix are confusing: because they are written by hand, Lombok skips generating the plain getters for those fields, yet these methods perform data transformation instead of returning the fields. It would be clearer to rename them to something like buildPronunciationPayload() and buildReplacePayload() to improve code clarity.

Suggested change
if (getHotFix() != null) {
Map<String, Object> hotFixParams = new HashMap<>();
if (getHotFix().getPronunciation() != null && !getHotFix().getPronunciation().isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.PRONUNCIATION, getHotFix().getPronunciation());
}
if (getHotFix().getReplace() != null && !getHotFix().getReplace().isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.REPLACE, getHotFix().getReplace());
}
if (!hotFixParams.isEmpty()) {
params.put(SpeechSynthesisApiKeywords.HOT_FIX, hotFixParams);
}
}
if (getHotFix() != null) {
Map<String, Object> hotFixParams = new HashMap<>();
List<Object> pronunciations = getHotFix().getPronunciation();
if (pronunciations != null && !pronunciations.isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.PRONUNCIATION, pronunciations);
}
List<Object> replaces = getHotFix().getReplace();
if (replaces != null && !replaces.isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.REPLACE, replaces);
}
if (!hotFixParams.isEmpty()) {
params.put(SpeechSynthesisApiKeywords.HOT_FIX, hotFixParams);
}
}


params.putAll(parameters);
return params;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.nio.ByteBuffer;
import java.security.InvalidParameterException;
import java.util.List;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.SuperBuilder;
Expand All @@ -25,6 +26,8 @@ public class VoiceEnrollmentParam extends HalfDuplexServiceParam {

private int pageIndex;
private int pageSize;
/** Maximum length of prompt audio in seconds. */
private float maxPromptAudioLength = 10.0f;

protected VoiceEnrollmentParam(HalfDuplexServiceParamBuilder<?, ?> b) {
super(b);
Expand All @@ -50,6 +53,9 @@ public JsonObject getInput() {
if (languageHints != null) {
input.add("language_hints", JsonUtils.toJsonArray(languageHints));
}
if (maxPromptAudioLength > 0) {
input.addProperty("max_prompt_audio_length", maxPromptAudioLength);
}
break;
case LIST:
input.addProperty(ApiKeywords.ACTION, operationType.getValue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public Voice createVoice(
.languageHints(customParam.getLanguageHints())
.headers(customParam.getHeaders())
.resources(customParam.getResources())
.maxPromptAudioLength(customParam.getMaxPromptAudioLength())
.parameters(customParam.getParameters())
.workspace(customParam.getWorkspace())
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static org.junit.Assert.assertEquals;

import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.ParamHotFix;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
Expand Down Expand Up @@ -129,6 +130,16 @@ public void testStreamingCall() {

// 获取 URL
String url = mockServer.url("/binary").toString();
ParamHotFix hotFix = new ParamHotFix();
ArrayList<ParamHotFix.PronunciationItem> pronunciations = new ArrayList<>();
pronunciations.add(new ParamHotFix.PronunciationItem("今天", "jin1 tian1"));
pronunciations.add(new ParamHotFix.PronunciationItem("草地", "cao3 di4"));
hotFix.setPronunciation(pronunciations);

ArrayList<ParamHotFix.ReplaceItem> replaces = new ArrayList<>();
replaces.add(new ParamHotFix.ReplaceItem("草地", "草弟"));
replaces.add(new ParamHotFix.ReplaceItem("惠州", "汇州"));
hotFix.setReplace(replaces);

// 在真实世界中,你会在这里做 HTTP 请求,并得到响应
System.out.println("Mock Server is running at: " + url);
Expand All @@ -138,6 +149,7 @@ public void testStreamingCall() {
.model("cosyvoice-v1")
.voice("longxiaochun")
.format(SpeechSynthesisAudioFormat.MP3_16000HZ_MONO_128KBPS)
.hotFix(hotFix)
.build();
SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback);
synthesizer.setStartedTimeout(1000);
Expand Down
Loading