Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ public JsonObject getInput() {
jsonObject.addProperty(ApiKeywords.VOICE, voice.getValue());
}

if (parameters != null && !parameters.isEmpty() && parameters.containsKey(ApiKeywords.VOICE)) {
jsonObject.addProperty(ApiKeywords.VOICE, (String) parameters.get(ApiKeywords.VOICE));
}

if (languageType != null) {
jsonObject.addProperty(ApiKeywords.LANGUAGE_TYPE, languageType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ public class SpeechSynthesisApiKeywords {
public static final String TEXT_TYPE = "text_type";

public static final String FORMAT = "format";

public static final String BIT_RATE = "bit_rate";

public static final String VOICE = "voice";

public static final String SAMPLE_RATE = "sample_rate";
Expand All @@ -21,6 +23,12 @@ public class SpeechSynthesisApiKeywords {

public static final String PHONEME_TIMESTAMP = "phoneme_timestamp_enabled";

public static final String HOT_FIX = "hot_fix";

public static final String PRONUNCIATION = "pronunciation";

public static final String REPLACE = "replace";

public static final String SENTENCE = "sentence";

public static final String WORDS = "words";
Expand Down
62 changes: 62 additions & 0 deletions src/main/java/com/alibaba/dashscope/audio/ttsv2/ParamHotFix.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) Alibaba, Inc. and its affiliates.

package com.alibaba.dashscope.audio.ttsv2;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Data;

/**
 * Hot fix configuration for speech synthesis, including pronunciation and replace rules.
 *
 * <p>The hand-written getters below do not return the stored item lists. Each rule is converted
 * into a single-entry map keyed by the rule's text, and {@code null} is returned when there is
 * nothing to convert.
 */
@Data
public class ParamHotFix {

  /** Pronunciation rules to customize specific words. */
  private List<PronunciationItem> pronunciation;

  /** Replace rules to replace specific words with others. */
  private List<ReplaceItem> replace;

  /**
   * Converts the pronunciation rules into a list of single-entry maps ({@code text -> pinyin}).
   *
   * <p>Null items in the underlying list are skipped instead of raising a {@link
   * NullPointerException}.
   *
   * @return one map per non-null rule, in list order, or {@code null} when no rule is available
   */
  public ArrayList<Object> getPronunciation() {
    if (pronunciation == null || pronunciation.isEmpty()) {
      return null;
    }
    ArrayList<Object> rules = new ArrayList<>(pronunciation.size());
    for (PronunciationItem item : pronunciation) {
      if (item != null) {
        rules.add(singleEntry(item.getText(), item.getPinyin()));
      }
    }
    // A list made only of null items carries no rule, so report it like an empty list.
    return rules.isEmpty() ? null : rules;
  }

  /**
   * Converts the replace rules into a list of single-entry maps ({@code text -> replacement}).
   *
   * <p>Null items in the underlying list are skipped instead of raising a {@link
   * NullPointerException}.
   *
   * @return one map per non-null rule, in list order, or {@code null} when no rule is available
   */
  public ArrayList<Object> getReplace() {
    if (replace == null || replace.isEmpty()) {
      return null;
    }
    ArrayList<Object> rules = new ArrayList<>(replace.size());
    for (ReplaceItem item : replace) {
      if (item != null) {
        rules.add(singleEntry(item.getText(), item.getReplacement()));
      }
    }
    // A list made only of null items carries no rule, so report it like an empty list.
    return rules.isEmpty() ? null : rules;
  }

  /** Builds the one-entry map that represents a single rule. */
  private static HashMap<String, String> singleEntry(String key, String value) {
    HashMap<String, String> entry = new HashMap<>();
    entry.put(key, value);
    return entry;
  }

  /** A pronunciation rule: the text to match and the pinyin to use for it. */
  @Data
  @AllArgsConstructor
  public static class PronunciationItem {
    private String text;
    private String pinyin;
  }

  /** A replace rule: the text to match and the replacement for it. */
  @Data
  @AllArgsConstructor
  public static class ReplaceItem {
    private String text;
    private String replacement;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public class SpeechSynthesisParam extends FullDuplexServiceParam {
@Builder.Default private List<String> languageHints = null;
/** synthesis style */
@Builder.Default private int style = 0;
/** Hot fix configuration for pronunciation and replace rules. */
@Builder.Default private ParamHotFix hotFix = null;

@Override
public Map<String, Object> getParameters() {
Expand Down Expand Up @@ -83,6 +85,20 @@ public Map<String, Object> getParameters() {
if (getStyle() != 0) {
params.put(SpeechSynthesisApiKeywords.STYLE, getStyle());
}
// Add hot fix parameters if present
if (getHotFix() != null) {
Map<String, Object> hotFixParams = new HashMap<>();
if (getHotFix().getPronunciation() != null && !getHotFix().getPronunciation().isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.PRONUNCIATION, getHotFix().getPronunciation());
}
if (getHotFix().getReplace() != null && !getHotFix().getReplace().isEmpty()) {
hotFixParams.put(SpeechSynthesisApiKeywords.REPLACE, getHotFix().getReplace());
}
if (!hotFixParams.isEmpty()) {
params.put(SpeechSynthesisApiKeywords.HOT_FIX, hotFixParams);
}
}

params.putAll(parameters);
return params;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public class VoiceEnrollmentParam extends HalfDuplexServiceParam {

private int pageIndex;
private int pageSize;
/** Maximum length of prompt audio in seconds. */
private float maxPromptAudioLength = 10.0f;

protected VoiceEnrollmentParam(HalfDuplexServiceParamBuilder<?, ?> b) {
super(b);
Expand All @@ -50,6 +52,9 @@ public JsonObject getInput() {
if (languageHints != null) {
input.add("language_hints", JsonUtils.toJsonArray(languageHints));
}
if (maxPromptAudioLength > 0) {
input.addProperty("max_prompt_audio_length", maxPromptAudioLength);
}
break;
case LIST:
input.addProperty(ApiKeywords.ACTION, operationType.getValue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public Voice createVoice(
.languageHints(customParam.getLanguageHints())
.headers(customParam.getHeaders())
.resources(customParam.getResources())
.maxPromptAudioLength(customParam.getMaxPromptAudioLength())
.parameters(customParam.getParameters())
.workspace(customParam.getWorkspace())
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import static org.junit.Assert.assertEquals;

import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.ParamHotFix;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
Expand Down Expand Up @@ -129,6 +130,16 @@ public void testStreamingCall() {

// 获取 URL
String url = mockServer.url("/binary").toString();
ParamHotFix hotFix = new ParamHotFix();
ArrayList<ParamHotFix.PronunciationItem> pronunciations = new ArrayList<>();
pronunciations.add(new ParamHotFix.PronunciationItem("今天", "jin1 tian1"));
pronunciations.add(new ParamHotFix.PronunciationItem("草地", "cao3 di4"));
hotFix.setPronunciation(pronunciations);

ArrayList<ParamHotFix.ReplaceItem> replaces = new ArrayList<>();
replaces.add(new ParamHotFix.ReplaceItem("草地", "草弟"));
replaces.add(new ParamHotFix.ReplaceItem("惠州", "汇州"));
hotFix.setReplace(replaces);

// 在真实世界中,你会在这里做 HTTP 请求,并得到响应
System.out.println("Mock Server is running at: " + url);
Expand All @@ -138,6 +149,7 @@ public void testStreamingCall() {
.model("cosyvoice-v1")
.voice("longxiaochun")
.format(SpeechSynthesisAudioFormat.MP3_16000HZ_MONO_128KBPS)
.hotFix(hotFix)
.build();
SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback);
synthesizer.setStartedTimeout(1000);
Expand Down