Skip to content

Commit eb9da4e

Browse files
authored
Merge pull request #110 from songguocola/dev/0209_cosyvoice
feat(model/cosyvoice): add hot_fix and max_prompt_audio_length param
2 parents 8e07d74 + d690c11 commit eb9da4e

File tree

3 files changed

+58
-2
lines changed

3 files changed

+58
-2
lines changed

dashscope/audio/tts_v2/enrollment.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,15 @@ def create_voice(
8888
prefix: str,
8989
url: str,
9090
language_hints: List[str] = None,
91+
max_prompt_audio_length: float = None,
9192
) -> str:
9293
"""
9394
创建新克隆音色
9495
param: target_model 克隆音色对应的语音合成模型版本
9596
param: prefix 音色自定义前缀,仅允许数字和小写字母,小于十个字符。
9697
param: url 用于克隆的音频文件url
9798
param: language_hints 克隆音色目标语言
99+
param: max_prompt_audio_length 音频预处理输出的prompt audio最长长度。单位为秒。默认为10s。
98100
return: voice_id
99101
"""
100102

@@ -106,6 +108,8 @@ def create_voice(
106108
}
107109
if language_hints is not None:
108110
input_params["language_hints"] = language_hints
111+
if max_prompt_audio_length is not None:
112+
input_params["max_prompt_audio_length"] = max_prompt_audio_length
109113
response = self.__call_with_input(input_params)
110114
self._last_request_id = response.request_id
111115
if response.status_code == 200:

dashscope/audio/tts_v2/speech_synthesizer.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import threading
99
import time
1010
import uuid
11+
from dataclasses import dataclass
1112
from enum import Enum, unique
13+
from typing import Dict, List, Optional
1214

1315
import websocket
1416

@@ -26,6 +28,35 @@
2628
)
2729

2830

31+
@dataclass
class HotFix:
    """
    Hot fix parameters for pronunciation and text replacement.

    Attributes:
        pronunciation: Optional list of mappings from a piece of text to the
            pronunciation to use for it, e.g., [{"草地": "cao3 di4"}].
        replace: Optional list of mappings from a piece of text to the text
            that replaces it, e.g., [{"草地": "草弟"}].

    Example:
        hot_fix = HotFix(
            pronunciation=[{"草地": "cao3 di4"}],
            replace=[{"草地": "草弟"}]
        )
        hot_fix_dict = hot_fix.to_dict()
    """

    pronunciation: Optional[List[Dict[str, str]]] = None
    replace: Optional[List[Dict[str, str]]] = None

    def to_dict(self) -> Dict[str, List[Dict[str, str]]]:
        """
        Convert the hot fix settings into a plain dictionary.

        Only fields that are not None are included, so a default-constructed
        HotFix yields an empty dict. An empty list is kept, because it is
        not None. The lists are stored by reference, not copied.

        Returns:
            A dict with an optional "pronunciation" key and an optional
            "replace" key.
        """
        result: Dict[str, List[Dict[str, str]]] = {}
        if self.pronunciation is not None:
            result["pronunciation"] = self.pronunciation
        if self.replace is not None:
            result["replace"] = self.replace
        return result
58+
59+
2960
class ResultCallback:
3061
"""
3162
An interface that defines callback methods for getting speech synthesis results. # noqa E501
@@ -246,6 +277,7 @@ def __init__( # pylint: disable=redefined-builtin
246277
callback: ResultCallback = None,
247278
workspace=None,
248279
url=None,
280+
hot_fix=None,
249281
additional_params=None,
250282
):
251283
"""
@@ -282,6 +314,14 @@ def __init__( # pylint: disable=redefined-builtin
282314
The language hints of the synthesizer. supported language: zh, en.
283315
additional_params: Dict
284316
Additional parameters for the Dashscope API.
317+
hot_fix: Dict or HotFix
318+
Hot fix parameters for pronunciation and text replacement.
319+
Example: {
320+
"pronunciation": [{"草地": "cao3 di4"}],
321+
"replace": [{"草地": "草弟"}]
322+
}
323+
enable_markdown_filter: bool
324+
Whether to enable the markdown filter. Must be passed via additional_params.
285325
"""
286326
self.ws = None
287327
self.start_event = threading.Event()
@@ -316,6 +356,7 @@ def __init__( # pylint: disable=redefined-builtin
316356
workspace,
317357
url,
318358
additional_params,
359+
hot_fix,
319360
)
320361

321362
def __send_str(self, data: str):
@@ -404,6 +445,7 @@ def __update_params( # pylint: disable=redefined-builtin
404445
url=None,
405446
additional_params=None,
406447
close_ws_after_use=True,
448+
hot_fix=None,
407449
):
408450
if model is None:
409451
raise ModelRequired("Model is required!")
@@ -417,6 +459,17 @@ def __update_params( # pylint: disable=redefined-builtin
417459
raise InputRequired("apikey is required!")
418460
self.headers = headers
419461
self.workspace = workspace
462+
463+
# Merge hot_fix into additional_params
464+
if hot_fix is not None:
465+
if additional_params is None:
466+
additional_params = {}
467+
# Support both HotFix instance and dict
468+
if isinstance(hot_fix, HotFix):
469+
additional_params["hot_fix"] = hot_fix.to_dict()
470+
else:
471+
additional_params["hot_fix"] = hot_fix
472+
420473
self.additional_params = additional_params
421474
self.model = model
422475
self.voice = voice

dashscope/multimodal/multimodal_dialog.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def _send_start_request(
363363
self._send_text_frame(_start_json)
364364

365365
def _run_forever(self):
366-
self.ws.run_forever(ping_interval=20, ping_timeout=10)
366+
self.ws.run_forever(ping_interval=None, ping_timeout=None)
367367

368368
def _connect(self, api_key: str):
369369
"""初始化WebSocket连接并发送启动请求。"""
@@ -376,7 +376,6 @@ def _connect(self, api_key: str):
376376
on_close=self._on_close,
377377
)
378378
self.thread = threading.Thread(target=self._run_forever)
379-
self.ws.ping_interval = 3
380379
self.thread.daemon = True
381380
self.thread.start()
382381

0 commit comments

Comments
 (0)