From f59a4ee8629607ee68104fa8ee1fe046c5c567f9 Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 09:24:10 -0300 Subject: [PATCH 01/10] Feat: Create a tts api route --- pyproject.toml | 1 + requirements.txt | 3 ++- rvc_python/api.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 86798d6..d39c408 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "fastapi", "pydantic", "python-multipart", + "edge-tts", ] [project.urls] diff --git a/requirements.txt b/requirements.txt index a1c4e53..50b692d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ loguru uvicorn fastapi pydantic -python-multipart \ No newline at end of file +python-multipart +edge-tts \ No newline at end of file diff --git a/rvc_python/api.py b/rvc_python/api.py index 60bd4c0..bcb5216 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -3,6 +3,7 @@ from fastapi.responses import Response, JSONResponse from loguru import logger from pydantic import BaseModel +import edge_tts import tempfile import base64 import shutil @@ -117,6 +118,25 @@ def set_models_dir(request: SetModelsDirRequest): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) + @app.post("/tts") + async def tts(request: SetModelsDirRequest): + tmp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") + try: + logger.info("Received request to generate audio by tts") + output_path = tmp_output.name + + communicate = edge_tts.Communicate(request.text, request.voice) + await communicate.save(output_path) + + output_data = tmp_output.read() + return Response(content=output_data, media_type="audio/wav") + except Exception as e: + logger.error(e) + raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") + finally: + tmp_output.close() + os.unlink(tmp_output.name) + def create_app(): app = FastAPI() From 889ac0f15720a09e7aae2949288cb301d2b1a85e Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 09:35:17 -0300 Subject: [PATCH 02/10] Fix: Request parameters for tts --- rvc_python/api.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index bcb5216..33f2c7d 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -22,6 +22,10 @@ class SetParamsRequest(BaseModel): class SetModelsDirRequest(BaseModel): models_dir: str +class TTSRequest(BaseModel): + text: str + voice: str + def setup_routes(app: FastAPI): @app.post("/convert") def rvc_convert(request: ConvertAudioRequest): @@ -119,7 +123,7 @@ def set_models_dir(request: SetModelsDirRequest): raise HTTPException(status_code=400, detail=str(e)) @app.post("/tts") - async def tts(request: SetModelsDirRequest): + async def tts(request: TTSRequest): tmp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") try: logger.info("Received request to generate audio by tts") From 109eebf4a7fae585da39ba7225618eaa6d402169 Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:15:08 -0300 Subject: [PATCH 03/10] Feat: Add more params into tts request --- rvc_python/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index 33f2c7d..c16d49b 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -25,6 +25,9 @@ class SetModelsDirRequest(BaseModel): class TTSRequest(BaseModel): text: str voice: str + rate: str = "+0%", + volume: str = "+0%", + pitch: str = "+0Hz", def setup_routes(app: FastAPI): @app.post("/convert") @@ -129,7 +132,7 @@ async def tts(request: TTSRequest): logger.info("Received request to generate audio by tts") output_path = tmp_output.name - communicate = edge_tts.Communicate(request.text, request.voice) + communicate = edge_tts.Communicate(request.text, request.voice, request.rate, request.volume, request.pitch) await communicate.save(output_path) output_data = tmp_output.read() From 56699c1ec0fc7c24716f391f7726de69bddcb7b1 Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:22:34 -0300 Subject: [PATCH 04/10] Feat: Add more params into tts request --- rvc_python/api.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index c16d49b..00e2ad4 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -25,9 +25,6 @@ class SetModelsDirRequest(BaseModel): class TTSRequest(BaseModel): text: str voice: str - rate: str = "+0%", - volume: str = "+0%", - pitch: str = "+0Hz", def setup_routes(app: FastAPI): @app.post("/convert") @@ -132,7 +129,13 @@ async def tts(request: TTSRequest): logger.info("Received request to generate audio by tts") output_path = tmp_output.name - communicate = edge_tts.Communicate(request.text, request.voice, request.rate, request.volume, request.pitch) + communicate = edge_tts.Communicate( + text=request.text, + voice=request.voice, + rate=request.rate | "+0%", + volume=request.volume | "+0%", + pitch=request.pitch | "+0Hz" + ) await communicate.save(output_path) output_data = tmp_output.read() From 80ea87207faefdf11f3b18a23a0ddf125e765a6c Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:24:09 -0300 Subject: [PATCH 05/10] Fix: Params from tts request --- rvc_python/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rvc_python/api.py b/rvc_python/api.py index 00e2ad4..775ee92 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -25,6 +25,9 @@ class SetModelsDirRequest(BaseModel): class TTSRequest(BaseModel): text: str voice: str + rate: str | None = None + volume: str | None = None + pitch: str | None = None def setup_routes(app: FastAPI): @app.post("/convert") From a134dd1f3a35972b5c3701c2f181430152607c83 Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:25:44 -0300 Subject: [PATCH 06/10] Fix: Params from tts request --- rvc_python/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index 775ee92..cae69b9 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -25,9 +25,9 @@ class SetModelsDirRequest(BaseModel): class TTSRequest(BaseModel): text: str voice: str - rate: str | None = None - volume: str | None = None - pitch: str | None = None + rate: str | None = "+0%" + volume: str | None = "+0%" + pitch: str | None = "+0Hz" def setup_routes(app: FastAPI): @app.post("/convert") From 3c650badf7bdb1bc5d22f1bbd10745821fba5448 Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:26:43 -0300 Subject: [PATCH 07/10] Fix: Params from tts request --- rvc_python/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index cae69b9..39c5276 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -135,9 +135,9 @@ async def tts(request: TTSRequest): communicate = edge_tts.Communicate( text=request.text, voice=request.voice, - rate=request.rate | "+0%", - volume=request.volume | "+0%", - pitch=request.pitch | "+0Hz" + rate=request.rate, + volume=request.volume, + pitch=request.pitch ) await communicate.save(output_path) From f2b1d0adca937c1442b645bbcc9a6472796cd425 Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:41:49 -0300 Subject: [PATCH 08/10] Feat: TTS convert directly from request --- rvc_python/api.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index 39c5276..32e9c7f 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -24,7 +24,7 @@ class SetModelsDirRequest(BaseModel): class TTSRequest(BaseModel): text: str - voice: str + voice: str | None = "Microsoft Server Speech Text to " rate: str | None = "+0%" volume: str | None = "+0%" pitch: str | None = "+0Hz" @@ -127,9 +127,14 @@ def set_models_dir(request: SetModelsDirRequest): @app.post("/tts") async def tts(request: TTSRequest): + if not app.state.rvc.current_model: + raise HTTPException(status_code=400, detail="No model loaded. Please load a model first.") + + tmp_input = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") tmp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") try: logger.info("Received request to generate audio by tts") + input_path = tmp_input.name output_path = tmp_output.name communicate = edge_tts.Communicate( @@ -139,7 +144,9 @@ async def tts(request: TTSRequest): volume=request.volume, pitch=request.pitch ) - await communicate.save(output_path) + await communicate.save(input_path) + + app.state.rvc.infer_file(input_path, output_path) output_data = tmp_output.read() return Response(content=output_data, media_type="audio/wav") @@ -147,7 +154,9 @@ async def tts(request: TTSRequest): logger.error(e) raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") finally: + tmp_input.close() tmp_output.close() + os.unlink(tmp_input.name) os.unlink(tmp_output.name) def create_app(): From 381f3958f71a4c0a1f7e7b2c0fa26abaf992313a Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:46:19 -0300 Subject: [PATCH 09/10] Feat: List voices request --- rvc_python/api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rvc_python/api.py b/rvc_python/api.py index 32e9c7f..6c19f61 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -125,6 +125,10 @@ def set_models_dir(request: SetModelsDirRequest): except Exception as e: raise HTTPException(status_code=400, detail=str(e)) + @app.get("/voices") + def list_voices(): + return JSONResponse(content={"voices": edge_tts.list_voices()}) + @app.post("/tts") async def tts(request: TTSRequest): if not app.state.rvc.current_model: From 593eebb12878b942a65251982e7f72788f27601a Mon Sep 17 00:00:00 2001 From: TheMhv Date: Sat, 21 Sep 2024 10:47:56 -0300 Subject: [PATCH 10/10] Fix: async function error --- rvc_python/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rvc_python/api.py b/rvc_python/api.py index 6c19f61..3e3863c 100644 --- a/rvc_python/api.py +++ b/rvc_python/api.py @@ -126,8 +126,8 @@ def set_models_dir(request: SetModelsDirRequest): raise HTTPException(status_code=400, detail=str(e)) @app.get("/voices") - def list_voices(): - return JSONResponse(content={"voices": edge_tts.list_voices()}) + async def list_voices(): + return JSONResponse(content={"voices": await edge_tts.list_voices()}) @app.post("/tts") async def tts(request: TTSRequest):