diff --git a/pyproject.toml b/pyproject.toml
index 86798d6..d39c408 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
     "fastapi",
     "pydantic",
     "python-multipart",
+    "edge-tts",
 ]
 
 [project.urls]
diff --git a/requirements.txt b/requirements.txt
index a1c4e53..50b692d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,5 @@ loguru
 uvicorn
 fastapi
 pydantic
-python-multipart
\ No newline at end of file
+python-multipart
+edge-tts
\ No newline at end of file
diff --git a/rvc_python/api.py b/rvc_python/api.py
index 60bd4c0..3e3863c 100644
--- a/rvc_python/api.py
+++ b/rvc_python/api.py
@@ -3,6 +3,8 @@
 from fastapi.responses import Response, JSONResponse
 from loguru import logger
 from pydantic import BaseModel
+import edge_tts
+import asyncio
 import tempfile
 import base64
 import shutil
@@ -21,6 +23,21 @@ class SetParamsRequest(BaseModel):
 class SetModelsDirRequest(BaseModel):
     models_dir: str
 
+class TTSRequest(BaseModel):
+    """Request body for the ``/tts`` endpoint.
+
+    ``text`` is synthesized with edge-tts; the remaining fields are passed
+    through to ``edge_tts.Communicate``.
+    """
+
+    text: str
+    # Must be a complete edge-tts voice name: edge_tts.Communicate rejects
+    # a partial one as an invalid voice.
+    voice: str | None = "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)"
+    rate: str | None = "+0%"
+    volume: str | None = "+0%"
+    pitch: str | None = "+0Hz"
+
 def setup_routes(app: FastAPI):
     @app.post("/convert")
     def rvc_convert(request: ConvertAudioRequest):
@@ -117,6 +134,68 @@ def set_models_dir(request: SetModelsDirRequest):
         except Exception as e:
             raise HTTPException(status_code=400, detail=str(e))
 
+    @app.get("/voices")
+    async def list_voices():
+        """Return the voices reported by ``edge_tts.list_voices()``."""
+        return JSONResponse(content={"voices": await edge_tts.list_voices()})
+
+    @app.post("/tts")
+    async def tts(request: TTSRequest):
+        """Synthesize ``request.text`` with edge-tts and convert it with RVC.
+
+        Returns the converted audio as an ``audio/wav`` response. Responds
+        with 400 when no model is loaded and 500 when synthesis or
+        conversion fails.
+        """
+        if not app.state.rvc.current_model:
+            raise HTTPException(status_code=400, detail="No model loaded. Please load a model first.")
+
+        # Options the client sent as null are omitted so edge-tts applies
+        # its own defaults rather than being handed ``None``.
+        options = {
+            name: value
+            for name, value in (
+                ("voice", request.voice),
+                ("rate", request.rate),
+                ("volume", request.volume),
+                ("pitch", request.pitch),
+            )
+            if value is not None
+        }
+
+        input_path = output_path = None
+        try:
+            logger.info("Received request to generate audio by tts")
+            # Only the paths are needed; close the handles right away so
+            # edge-tts and RVC write the files through their own handles.
+            # edge-tts saves MP3 data, hence the suffix of the input file.
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_input:
+                input_path = tmp_input.name
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_output:
+                output_path = tmp_output.name
+
+            communicate = edge_tts.Communicate(text=request.text, **options)
+            await communicate.save(input_path)
+
+            # Inference is blocking; run it in a worker thread so the event
+            # loop keeps serving other requests in the meantime.
+            await asyncio.to_thread(app.state.rvc.infer_file, input_path, output_path)
+
+            with open(output_path, "rb") as converted:
+                output_data = converted.read()
+            return Response(content=output_data, media_type="audio/wav")
+        except Exception as e:
+            logger.error(e)
+            raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
+        finally:
+            # Remove each file independently so one failure cannot leak the other.
+            for path in (input_path, output_path):
+                if path is not None:
+                    try:
+                        os.unlink(path)
+                    except OSError:
+                        logger.warning("Could not remove temporary file {}", path)
+
 def create_app():
     app = FastAPI()
 