daswer123 · TheMhv · Sep 21, 2024 · Sep 21, 2024 · Sep 21, 2024 · Sep 21, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
   "fastapi",
   "pydantic",
   "python-multipart",
+  "edge-tts",
 ]
 
 [project.urls]

diff --git a/requirements.txt b/requirements.txt
@@ -15,4 +15,5 @@ loguru
 uvicorn
 fastapi
 pydantic
-python-multipart
+python-multipart
+edge-tts
diff --git a/rvc_python/api.py b/rvc_python/api.py
@@ -3,6 +3,7 @@
 from fastapi.responses import Response, JSONResponse
 from loguru import logger
 from pydantic import BaseModel
+import edge_tts
 import tempfile
 import base64
 import shutil
@@ -21,6 +22,13 @@ class SetParamsRequest(BaseModel):
 class SetModelsDirRequest(BaseModel):
     models_dir: str
 
+class TTSRequest(BaseModel):
+    """Request body for the ``/tts`` endpoint.
+
+    ``text`` is required; the remaining fields are forwarded unchanged to
+    ``edge_tts.Communicate`` by the ``/tts`` handler.
+    """
+
+    # Text to synthesize.
+    text: str
+    # edge-tts voice name used for synthesis.
+    voice: str | None = "en-US-AriaNeural"
+    # Signed relative adjustments; the defaults leave the voice unmodified.
+    rate: str | None = "+0%"
+    volume: str | None = "+0%"
+    pitch: str | None = "+0Hz"
+
 def setup_routes(app: FastAPI):
     @app.post("/convert")
     def rvc_convert(request: ConvertAudioRequest):
@@ -117,6 +125,44 @@ def set_models_dir(request: SetModelsDirRequest):
         except Exception as e:
             raise HTTPException(status_code=400, detail=str(e))
 
+    @app.get("/voices")
+    async def list_voices():
+        """Return the voices reported by ``edge_tts.list_voices()``.
+
+        The result is wrapped as ``{"voices": [...]}``. Any failure while
+        fetching or serializing the list is logged and surfaced as HTTP 500
+        with a generic message; the original exception is chained as the cause.
+        """
+        try:
+            voices = await edge_tts.list_voices()
+            return JSONResponse(content={"voices": voices})
+        except Exception as e:
+            logger.error(f"Error retrieving voices: {e}")
+            raise HTTPException(status_code=500, detail="Failed to retrieve voices.") from e
+
+    @app.post("/tts")
+    async def tts(request: TTSRequest):
+        """Synthesize ``request.text`` with edge-tts, then convert it with the loaded RVC model.
+
+        Returns the converted audio bytes as an ``audio/wav`` response.
+
+        Raises:
+            HTTPException: 400 when no RVC model is loaded; 500 when synthesis,
+                conversion, or reading the result fails (original error chained).
+        """
+        # Function-scope imports keep this handler self-contained.
+        import asyncio
+        import contextlib
+
+        if not app.state.rvc.current_model:
+            raise HTTPException(status_code=400, detail="No model loaded. Please load a model first.")
+
+        temp_paths = []
+        try:
+            logger.info("Received request to generate audio by tts")
+
+            # Only the paths are needed: both files are written by name by
+            # other code, so each handle is closed as soon as the file exists.
+            for _ in range(2):
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
+                    temp_paths.append(handle.name)
+            input_path, output_path = temp_paths
+
+            # NOTE(review): the synthesized audio is saved under a ".wav" name;
+            # confirm the container edge-tts actually emits and that
+            # infer_file detects the format from content, not the extension.
+            communicate = edge_tts.Communicate(
+                text=request.text,
+                voice=request.voice,
+                rate=request.rate,
+                volume=request.volume,
+                pitch=request.pitch
+            )
+            await communicate.save(input_path)
+
+            # infer_file is a synchronous call; run it in a worker thread so
+            # the event loop is not blocked while the conversion runs.
+            await asyncio.to_thread(app.state.rvc.infer_file, input_path, output_path)
+
+            # Re-open the result by path so the read always starts at offset 0
+            # of whatever file now lives at output_path.
+            with open(output_path, "rb") as converted:
+                output_data = converted.read()
+            return Response(content=output_data, media_type="audio/wav")
+        except Exception as e:
+            logger.error(e)
+            raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
+        finally:
+            # Best-effort cleanup: a failed delete must not replace the
+            # response or the error already in flight.
+            for path in temp_paths:
+                with contextlib.suppress(OSError):
+                    os.unlink(path)
+
 def create_app():
     app = FastAPI()