Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
LIVEKIT_URL=<your LiveKit server URL>
LIVEKIT_API_KEY=<your API Key>
LIVEKIT_API_SECRET=<your API Secret>
OPENAI_API_KEY=<To use other providers, press Enter for now and edit .env.local>
DEEPGRAM_API_KEY=<To use other providers, press Enter for now and edit .env.local>
CARTESIA_API_KEY=<To use other providers, press Enter for now and edit .env.local>
6 changes: 6 additions & 0 deletions .env.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CARTESIA_API_KEY=""
DEEPGRAM_API_KEY=""
LIVEKIT_API_KEY=""
LIVEKIT_API_SECRET=""
LIVEKIT_URL=""
OPENAI_API_KEY=""
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.env.local
venv/
.DS_Store
78 changes: 78 additions & 0 deletions agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging

from dotenv import load_dotenv
from livekit.agents import (
    AutoSubscribe,
    JobContext,
    JobProcess,
    WorkerOptions,
    cli,
    llm,
    metrics,
)
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import cartesia, openai, deepgram, silero, turn_detector


# Load provider credentials (LiveKit, OpenAI, Deepgram, Cartesia) from
# .env.local so they are available via the environment at import time.
load_dotenv(dotenv_path=".env.local")
# Module-level logger used by prewarm/entrypoint below.
logger = logging.getLogger("voice-agent")


def prewarm(proc: JobProcess):
    """Load the Silero VAD model once per worker process.

    The loaded model is stashed in the process userdata so each job
    (see ``entrypoint``) can reuse it instead of reloading from disk.
    """
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model


async def entrypoint(ctx: JobContext):
    """Job entrypoint: join the room and run a voice pipeline for one user.

    Connects audio-only, waits for the first participant, then starts a
    STT -> LLM -> TTS pipeline seeded with a system prompt, collecting
    usage metrics as the session runs.
    """
    # System prompt that frames every LLM turn for this session.
    chat_context = llm.ChatContext().append(
        role="system",
        text=(
            "You are a voice assistant created by LiveKit. Your interface with users will be voice. "
            "You should use short and concise responses, and avoiding usage of unpronouncable punctuation. "
            "You were created as a demo to showcase the capabilities of LiveKit's agents framework."
        ),
    )

    logger.info(f"connecting to room {ctx.room.name}")
    # Subscribe to audio tracks only; video is irrelevant to a voice agent.
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    # Block until someone joins — the assistant serves the first participant.
    participant = await ctx.wait_for_participant()
    logger.info(f"starting voice assistant for participant {participant.identity}")

    # Pipeline wiring: Deepgram STT, OpenAI LLM, Cartesia TTS.
    # Other great providers exist like Cerebras, ElevenLabs, Groq, Play.ht, Rime, and more.
    # Learn more and pick the best one for your app:
    # https://docs.livekit.io/agents/plugins
    assistant = VoicePipelineAgent(
        # VAD was preloaded in prewarm() and shared via process userdata.
        vad=ctx.proc.userdata["vad"],
        stt=deepgram.STT(),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=cartesia.TTS(),
        turn_detector=turn_detector.EOUModel(),
        # minimum delay for endpointing, used when turn detector believes the user is done with their turn
        min_endpointing_delay=0.5,
        # maximum delay for endpointing, used when turn detector does not believe the user is done with their turn
        max_endpointing_delay=5.0,
        chat_ctx=chat_context,
    )

    collector = metrics.UsageCollector()

    @assistant.on("metrics_collected")
    def _handle_metrics(agent_metrics: metrics.AgentMetrics):
        # Log each metrics event and fold it into the running usage totals.
        metrics.log_metrics(agent_metrics)
        collector.collect(agent_metrics)

    assistant.start(ctx.room, participant)

    # Greet the user once the pipeline is live; interruptions allowed.
    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)


if __name__ == "__main__":
    # Run the agent worker: prewarm loads the VAD per process,
    # entrypoint handles each dispatched job.
    worker_options = WorkerOptions(
        entrypoint_fnc=entrypoint,
        prewarm_fnc=prewarm,
    )
    cli.run_app(worker_options)
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
livekit-agents>=0.12.11
livekit-plugins-openai>=0.10.17
livekit-plugins-cartesia>=0.4.7
livekit-plugins-deepgram>=0.6.17
livekit-plugins-silero>=0.7.4
livekit-plugins-turn-detector>=0.4.0
python-dotenv~=1.0