Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
LIVEKIT_URL=<your LiveKit server URL>
LIVEKIT_API_KEY=<your API Key>
LIVEKIT_API_SECRET=<your API Secret>
OPENAI_API_KEY=<To use other providers, press Enter for now and edit .env.local>
DEEPGRAM_API_KEY=<To use other providers, press Enter for now and edit .env.local>
CARTESIA_API_KEY=<To use other providers, press Enter for now and edit .env.local>
6 changes: 6 additions & 0 deletions .env.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CARTESIA_API_KEY=""
DEEPGRAM_API_KEY=""
LIVEKIT_API_KEY=""
LIVEKIT_API_SECRET=""
LIVEKIT_URL=""
OPENAI_API_KEY=""
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.env.local
venv/
.DS_Store
78 changes: 78 additions & 0 deletions agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import logging

from dotenv import load_dotenv
from livekit.agents import (
    AutoSubscribe,
    JobContext,
    JobProcess,
    WorkerOptions,
    cli,
    llm,
    metrics,
)
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import cartesia, openai, deepgram, silero, turn_detector


# Load provider credentials (LiveKit, OpenAI, Deepgram, Cartesia) from
# .env.local so they are available via the environment at import time.
load_dotenv(dotenv_path=".env.local")
# Module-level logger used by prewarm/entrypoint below.
logger = logging.getLogger("voice-agent")


def prewarm(proc: JobProcess):
    """Load the Silero VAD model once per worker process.

    The loaded model is stashed in the process userdata so each job
    (see ``entrypoint``) can reuse it instead of reloading from disk.
    """
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model


async def entrypoint(ctx: JobContext):
    """Job entrypoint: join the room and run a voice pipeline for one user.

    Connects audio-only, waits for the first participant, then starts a
    STT -> LLM -> TTS pipeline seeded with a system prompt, collecting
    usage metrics as the session runs.
    """
    # System prompt that frames every LLM turn for this session.
    chat_context = llm.ChatContext().append(
        role="system",
        text=(
            "You are a voice assistant created by LiveKit. Your interface with users will be voice. "
            "You should use short and concise responses, and avoiding usage of unpronouncable punctuation. "
            "You were created as a demo to showcase the capabilities of LiveKit's agents framework."
        ),
    )

    logger.info(f"connecting to room {ctx.room.name}")
    # Subscribe to audio tracks only; video is irrelevant to a voice agent.
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    # Block until someone joins — the assistant serves the first participant.
    participant = await ctx.wait_for_participant()
    logger.info(f"starting voice assistant for participant {participant.identity}")

    # Pipeline wiring: Deepgram STT, OpenAI LLM, Cartesia TTS.
    # Other great providers exist like Cerebras, ElevenLabs, Groq, Play.ht, Rime, and more.
    # Learn more and pick the best one for your app:
    # https://docs.livekit.io/agents/plugins
    assistant = VoicePipelineAgent(
        # VAD was preloaded in prewarm() and shared via process userdata.
        vad=ctx.proc.userdata["vad"],
        stt=deepgram.STT(),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=cartesia.TTS(),
        turn_detector=turn_detector.EOUModel(),
        # minimum delay for endpointing, used when turn detector believes the user is done with their turn
        min_endpointing_delay=0.5,
        # maximum delay for endpointing, used when turn detector does not believe the user is done with their turn
        max_endpointing_delay=5.0,
        chat_ctx=chat_context,
    )

    collector = metrics.UsageCollector()

    @assistant.on("metrics_collected")
    def _handle_metrics(agent_metrics: metrics.AgentMetrics):
        # Log each metrics event and fold it into the running usage totals.
        metrics.log_metrics(agent_metrics)
        collector.collect(agent_metrics)

    assistant.start(ctx.room, participant)

    # Greet the user once the pipeline is live; interruptions allowed.
    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)


if __name__ == "__main__":
    # Run the agent worker: prewarm loads the VAD per process,
    # entrypoint handles each dispatched job.
    worker_options = WorkerOptions(
        entrypoint_fnc=entrypoint,
        prewarm_fnc=prewarm,
    )
    cli.run_app(worker_options)
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
livekit-agents>=0.12.11
livekit-plugins-openai>=0.10.17
livekit-plugins-cartesia>=0.4.7
livekit-plugins-deepgram>=0.6.17
livekit-plugins-silero>=0.7.4
livekit-plugins-turn-detector>=0.4.0
python-dotenv~=1.0