Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,6 @@ __marimo__/

# Streamlit
.streamlit/secrets.toml

# Models
models/
45 changes: 45 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Makefile

# Use bash for better scripting
SHELL := /bin/bash

# Default command is 'help'
.DEFAULT_GOAL := help

## --------------------------------------
## Docker Commands
## --------------------------------------

.PHONY: build
build: ## 🛠️ Build or rebuild the Docker services
	@echo ">> Building services..."
	@docker-compose build

.PHONY: up
up: ## 🚀 Start all services in detached mode
	@echo ">> Starting services in the background..."
	@docker-compose up -d

.PHONY: down
down: ## 🛑 Stop and remove all services
	@echo ">> Stopping and removing containers..."
	@docker-compose down

.PHONY: logs
logs: ## 📜 View real-time logs for all services
	@echo ">> Tailing logs (press Ctrl+C to stop)..."
	@docker-compose logs -f

.PHONY: test
test: ## 🧪 Run pytest inside the app container
	@echo ">> Running tests..."
	@docker-compose run --rm app pytest

## --------------------------------------
## Help
## --------------------------------------

.PHONY: help
help: ## 🙋 Show this help message
	@echo "Available commands:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
24 changes: 24 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
services:
  # ScheduleBOT+ Application
  app:
    build:
      context: .
      dockerfile: docker/app/Dockerfile
    container_name: schedulebot_app
    ports:
      - "7860:7860" # Expose Gradio's port
    volumes:
      # Live-mount the source tree so local edits are reflected without rebuilds.
      - ./src:/app/src
    env_file:
      # Injects HUB_MODEL_ID / HF_TOKEN etc. at container start.
      - .env
    depends_on:
      # Only orders startup — does not wait for Duckling to be ready to serve.
      - duckling
    restart: unless-stopped

  # Duckling Service for Time/Date Extraction (used by SlotFiller)
  duckling:
    image: rasa/duckling:latest
    container_name: duckling_service
    ports:
      # Duckling's default HTTP port; SlotFiller posts to /parse here.
      - "8000:8000"
    restart: unless-stopped
23 changes: 23 additions & 0 deletions docker/app/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container
COPY requirements.txt .

# Install any needed packages specified in requirements.txt
# --no-cache-dir: Disables the cache to keep the image size smaller
# --trusted-host pypi.python.org: Can help avoid SSL issues in some networks
RUN pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt

# Copy the rest of the application's source code from your host to your image filesystem.
COPY ./src /app/src
COPY ./models /app/models
# NOTE(review): copying .env bakes secrets (e.g. HF_TOKEN) into the image layers,
# and docker-compose already injects it via `env_file` — consider dropping this
# line and relying on runtime env injection only. TODO confirm no standalone
# `docker run` workflow depends on it.
COPY .env /app/.env

# Command to run the application when the container launches
# This will be the main entry point for your Gradio app in the next milestone.
# For now, we can use the command-line chat.
CMD ["python", "-m", "src.schedulebot.main"]
833 changes: 803 additions & 30 deletions notebooks/02b_train_knn_intent_classifier.ipynb

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions src/schedulebot/core/conversation_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from src.schedulebot.nlp.intent_classifier import IntentClassifier
from src.schedulebot.nlp.slot_filler import SlotFiller


class ConversationManager:
    def __init__(self, nlu_model_repo: str):
        """
        Initializes the manager with the NLU models.

        Args:
            nlu_model_repo: Hugging Face Hub repo id holding the intent-classifier
                artifacts (forwarded to IntentClassifier as ``repo_id``).
        """
        self.intent_classifier = IntentClassifier(repo_id=nlu_model_repo)
        self.slot_filler = SlotFiller()

    def get_response(self, user_text: str) -> str:
        """
        Processes the user's input and returns a text response.

        Returns a canned reply chosen by the classified intent; for booking/
        rescheduling intents the reply embeds the parsed date-time if one was found.
        """
        # 1. Classify the intent
        intent = self.intent_classifier.predict(user_text)
        print(f"[DEBUG: Classified intent: {intent}]")

        # 2. Logic based on the intent
        if intent == "greet":
            return "Hello! How can I help you with your appointments today?"

        if intent == "bye":
            return "Goodbye! Have a great day."

        if intent in ["book", "resched"]:
            # 3. Extract date and time only for intents that need it.
            # (Previously parse_time was also called unconditionally for a debug
            # print, costing an extra Duckling HTTP round-trip per message.)
            time_slot = self.slot_filler.parse_time(user_text)
            print(f"[DEBUG: time_slot: {time_slot}]")

            action = "book" if intent == "book" else "reschedule"

            if time_slot:
                return f"Okay, I see you want to {action} an appointment for {time_slot['value']}. Is that correct?"
            else:
                return "Sure, but I didn't understand the date and time. Could you please specify when?"

        if intent == "cancel":
            return "Okay, I understand you want to cancel an appointment. Can you specify which one?"

        if intent == "avail":
            return "I'm checking your availability now. One moment..."

        # Fallback for unhandled intents
        return "I'm not sure I understood your request."
30 changes: 30 additions & 0 deletions src/schedulebot/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from src.schedulebot.core.conversation_manager import ConversationManager
import os
from dotenv import load_dotenv


def main():
    """
    Main loop to interact with the chatbot from the command line.

    Reads HUB_MODEL_ID from the environment (.env), builds the
    ConversationManager, then REPLs until the user types 'exit' or
    closes stdin.
    """
    # Load environment variables
    load_dotenv()
    repo_id = os.getenv("HUB_MODEL_ID")
    # Fail fast with a clear message instead of a confusing download error
    # deep inside the Hub client.
    if not repo_id:
        raise SystemExit("HUB_MODEL_ID is not set. Add it to your .env file.")

    print("Initializing ConversationManager...")
    manager = ConversationManager(nlu_model_repo=repo_id)

    print("\nScheduleBOT+ is active! Type 'exit' to quit.")
    print("--------------------------------------------------")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl+D / Ctrl+C ends the session cleanly instead of a traceback.
            print()
            break

        if user_input.strip().lower() == "exit":
            break

        bot_response = manager.get_response(user_input)
        print(f"Bot: {bot_response}")


if __name__ == "__main__":
    main()
53 changes: 37 additions & 16 deletions src/schedulebot/nlp/intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,58 @@
import os
import json
import joblib
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_download
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


class IntentClassifier:
    def __init__(self, repo_id: str):
        """
        Loads the KNN classifier and its artifacts from a Hugging Face Hub repository.

        Args:
            repo_id (str): The ID of the repository on the Hub (e.g., 'username/repo-name').
        """
        # Ensure the HF_TOKEN is available if the repo is private.
        # For public repos, this is not strictly necessary but good practice.
        hf_token = os.getenv("HF_TOKEN")

        # 1. Download the artifacts from the Hub.
        # hf_hub_download returns the local path to the cached file.
        knn_model_path = hf_hub_download(
            repo_id=repo_id, filename="knn_model.joblib", token=hf_token
        )
        id2label_path = hf_hub_download(
            repo_id=repo_id, filename="id2label.json", token=hf_token
        )

        # 2. Load the Sentence Transformer model used to embed incoming text.
        self.embedding_model = SentenceTransformer(
            "sentence-transformers/all-MiniLM-L6-v2"
        )

        # 3. Load the downloaded KNN model and label mapping.
        self.knn = joblib.load(knn_model_path)
        with open(id2label_path, "r") as f:
            # JSON object keys are strings; the classifier predicts integer ids.
            self.id_to_label = {int(k): v for k, v in json.load(f).items()}

    def predict(self, text: str) -> str:
        """
        Predicts the intent for a given text string using embeddings and KNN.
        """
        # encode() returns a 1-D vector; scikit-learn expects a 2-D
        # (n_samples, n_features) array, hence the reshape.
        text_embedding = self.embedding_model.encode(
            text, convert_to_tensor=False
        ).reshape(1, -1)
        predicted_class_id = self.knn.predict(text_embedding)[0]
        return self.id_to_label[predicted_class_id]


if __name__ == "__main__":
# Load the model from the Hub
load_dotenv()
model_repo_id = os.getenv("HUB_MODEL_ID")
classifier = IntentClassifier(model_repo_id=model_repo_id)
classifier = IntentClassifier(repo_id=model_repo_id)

# Tests
text1 = "I want to schedule a meeting with John for next Tuesday"
Expand Down
65 changes: 65 additions & 0 deletions src/schedulebot/nlp/slot_filler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import requests
from datetime import datetime


class SlotFiller:
    def __init__(
        self,
        duckling_url: str = "http://localhost:8000/parse",
        timeout: float = 5.0,
    ):
        """
        Initializes the SlotFiller with the Duckling service URL.

        Args:
            duckling_url: Endpoint of the Duckling HTTP service.
            timeout: Per-request timeout in seconds. Without it, a stalled
                Duckling service would hang the whole bot indefinitely.
        """
        self.duckling_url = duckling_url
        self.timeout = timeout

    def parse_time(self, text: str) -> dict | None:
        """
        Sends text to Duckling to extract time-related information.

        Returns the first valid time entity found, as
        {"text": <matched substring>, "value": <ISO-8601 datetime string>},
        or None when nothing is found or Duckling is unreachable.
        """
        try:
            # Data to send to Duckling
            data = {"text": text, "locale": "en_US", "dims": '["time"]'}

            response = requests.post(
                self.duckling_url, data=data, timeout=self.timeout
            )
            response.raise_for_status()  # Raises an exception for HTTP errors

            parsed_data = response.json()

            if not parsed_data:
                return None

            # Extract the most relevant value.
            # Duckling can return multiple values (e.g., "tomorrow at 5" ->
            # tomorrow's date, hour 5). We look for the 'value' type that
            # contains a concrete date-time (as opposed to an interval).
            for entity in parsed_data:
                if entity.get("dim") == "time" and entity["value"]["type"] == "value":
                    raw_time = entity["value"]["value"]
                    # Convert to a standard format (ISO 8601).
                    # NOTE(review): assumes Duckling's value string is parseable by
                    # datetime.fromisoformat on this Python version — verify for
                    # offsets/fractional seconds on Python < 3.11.
                    dt_object = datetime.fromisoformat(raw_time)
                    return {"text": entity["body"], "value": dt_object.isoformat()}

            return None

        except requests.exceptions.RequestException as e:
            # Best-effort: callers treat None as "no time found".
            print(
                f"ERROR: Unable to communicate with Duckling. Make sure it is running. Details: {e}"
            )
            return None


# Block to test the script directly
if __name__ == "__main__":
filler = SlotFiller()

test_text_1 = "I would like to book a meeting for tomorrow at 5 PM"
time_info_1 = filler.parse_time(test_text_1)
print(f"Text: '{test_text_1}'")
print(f"Extracted info: {time_info_1}\n")

test_text_2 = "Can we meet next Friday?"
time_info_2 = filler.parse_time(test_text_2)
print(f"Text: '{test_text_2}'")
print(f"Extracted info: {time_info_2}\n")

test_text_3 = "Hi, how are you?"
time_info_3 = filler.parse_time(test_text_3)
print(f"Text: '{test_text_3}'")
print(f"Extracted info: {time_info_3}\n")
Loading