diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..6556c5f
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,9 @@
+.venv
+__pycache__
+*.pyc
+.pytest_cache
+.git
+docs
+data/*.csv
+.env
+rf_stress_model.joblib
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..b6ba187
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,43 @@
+name: Build and publish Docker image
+
+on:
+  push:
+    branches: [ main ]
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Compute lowercase owner
+        id: owner_lower
+        run: |
+          # GitHub repository_owner can contain uppercase letters; container registries require lowercase
+          owner=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+          echo "owner=$owner" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push image
+        uses: docker/build-push-action@v4
+        with:
+          push: true
+          tags: ghcr.io/${{ steps.owner_lower.outputs.owner }}/mindguard:latest
+          file: ./Dockerfile
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 469b2a6..b200904 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,18 +1,39 @@
 name: Build and publish static demo site
 
 on:
+  # Run on pushes to any branch (except the generated gh-pages branch) so CI checks run before merge
   push:
+    # Run on all branches except the generated gh-pages branch
+    branches-ignore:
+      - 'gh-pages'
+    # Ignore generated artifacts to avoid workflow retrigger loops
+    paths-ignore:
+      - 'docs/**'
+      - 'rf_stress_model.joblib'
+  # Also run the workflow on pull requests targeting main so you can see results before merging
+  pull_request:
     branches: [ main ]
+    paths-ignore:
+      - 'docs/**'
+      - 'rf_stress_model.joblib'
 
 permissions:
   contents: write
+  pages: write
+  id-token: write
 
 jobs:
   build-and-publish:
     runs-on: ubuntu-latest
+    concurrency:
+      # Use a per-branch concurrency group so runs for different branches do not cancel each other
+      group: publish-docs-${{ github.ref_name }}
+      cancel-in-progress: true
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Set up Python
         uses: actions/setup-python@v4
@@ -29,10 +50,12 @@ jobs:
         python simple_demo.py
         python generate_site.py
 
-      - name: Commit generated docs
-        uses: EndBug/add-and-commit@v9
+      - name: Deploy site to gh-pages branch
+        uses: peaceiris/actions-gh-pages@v3
         with:
-          author_name: github-actions[bot]
-          author_email: github-actions[bot]@users.noreply.github.com
-          message: "ci: generate site (docs)"
-          add: 'docs'
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./docs
+          publish_branch: gh-pages
+          # keep repository history small by not committing other files
+          user_name: github-actions[bot]
+          user_email: github-actions[bot]@users.noreply.github.com
diff --git a/.gitignore b/.gitignore
index f0c6272..549752a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,4 +78,7 @@ coverage/
 coverage.xml
 .cache/
 *.bak
-*.swp
\ No newline at end of file
+*.swp
+
+
+REPOSITORY_DETAILS.md
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3d6ba42
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3.10-slim
+
+# Prevent Python from writing .pyc files and buffering stdout/stderr
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# Install system deps (minimal) and Python deps
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy project
+COPY . /app
+
+# Expose Streamlit default port
+EXPOSE 8501
+
+ENV STREAMLIT_SERVER_HEADLESS=true
+
+# Run the Streamlit app
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
diff --git a/README.md b/README.md
index 0ae8d6c..4161953 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,20 @@ If you don't have these components, use `simple_demo.py` to experiment quickly.
 Presentation and further info
 - The project presentation `MindGuard_An_Intelligent_Assistant_for_Student_Stress_Management_FinalProoposal.pptx` appears in the repository root. It likely contains project motivation, dataset descriptions, and proposed model architectures. If you want, I can extract its text and slides into a Markdown summary (I can do that automatically if you want me to install and run `python-pptx` locally).
 
+## Environment variables
+```
+GOOGLE_API_KEY=your_google_api_key_here
+```
+
+If `GOOGLE_API_KEY` is set, the Streamlit app (`app.py`) will call the Google Generative AI (Gemini) API to create personalized recommendations. If the key is not set, the app now provides a safe, local fallback set of recommendations so the UI still works without external APIs.
+
+Note about Gemini quotas and errors
+- The Gemini API enforces quotas and rate limits. If your project exceeds the quota (or the API returns rate-limit errors), the application will now:
+  1. show a concise warning in the UI informing you that the external AI is unavailable, and
+  2. use a safe local fallback recommendation generator so users still receive helpful, non-medical advice.
+
+If you rely on Gemini for richer responses, monitor your Google Cloud quota and billing, or use a paid plan to increase rate limits. The app logs the full API error to the server console for debugging but intentionally avoids showing raw API errors in the UI.
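+
+One quick way to confirm the key is visible before starting the app (a sanity check, assuming `python-dotenv` is installed as in `requirements.txt`):
+
+```bash
+# Prints True when GOOGLE_API_KEY is found in the environment or in a local .env file
+python -c "from dotenv import load_dotenv; import os; load_dotenv(); print(bool(os.getenv('GOOGLE_API_KEY')))"
+```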
+
 Files added/changed in this update
 - `README.md` — this file (expanded with aim, goals, and instructions).
 - `data/StressLevelDataset.csv` — small synthetic sample dataset (so the EDA and demo can run).
@@ -78,4 +92,25 @@ Presentation and further info
 Recommendations / result view:
 ![Recommendations view](assets/screenshots/recommendations_view.svg)
 
----
+
+Docker / container deployment
+--------------------------------
+I added a Dockerfile and docker-compose configuration so you can run the Streamlit app in a container or publish the image to a container registry.
+
+Run locally with Docker Compose
+
+1. Build and run:
+```bash
+docker compose up --build
+```
+2. Open the app at: http://localhost:8501
+
+Notes:
+- The container reads `GOOGLE_API_KEY` from the environment. You can provide it with `export GOOGLE_API_KEY=...` before running `docker compose up`, or create a `.env` file with that variable (do not commit secrets).
+- The image exposes port 8501.
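+
+If you prefer plain Docker over Compose, an equivalent build-and-run sequence (a sketch, assuming the same `mindguard:latest` tag the Compose file builds) is:
+
+```bash
+# Build the image from the repository root, then run it with the API key forwarded
+docker build -t mindguard:latest .
+docker run --rm -p 8501:8501 -e GOOGLE_API_KEY="$GOOGLE_API_KEY" mindguard:latest
+```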
+
+Publish image automatically (GitHub)
+
+- I added a GitHub Actions workflow `.github/workflows/docker-publish.yml`. On push to `main` it builds the image and pushes it to GitHub Container Registry (GHCR) as `ghcr.io/<owner>/mindguard:latest`, where `<owner>` is the lowercase repository owner.
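+
+Once the workflow has run, the published image can be pulled and started directly (a sketch; substitute your lowercase GitHub user or organization name for `<owner>`):
+
+```bash
+docker pull ghcr.io/<owner>/mindguard:latest
+docker run --rm -p 8501:8501 ghcr.io/<owner>/mindguard:latest
+```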
+
+
diff --git a/app.py b/app.py
index 2e44a9c..aac6ace 100644
--- a/app.py
+++ b/app.py
@@ -4,6 +4,7 @@
 import os
 from dotenv import load_dotenv
 import google.generativeai as genai
+import joblib
 
 load_dotenv()
 
@@ -50,8 +51,8 @@ def get_h2o_model(model_path):
 def get_gemini_recommendations(stress_level_text, input_data):
     api_key = os.getenv("GOOGLE_API_KEY")
     if not api_key:
-        st.error("Google API Key not found. Please ensure it is set in the .env file.")
-        return None
+        # No Google API key — provide a safe local fallback recommendation instead
+        return generate_local_recommendations(stress_level_text, input_data)
     try:
         genai.configure(api_key=api_key)
         model = genai.GenerativeModel(model_name='gemini-2.5-pro')
@@ -90,18 +91,96 @@ def get_gemini_recommendations(stress_level_text, input_data):
         response = model.generate_content(prompt)
         return response.text
     except Exception as e:
-        st.error(f"An error occurred while calling Gemini AI: {e}")
-        return None
+        # Don't dump the raw API error to the user UI. Show a friendly message and fall back.
+        err_text = str(e)
+        # Detect common quota/rate-limit signals (simplified check)
+        if '429' in err_text or 'quota' in err_text.lower() or 'rate limit' in err_text.lower():
+            st.warning("The Gemini API is temporarily unavailable due to quota or rate limits. Showing local recommendations instead.")
+            st.info("To enable richer AI-generated advice, add a valid `GOOGLE_API_KEY` with an appropriate quota/billing plan. See the README for details.")
+        else:
+            st.warning("The Gemini AI service returned an error. Showing local recommendations instead.")
+
+        # Log the full error to the server console for debugging, but keep the UI message concise.
+        print("Gemini API error:", err_text)
+
+        # Fall back to local recommendations
+        return generate_local_recommendations(stress_level_text, input_data)
+
+
+def generate_local_recommendations(stress_level_text, input_data):
+    """Return a short, safe recommendation string when the Gemini API is not available.
+    The content is intentionally generic and supportive (not medical advice).
+    """
+    # Extract a few key values safely
+    try:
+        sleep_q = input_data.get('sleep_quality', [None])[0]
+        depression = input_data.get('depression', [None])[0]
+        academic = input_data.get('academic_performance', [None])[0]
+        teacher_rel = input_data.get('teacher_student_relationship', [None])[0]
+    except Exception:
+        sleep_q = depression = academic = teacher_rel = None
+
+    support = (
+        "It sounds like you are going through a challenging time — that's understandable. "
+        "You're taking a good step by checking in on your wellbeing."
+    )
+
+    bullets = []
+    # Tailor bullets simply based on a few factors
+    if sleep_q is not None and sleep_q <= 2:
+        bullets.append("Try a 15-minute wind-down routine before bed to improve sleep quality.")
+    else:
+        bullets.append("Maintain regular sleep routines — consistent bed and wake times help a lot.")
-MODEL_PATH = "XGBoost_1_AutoML_1_20251102_85004"
-ml_model = get_h2o_model(MODEL_PATH)
+    if academic is not None and academic <= 2:
+        bullets.append("Break study tasks into 25-minute focused sessions with short breaks (Pomodoro).")
+    else:
+        bullets.append("Keep using structured study blocks and take short breaks to avoid burnout.")
+
+    if depression is not None and depression >= 15:
+        bullets.append("If low mood or worry persists, consider contacting campus mental health services or a trusted professional.")
+    elif teacher_rel is not None and teacher_rel <= 2:
+        bullets.append("Try reaching out to a trusted instructor or peer to discuss academic concerns.")
+    else:
+        bullets.append("Reach out to friends or family and share how you feel — social support helps.")
+
+    # Compose markdown-style text similar to Gemini output
+    md = f"**Support:** {support}\n\n**Recommendations:**\n"
+    for b in bullets:
+        md += f"- {b}\n"
+
+    return md
+
+
+
+# Prefer a lightweight scikit-learn model (created by `simple_demo.py`) when present.
+SKLEARN_MODEL_PATH = "rf_stress_model.joblib"
+use_sklearn = False
+sklearn_model = None
+if os.path.exists(SKLEARN_MODEL_PATH):
+    try:
+        sklearn_model = joblib.load(SKLEARN_MODEL_PATH)
+        use_sklearn = True
+        print(f"Loaded scikit-learn model from '{SKLEARN_MODEL_PATH}'. Streamlit will use this model (no Java/H2O required).")
+    except Exception as e:
+        print(f"Failed to load scikit-learn model '{SKLEARN_MODEL_PATH}': {e}. Falling back to H2O if available.")
+
+ml_model = None
+if not use_sklearn:
+    MODEL_PATH = "XGBoost_1_AutoML_1_20251102_85004"
+    ml_model = get_h2o_model(MODEL_PATH)
 
 st.title("🧠 MindGuard: Assess Your Stress Level")
 st.write("Answer a few questions to get an assessment of your current stress level and personalized recommendations.")
 
-if ml_model:
+# Proceed if either a scikit-learn model or the H2O model is available
+if use_sklearn or ml_model:
+    if use_sklearn:
+        st.info("Using local scikit-learn model (rf_stress_model.joblib) for predictions.")
+    else:
+        st.info("Using H2O model for predictions.")
     st.subheader("Please rate the following factors:")
     col1, col2 = st.columns(2)
     with col1:
@@ -130,11 +209,24 @@ def get_gemini_recommendations(stress_level_text, input_data):
         }
         input_df = pd.DataFrame(input_data)
-        h2o_input_frame = h2o.H2OFrame(input_df)
 
-        # 2. Make Prediction
-        prediction = ml_model.predict(h2o_input_frame)
-        predicted_level = prediction['predict'].as_data_frame().iloc[0, 0]
+        # 2. Make Prediction (scikit-learn fallback or H2O)
+        if use_sklearn and sklearn_model is not None:
+            try:
+                preds = sklearn_model.predict(input_df)
+                # sklearn returns integer labels (0/1/2)
+                predicted_level = float(preds[0])
+            except Exception as e:
+                st.error(f"Failed to predict with scikit-learn model: {e}")
+                predicted_level = None
+        else:
+            if ml_model is None:
+                st.error("No ML model available (H2O model failed to load and no scikit-learn fallback found).")
+                predicted_level = None
+            else:
+                h2o_input_frame = h2o.H2OFrame(input_df)
+                prediction = ml_model.predict(h2o_input_frame)
+                predicted_level = prediction['predict'].as_data_frame().iloc[0, 0]
 
         stress_map = {0.0: "Low", 1.0: "Medium", 2.0: "High"}
         predicted_stress_text = stress_map.get(predicted_level, "Unknown")
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..3d73b29
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+services:
+  mindguard:
+    build: .
+    image: mindguard:latest
+    ports:
+      - "8501:8501"
+    volumes:
+      # Mount current repo (read-only) so edits are visible during development
+      - ./:/app:ro
+    environment:
+      # Forward GOOGLE_API_KEY from host or .env
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY}
+    restart: unless-stopped
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..96117e7
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,32 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>MindGuard Demo</title>
+</head>
+<body>
+  <main>
+    <h1>MindGuard — Live demo (static)</h1>
+
+    <p>This static page is auto-generated from the repository demo model. It shows a sample input, the predicted stress level, and simple recommendations.</p>
+
+    <h2>Sample input</h2>
+
+    <pre>{'sleep_quality': 3, 'teacher_student_relationship': 4, 'blood_pressure': 2, 'future_career_concerns': 3, 'depression': 8, 'academic_performance': 4, 'social_support': 2, 'self_esteem': 18, 'safety': 2, 'headache': 1, 'anxiety_level': 12, 'mental_health_history': 0, 'breathing_problem': 2, 'noise_level': 2, 'living_conditions': 2, 'basic_needs': 2, 'study_load': 2, 'peer_pressure': 2, 'extracurricular_activities': 1, 'bullying': 0}</pre>
+
+    <h2>Predicted stress level</h2>
+
+    <p>1 (Medium)</p>
+
+    <h2>Recommendation</h2>
+
+    <p>You're assessed at MEDIUM stress. Try 15-minute wind-down routines before bed, Pomodoro study blocks (25/5), and discuss career concerns with a mentor.</p>
+  </main>
+</body>
+</html>
diff --git a/model.py b/model.py
index 3f6f748..1e63b51 100644
--- a/model.py
+++ b/model.py
@@ -10,11 +10,18 @@
 
 # --- 2. Load and Initial Data Inspection ---
 print("--- Loading Data ---")
-try:
-    df = pd.read_csv('StressLevelDataset.csv')
-    print("Dataset loaded successfully.")
-except FileNotFoundError:
-    print("Error: 'StressLevelDataset.csv' not found. Ensure it is in the same directory.")
+df = None
+# Try the repository root first, then the data/ folder.
+for path in ("StressLevelDataset.csv", "data/StressLevelDataset.csv"):
+    try:
+        df = pd.read_csv(path)
+        print(f"Dataset loaded successfully from '{path}'.")
+        break
+    except FileNotFoundError:
+        continue
+
+if df is None:
+    print("Error: 'StressLevelDataset.csv' not found. Ensure it is in the repository root or in the data/ folder.")
     exit()
 
 print("\n--- First 5 rows of the dataset: ---")
diff --git a/rf_stress_model.joblib b/rf_stress_model.joblib
new file mode 100644
index 0000000..bd8613a
Binary files /dev/null and b/rf_stress_model.joblib differ
diff --git a/simple_demo.py b/simple_demo.py
index c6eeaaf..4a6edcf 100644
--- a/simple_demo.py
+++ b/simple_demo.py
@@ -23,9 +23,18 @@ def train_and_save(df, model_path='rf_stress_model.joblib'):
     X = df.drop(columns=['stress_level'])
     y = df['stress_level']
 
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.33, random_state=42, stratify=y
-    )
+    # Try a stratified split; if the dataset is very small this can fail
+    # because the requested test set would contain fewer samples than classes.
+    try:
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.33, random_state=42, stratify=y
+        )
+    except ValueError as e:
+        print("\nWarning: stratified train/test split failed:", e)
+        print("Falling back to a non-stratified split for this small dataset.")
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=0.33, random_state=42, stratify=None
+        )
 
     clf = RandomForestClassifier(n_estimators=100, random_state=42)
     clf.fit(X_train, y_train)