Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0007704
feat: event to load webpages in memory
akiva10b Jan 14, 2026
9d368d6
Merge branch 'master' into wip-chatbot
akiva10b Jan 20, 2026
2da7271
feat: add user_id secret handler
akiva10b Jan 21, 2026
7ded8a7
Merge branch 'wip-chatbot' of https://github.com/Sefaria/Sefaria-Proj…
akiva10b Jan 21, 2026
20170a8
feat: add chatbot
akiva10b Jan 21, 2026
1b03e5c
feat: add live base url
akiva10b Jan 21, 2026
96cb059
feat: make remote update instant and allow copy
akiva10b Jan 25, 2026
c40324a
feat: add user IDs
akiva10b Jan 27, 2026
8f52d47
Merge branch 'master' into wip-chatbot
akiva10b Jan 28, 2026
1083d18
feat: chatbot user hashing and encryption, whitelisting for experiments
akiva10b Feb 1, 2026
3cce159
chore: fix migrations
akiva10b Feb 1, 2026
b1cfc9f
Merge pull request #3066 from Sefaria/pr-3033
akiva10b Feb 1, 2026
b7bdb27
chore: fix migration for constraint issue
akiva10b Feb 1, 2026
d167f10
feat: load bot from remot
akiva10b Feb 1, 2026
f07e0f3
chore: make CHATBOT_API_BASE_URL fallback
akiva10b Feb 2, 2026
495864f
chore: add CHATBOT_API_BASE_URL
akiva10b Feb 2, 2026
8ceedca
chore: revert client pull
akiva10b Feb 2, 2026
453888e
fix: fix incorrect access of settings
yitzhakc Feb 3, 2026
f9bff0b
chore: add default
akiva10b Feb 4, 2026
c0432ad
chore: fix permissions rules
akiva10b Feb 5, 2026
51ca59a
chore: update gunicorn version to 25.0.3
nsantacruz Feb 9, 2026
0e9b37b
chore: downgrade gunicorn version to 23.0.0
nsantacruz Feb 9, 2026
cfd4bab
chore: downgrade gunicorn version to 23.0.0
nsantacruz Feb 9, 2026
0c43dfd
feat: control bot env with var
akiva10b Feb 10, 2026
f9b93e8
Merge branch 'fix-setup-tools-missing-bug' into wip-chatbot
akiva10b Feb 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ data:
CSRF_COOKIE_SAMESITE = os.getenv("CSRF_COOKIE_SAMESITE", "Lax")

SECRET_KEY = os.getenv("SECRET_KEY")
# Deliberately no usable fallback: when the environment variable is unset the
# value must stay falsy, so code that guards on `not CHATBOT_USER_ID_SECRET`
# disables token generation instead of signing with a well-known key.
CHATBOT_USER_ID_SECRET = os.getenv("CHATBOT_USER_ID_SECRET", "")
CHATBOT_API_BASE_URL = os.getenv("CHATBOT_API_BASE_URL", "https://chat-dev.sefaria.org/api")

EMAIL_BACKEND = 'anymail.backends.mandrill.EmailBackend'
DEFAULT_FROM_EMAIL = os.getenv("DEFAULT_FROM_EMAIL")
Expand Down
63 changes: 63 additions & 0 deletions reader/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from django import forms
from django.contrib import admin
from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.forms import UserChangeForm
from django.contrib.auth.models import User

from reader.models import UserExperimentSettings, _set_user_experiments


@admin.register(UserExperimentSettings)
class UserExperimentSettingsAdmin(admin.ModelAdmin):
    """Admin screen listing each user's experiment flag, keyed by e-mail."""

    list_display = ("user_email", "experiments")
    list_display_links = ("user_email",)
    # `user_email` is a callable column, so the changelist does not join the
    # user table on its own; without this every row triggers a separate query
    # for `obj.user`.
    list_select_related = ("user",)
    raw_id_fields = ("user",)
    search_fields = ("user__email", "user__username", "user__first_name", "user__last_name")
    list_filter = ("experiments",)

    def user_email(self, obj):
        """Return the e-mail address of the user that owns ``obj``."""
        return obj.user.email

    user_email.short_description = "Email"
    user_email.admin_order_field = "user__email"


class UserExperimentsChangeForm(UserChangeForm):
    """User change form with an extra "Experiments" checkbox.

    The checkbox is not a field of the user model; its value is read from and
    written to ``UserExperimentSettings`` via ``_set_user_experiments``.
    """

    experiments = forms.BooleanField(required=False, label="Experiments")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Only an already-saved user can have a stored setting to show.
        if not (self.instance and self.instance.pk):
            return
        stored = UserExperimentSettings.objects.filter(user=self.instance).first()
        self.fields["experiments"].initial = bool(stored and stored.experiments)

    def save(self, commit=True):
        """Save the user; persist the checkbox now, or defer it when ``commit`` is False."""
        user = super().save(commit=commit)
        wanted = self.cleaned_data.get("experiments", False)
        if commit:
            _set_user_experiments(user, wanted)
        else:
            # Remember the value; _save_m2m() writes it once the user is saved.
            self._experiments_value = wanted
        return user

    def _save_m2m(self):
        """Run the normal m2m save, then flush a checkbox value deferred by save()."""
        super()._save_m2m()
        if not hasattr(self, "_experiments_value"):
            return
        _set_user_experiments(self.instance, self._experiments_value)
        del self._experiments_value


class UserAdminWithExperiments(UserAdmin):
    """``UserAdmin`` that uses the experiments-aware form and shows its checkbox."""

    form = UserExperimentsChangeForm
    # Append one extra section holding the form-only "experiments" field.
    fieldsets = UserAdmin.fieldsets + (
        ("Experiments", {"fields": ("experiments",)}),
    )


def register_user_admin():
    """Register ``UserAdminWithExperiments`` for ``User``, replacing any existing admin."""
    if admin.site.is_registered(User):
        admin.site.unregister(User)
    admin.site.register(User, UserAdminWithExperiments)


# Executed at import time so the replacement happens as soon as this module loads.
register_user_admin()
68 changes: 68 additions & 0 deletions reader/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2026-02-01 14:27
from __future__ import unicode_literals

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Initial migration: create ``UserExperimentSettings``.

    Before creating the table it runs ``_ensure_user_pk``, which adds a
    PRIMARY KEY constraint to the user table when PostgreSQL reports none.
    """

    initial = True

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    def _ensure_user_pk(apps, schema_editor):
        """Add a primary-key constraint to the user table if it has none.

        Does nothing on non-PostgreSQL backends, when AUTH_USER_MODEL is not
        of the form "app_label.ModelName", when the model cannot be resolved,
        when the table does not exist, or when a primary key is already there.
        """
        conn = schema_editor.connection
        if conn.vendor != "postgresql":
            return

        # Same effect as unpacking into two names and catching ValueError.
        label_parts = settings.AUTH_USER_MODEL.split(".")
        if len(label_parts) != 2:
            return
        app_label, model_name = label_parts

        try:
            user_model = apps.get_model(app_label, model_name)
        except LookupError:
            return

        user_table = user_model._meta.db_table
        pk_col = user_model._meta.pk.column
        quote = conn.ops.quote_name

        with conn.cursor() as cur:
            if user_table not in conn.introspection.table_names(cur):
                return

            # contype 'p' selects primary-key constraints on this table.
            cur.execute(
                "SELECT 1 FROM pg_constraint WHERE contype = 'p' AND conrelid = %s::regclass",
                [user_table],
            )
            if cur.fetchone():
                return

            pkey_name = "%s_pkey" % user_table
            cur.execute(
                "ALTER TABLE %s ADD CONSTRAINT %s PRIMARY KEY (%s)"
                % (quote(user_table), quote(pkey_name), quote(pk_col))
            )

    operations = [
        # Reverse is a no-op: the constraint is left in place on rollback.
        migrations.RunPython(_ensure_user_pk, reverse_code=migrations.RunPython.noop),
        migrations.CreateModel(
            name='UserExperimentSettings',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('experiments', models.BooleanField(default=True)),
                (
                    'user',
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='experiment_settings',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'verbose_name': 'User experiment settings',
                'verbose_name_plural': 'User experiment settings',
            },
        ),
    ]
1 change: 1 addition & 0 deletions reader/migrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

37 changes: 36 additions & 1 deletion reader/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,38 @@
from django.contrib.auth.models import User
from django.core.exceptions import ObjectDoesNotExist
from django.db import models

# Create your models here.

class UserExperimentSettings(models.Model):
    """One row per user holding that user's ``experiments`` flag."""

    # Reachable from a user instance as ``user.experiment_settings``.
    user = models.OneToOneField(
        User,
        on_delete=models.CASCADE,
        related_name="experiment_settings",
    )
    experiments = models.BooleanField(default=True)

    class Meta:
        verbose_name = "User experiment settings"
        verbose_name_plural = "User experiment settings"

    def __str__(self):
        """Return a label that identifies the row by its user id."""
        return f"Experiments for user {self.user_id}"


def _get_user_experiments(user):
    """Return the user's stored experiments flag, or False when no settings row exists."""
    try:
        stored = user.experiment_settings
    except ObjectDoesNotExist:
        # No UserExperimentSettings row for this user.
        return False
    return bool(stored.experiments)


def _set_user_experiments(user, value):
    """Persist ``bool(value)`` as the experiments flag for ``user``.

    Creates the ``UserExperimentSettings`` row when it is missing. A new row is
    inserted with the requested value directly, so it never passes through the
    model default first. An existing row is only written when its flag differs.
    """
    flag = bool(value)
    record, created = UserExperimentSettings.objects.get_or_create(
        user=user,
        defaults={"experiments": flag},
    )
    if not created and record.experiments != flag:
        record.experiments = flag
        record.save(update_fields=["experiments"])


# Expose the flag as a read/write ``experiments`` property on User.
# Skipped when User already has an attribute of that name.
if not hasattr(User, "experiments"):
    User.add_to_class(
        "experiments",
        property(fget=_get_user_experiments, fset=_set_user_experiments),
    )


def user_has_experiments(user):
    """Return True when ``user`` is authenticated and has a settings row with experiments on.

    Falsy users and objects without a truthy ``is_authenticated`` attribute
    yield False without touching the database.
    """
    authenticated = bool(user) and getattr(user, "is_authenticated", False)
    if not authenticated:
        return False
    enabled_rows = UserExperimentSettings.objects.filter(user=user, experiments=True)
    return enabled_rows.exists()
5 changes: 5 additions & 0 deletions reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from sefaria.system.decorators import catch_error_as_json, sanitize_get_params, json_response_decorator
from sefaria.system.exceptions import InputError, PartialRefInputError, BookNameError, NoVersionFoundError, DictionaryEntryNotFoundError
from sefaria.system.cache import django_cache
from reader.models import user_has_experiments
from sefaria.system.database import db
from sefaria.helper.search import get_query_obj
from sefaria.helper.crm.crm_mediator import CrmMediator
Expand Down Expand Up @@ -3834,6 +3835,8 @@ def profile_api(request, slug=None):
if not profileJSON:
return jsonResponse({"error": "No post JSON."})
profileUpdate = json.loads(profileJSON)
if "experiments" in profileUpdate and not user_has_experiments(request.user):
profileUpdate.pop("experiments", None)

profile = UserProfile(id=request.user.id)
profile.update(profileUpdate)
Expand Down Expand Up @@ -4168,9 +4171,11 @@ def account_settings(request):
Page for managing a user's account settings.
"""
profile = UserProfile(id=request.user.id)
experiments_available = user_has_experiments(request.user)
return render_template(request,'account_settings.html', {"headerMode": True}, {
'user': request.user,
'profile': profile,
'experiments_available': experiments_available,
'lang_names_and_codes': zip([Locale(lang).languages[lang].capitalize() for lang in SITE_SETTINGS['SUPPORTED_TRANSLATION_LANGUAGES']], SITE_SETTINGS['SUPPORTED_TRANSLATION_LANGUAGES']),
'translation_language_preference': (profile is not None and profile.settings.get("translation_language_preference", None)) or request.COOKIES.get("translation_language_preference", None),
"renderStatic": True
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Appium-Python-Client==1.2.0
Cerberus
cryptography==42.0.7
PyJWT==1.7.1 # pinned b/c current version 2.0.0 breaks simplejwt. waiting for 2.0.1
babel
django-admin-sortable==2.1.13
Expand Down Expand Up @@ -45,7 +46,8 @@ google-auth==1.24.0
google-cloud-logging==1.15.1
google-cloud-storage==1.32.0
google-re2
gunicorn==20.0.4
gunicorn==23.0.0
setuptools==69.5.1
html5lib==0.9999999
httplib2==0.18.1
ipython==7.34.*
Expand Down
5 changes: 5 additions & 0 deletions sefaria/local_settings_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@
MANAGERS = ADMINS

SECRET_KEY = 'insert your long random secret key here !'
CHATBOT_USER_ID_SECRET = 'insert your chatbot user id secret here'


EMAIL_HOST = 'localhost'
Expand Down Expand Up @@ -352,3 +353,7 @@
CSRF_COOKIE_SECURE = True # Set to True if using HTTPS
CSRF_COOKIE_HTTPONLY = False # Must be False for CSRF tokens to work with JavaScript
CSRF_COOKIE_SAMESITE = 'Lax' # Modern browsers require this

CHATBOT_API_BASE_URL = os.getenv("CHATBOT_API_BASE_URL", "https://chat-dev.sefaria.org/api")
# Use the local Vite dev server script instead of the hosted UMD bundle.
CHATBOT_USE_LOCAL_SCRIPT = True
3 changes: 3 additions & 0 deletions sefaria/model/user_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ def __init__(self, user_obj=None, id=None, slug=None, email=None, user_registrat

# Fundraising
self.is_sustainer = False
self.experiments = False

# Update with saved profile doc in MongoDB
profile = db.profiles.find_one({"id": id})
Expand Down Expand Up @@ -665,6 +666,7 @@ def to_mongo_dict(self):
"version_preferences_by_corpus": self.version_preferences_by_corpus,
"attr_time_stamps": self.attr_time_stamps,
"is_sustainer": self.is_sustainer,
"experiments": self.experiments,
"tag_order": getattr(self, "tag_order", None),
"last_sync_web": self.last_sync_web,
"profile_pic_url": self.profile_pic_url,
Expand Down Expand Up @@ -705,6 +707,7 @@ def to_api_dict(self, basic=False):
other_info = {
"pinned_sheets": self.pinned_sheets,
"is_sustainer": self.is_sustainer,
"experiments": self.experiments,
}
dictionary.update(other_info)
return dictionary
Expand Down
3 changes: 3 additions & 0 deletions sefaria/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def get_static_url():

# Make this unique, and don't share it with anybody.
SECRET_KEY = ''
# Empty (falsy) by default, like SECRET_KEY: an unconfigured deployment must not
# generate chatbot tokens with a well-known key. Override in local settings.
CHATBOT_USER_ID_SECRET = ''
# Base default so code reading settings.CHATBOT_API_BASE_URL works even when
# local settings do not define it.
CHATBOT_API_BASE_URL = "https://chat-dev.sefaria.org/api"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing CHATBOT_API_BASE_URL default causes AttributeError crash

High Severity

The context processor accesses settings.CHATBOT_API_BASE_URL at line 132, but this setting is only added to local_settings_example.py, not to the base sefaria/settings.py. While CHATBOT_USER_ID_SECRET is correctly added to the base settings file, CHATBOT_API_BASE_URL is not. If a deployment has a local_settings.py that doesn't define this setting, the context processor will crash with AttributeError for users with experiments enabled.

Additional Locations (1)

Fix in Cursor Fix in Web

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Weak default secret passes the emptiness guard

Medium Severity

CHATBOT_USER_ID_SECRET defaults to the string 'secret' in base settings and the helm chart env var fallback. The guard if not CHATBOT_USER_ID_SECRET in the context processor won't catch this because 'secret' is truthy, so tokens would silently be generated with a well-known key if the environment variable is not configured. Compare with SECRET_KEY which defaults to '' (falsy), causing an obvious failure.

Additional Locations (2)

Fix in Cursor Fix in Web

CHATBOT_USE_LOCAL_SCRIPT = False

TEMPLATES = [
{
Expand All @@ -99,6 +101,7 @@ def get_static_url():
"sefaria.system.context_processors.large_data",
"sefaria.system.context_processors.body_flags",
"sefaria.system.context_processors.base_props",
"sefaria.system.context_processors.chatbot_user_token",
"sefaria.system.context_processors.module_context",
],
'loaders': [
Expand Down
25 changes: 25 additions & 0 deletions sefaria/system/context_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,15 @@
from functools import wraps

from sefaria.settings import *
from django.conf import settings
from sefaria.site.site_settings import SITE_SETTINGS
from sefaria.model import library
from sefaria.model.user_profile import UserProfile, UserHistorySet, UserWrapper
from sefaria.utils import calendars
from sefaria.utils.util import short_to_long_lang_code
from sefaria.utils.chatbot import build_chatbot_user_token
from sefaria.utils.hebrew import hebrew_parasha_name
from reader.views import render_react_component, _get_user_calendar_params
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused imports added to context_processors module

Low Severity

Several newly added top-level imports are unused: UserHistorySet, UserWrapper, calendars, short_to_long_lang_code, hebrew_parasha_name, render_react_component, and _get_user_calendar_params. Only UserProfile and build_chatbot_user_token are actually used by the new chatbot_user_token function. Notably, the existing code pattern in this file does a local import from reader.views (line 84) to avoid heavy top-level dependencies; the new top-level from reader.views import ... breaks that convention.

Fix in Cursor Fix in Web


import structlog
logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -68,6 +75,7 @@ def global_settings(request):
"OFFLINE": OFFLINE,
"SITE_SETTINGS": SITE_SETTINGS,
"CLIENT_SENTRY_DSN": CLIENT_SENTRY_DSN,
"CHATBOT_USE_LOCAL_SCRIPT": CHATBOT_USE_LOCAL_SCRIPT,
}


Expand Down Expand Up @@ -107,3 +115,20 @@ def large_data(request):
@user_only
def body_flags(request):
    """Return the ``EMBED`` flag: True when the query string has an "embed" parameter."""
    is_embed = "embed" in request.GET
    return {"EMBED": is_embed}


@user_only
def chatbot_user_token(request):
    """
    Template context for the chatbot widget.

    Always returns "chatbot_user_token" and "chatbot_enabled". When the
    chatbot is enabled the dict also carries "chatbot_api_base_url".

    The chatbot is enabled only when all of these hold:
      * the user is authenticated,
      * CHATBOT_USER_ID_SECRET and CHATBOT_API_BASE_URL are configured,
      * an admin has granted the user experiments (``user_has_experiments``),
      * the user's own profile has ``experiments`` switched on.
    """
    disabled = {"chatbot_user_token": None, "chatbot_enabled": False}

    user = request.user
    if not user.is_authenticated or not CHATBOT_USER_ID_SECRET:
        return disabled

    # Read with a fallback: a deployment whose settings lack
    # CHATBOT_API_BASE_URL gets the chatbot switched off instead of an
    # AttributeError while rendering the page.
    api_base_url = getattr(settings, "CHATBOT_API_BASE_URL", None)
    if not api_base_url:
        return disabled

    # Local import: only this function needs it.
    from reader.models import user_has_experiments

    # Check the admin-controlled permission first. It is one EXISTS query,
    # so users without access never pay for building a UserProfile (which
    # loads the profile from MongoDB). It also means revoking access in
    # UserExperimentSettings takes effect even if the profile still has
    # experiments set to True.
    if not user_has_experiments(user):
        return disabled

    # The user's own opt-in, stored on the profile.
    profile = UserProfile(user_obj=user)
    if not getattr(profile, "experiments", False):
        return disabled

    token = build_chatbot_user_token(user.id, CHATBOT_USER_ID_SECRET)
    if token is None:
        # The builder declined to issue a token; do not advertise the chatbot.
        return disabled

    return {
        "chatbot_user_token": token,
        "chatbot_enabled": True,
        "chatbot_api_base_url": api_base_url,
    }
37 changes: 37 additions & 0 deletions sefaria/utils/chatbot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import base64
import hashlib
import json
import os
from datetime import timedelta

from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from django.utils import timezone

# Default token lifetime in hours; the default for build_chatbot_user_token's ttl_hours.
DEFAULT_TTL_HOURS = 72
# Number of random bytes drawn for the nonce that is prepended to each token.
NONCE_SIZE_BYTES = 12


def _hash_user_id(user_id):
    """Return the hex SHA-256 digest of ``str(user_id)`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(str(user_id).encode("utf-8"))
    return hasher.hexdigest()


def _derive_key(secret):
    """Return the raw SHA-256 digest (32 bytes) of the UTF-8 encoded ``secret`` string."""
    encoded_secret = secret.encode("utf-8")
    return hashlib.sha256(encoded_secret).digest()


def build_chatbot_user_token(user_id, secret, now=None, ttl_hours=DEFAULT_TTL_HOURS):
    """Build an encrypted, expiring token that identifies a user to the chatbot.

    The payload is compact JSON with two keys: "id" (the hashed user id) and
    "expiration" (ISO-8601, microseconds dropped). It is encrypted with
    AES-GCM under a key derived from ``secret``. The returned string is the
    URL-safe base64 encoding of the random nonce followed by the
    ``AESGCM.encrypt`` output.

    :param user_id: id of the user; a falsy value yields None
    :param secret: string the encryption key is derived from; falsy yields None
    :param now: datetime the TTL is counted from; defaults to ``timezone.now()``
    :param ttl_hours: token lifetime in hours
    :return: the token as an ASCII str, or None when ``user_id`` or ``secret`` is falsy
    """
    if not (user_id and secret):
        return None

    issued_at = now if now else timezone.now()
    expiry = (issued_at + timedelta(hours=ttl_hours)).replace(microsecond=0)
    claims = {
        "id": _hash_user_id(user_id),
        "expiration": expiry.isoformat(),
    }
    plaintext = json.dumps(claims, separators=(",", ":"), sort_keys=True).encode("utf-8")

    cipher = AESGCM(_derive_key(secret))
    # Fresh random nonce per token; it travels in front of the ciphertext.
    nonce = os.urandom(NONCE_SIZE_BYTES)
    sealed = cipher.encrypt(nonce, plaintext, None)
    return base64.urlsafe_b64encode(nonce + sealed).decode("ascii")
Loading
Loading