From e17dce597fa619c98f5ca7b16b53f9fea7c6b16c Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Thu, 31 Jul 2025 14:25:50 +0300 Subject: [PATCH 01/36] feat: Add SheetScorer tool for analyzing Jewish study sheets with LLM-based scoring - Add SheetScorer class for scoring biblical reference discussion levels (0-4 scale), title interest, and creativity assessment with multi-language Hebrew/English support. Add resilient LLM processing with OpenAI function calling, recursive fallback handling, intelligent content chunking, and token management with smart truncation and LLM-based summarization for large sheets - Add Celery integration with SheetScoringInput/SheetScoringOutput dataclasses in scoring_io module and score_sheet_task - Register sheet_scoring package in Celery autodiscovery and add test script for validation This enables automated analysis of Jewish study sheets to score reference discussion quality and title engagement, supporting content curation workflows. --- app/celery_setup/app.py | 2 +- .../scoring_io/__init__.py | 4 + .../scoring_io/scoring_io_input.py | 6 + .../scoring_io/scoring_io_output.py | 36 + app/sheet_scoring/README.md | 227 ++++++ app/sheet_scoring/openai_sheets_scorer.py | 722 ++++++++++++++++++ app/sheet_scoring/sheet_scoring.py | 40 + app/sheet_scoring/tasks.py | 13 + app/sheet_scoring/text_utils.py | 116 +++ 9 files changed, 1165 insertions(+), 1 deletion(-) create mode 100644 app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py create mode 100644 app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py create mode 100644 app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py create mode 100644 app/sheet_scoring/README.md create mode 100644 app/sheet_scoring/openai_sheets_scorer.py create mode 100644 app/sheet_scoring/sheet_scoring.py create mode 100644 app/sheet_scoring/tasks.py create mode 100644 app/sheet_scoring/text_utils.py diff --git a/app/celery_setup/app.py b/app/celery_setup/app.py index 0b50834..d123eeb 100644 --- a/app/celery_setup/app.py +++ b/app/celery_setup/app.py @@ -3,4 +3,4 @@ app = Celery('llm') app.conf.update(**generate_config_from_env()) -app.autodiscover_tasks(packages=['topic_prompt']) +app.autodiscover_tasks(packages=['topic_prompt', 'sheet_scoring']) diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py b/app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py new file mode 100644 index 0000000..2c39c73 --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py @@ -0,0 +1,4 @@ +from .scoring_io_input import SheetScoringInput +from .scoring_io_output import SheetScoringOutput + +__all__ = ["SheetScoringInput", "SheetScoringOutput"] \ No newline at end of file diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py new file mode 100644 index 0000000..55406fe --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py @@ -0,0 +1,6 @@ +from dataclasses import dataclass +from typing import Any +@dataclass +class SheetScoringInput: + sheet_content: dict[str, Any] + diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py new file mode 100644 index 0000000..3f3f9ab --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass +from typing 
import Dict, Union, List +from datetime import datetime + + +@dataclass +class SheetScoringOutput: + sheet_id:str + processed_at: str + language: str + title_interest_level: int + title_interest_reason: str + creativity_score:float + ref_levels: Dict[str, int] + ref_scores: Dict[str, float] + + + def __init__(self, + sheet_id: str, + ref_scores: Dict[str, float], + ref_levels:Dict[str, int], + processed_at: Union[str, datetime], + language: str, + creativity_score: float, + title_interest_level: int, + title_interest_reason: str): + self.ref_scores = ref_scores + self.sheet_id = sheet_id + self.processed_at = processed_at.isoformat() if isinstance( + processed_at, datetime + ) else processed_at + self.ref_levels = ref_levels + self.creativity_score = creativity_score + self.language = language + self.title_interest_level = title_interest_level + self.title_interest_reason = title_interest_reason \ No newline at end of file diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md new file mode 100644 index 0000000..40f8c0d --- /dev/null +++ b/app/sheet_scoring/README.md @@ -0,0 +1,227 @@ +# SheetScorer - Jewish Study Sheet Analysis Tool + +**SheetScorer** is a Python tool that uses **LLMs** to automatically analyze and score Jewish study sheets for reference relevance and title interest. It processes sheets from **MongoDB**, evaluates how well biblical references are discussed, and assigns engagement scores to sheet titles. + +## Scores Extracted + +- **Reference Discussion Scoring**: Analyzes how thoroughly each biblical reference is discussed (**0-4 scale**) +- **Title Interest Scoring**: Evaluates how engaging sheet titles are to potential readers (**0-4 scale**) +- **Creativity Assessment**: Computes creativity scores based on percentage of **user-generated content** +- **Title Interest Reason**: Explanation of title scoring. + +## Quick Start + +```python +from sheet_scorer import SheetScorer + +# Initialize scorer +scorer = SheetScorer( + api_key="your-openai-api-key", + model="gpt-4o-mini" +) + +# Process a sheet +sheet_data = { + "_id": "sheet123", + "title": "Understanding Genesis Creation", + "expandedRefs": ["Genesis 1:1", "Genesis 1:2", "Genesis 1:3"], + # ... 
other sheet content +} + +result = scorer.process_sheet_by_content(sheet_data) +print(result) +``` + +## Scoring System + +### Reference Discussion Levels + +The tool evaluates how well each biblical reference is discussed using a **0-4 scale**: + +| Level | Description | +|-------|-------------| +| **0 - Not Discussed** | Reference is **quoted only**, no discussion or commentary | +| **1 - Minimal** | Mentioned only through **neighboring verses**, minimal engagement | +| **2 - Moderate** | Some discussion present with **basic commentary** | +| **3 - Significant** | **Substantial discussion** with detailed commentary | +| **4 - Central** | Reference is a **central focus** of the entire sheet | + +### Title Interest Levels + +Sheet titles are scored for **user engagement** on a **0-4 scale**: + +| Level | Description | +|-------|-------------| +| **0 - Not Interesting** | **Off-topic** or unengaging for target users | +| **1 - Slight Relevance** | **Low appeal**, users unlikely to engage | +| **2 - Somewhat Interesting** | Users might **skim**, moderate appeal | +| **3 - Interesting** | Users **likely to open** and read | +| **4 - Very Compelling** | **Must-read content**, high engagement expected | + +### Creativity Score + +Calculated as the **percentage of user-generated content** versus all text (including quoted canonical text). Higher scores indicate more **original commentary** and analysis. + +## Configuration Options + +### Initialization Parameters + +```python +scorer = SheetScorer( + api_key="your-api-key", # OpenAI API key + model="gpt-4o-mini", # Model to use + max_prompt_tokens=128000, # Maximum input tokens + token_margin=16384, # Reserved tokens for output + max_ref_to_process=800, # Maximum references to process + chunk_size=80 # References per chunk +) +``` + +### Key Constants + +- **DEFAULT_MAX_OUTPUT_TOKENS**: **16384** +- **DEFAULT_CHUNK_SIZE**: **80** references per processing chunk +- **DEFAULT_MAX_INPUT_OUTPUT_TOKENS**: **128000** total token limit +- **MAX_CHUNK_OVERLAP**: **10** references overlap between chunks + +## Core Methods + +### **process_sheet_by_content(sheet, add_full_comment)** + +**Main method** to process a complete sheet and return scores. + +**Parameters:** +- `sheet` (**Dict**): **MongoDB** sheet document containing title, references, and content +- `add_full_comment` (**bool**): parameter that allows to add quotations text to input that LLM receives + +**Returns:** +- **Dictionary** with scoring results or **None** if processing fails + +**Example Output:** +```python +{ + "_id": "sheet123", + "ref_levels": {"Genesis 1:1": 3, "Genesis 1:2": 2}, + "ref_scores": {"Genesis 1:1": 60.0, "Genesis 1:2": 40.0}, + "title_interest_level": 3, + "title_interest_reason": "Compelling theological question", + "language": "en", + "creativity_score": 0.75, + "processed_at": "2025-01-31T10:30:00Z" +} +``` +! ref_scores is normalized version of ref_levels + +### **get_gpt_scores(content, ref_names, title)** + +**Core scoring method** that processes content and returns analysis. + +**Parameters:** +- `content` (**str**): Sheet text content to analyze +- `ref_names` (**List[str]**): List of biblical references to score +- `title` (**str**): Sheet title to evaluate + +## Content Processing Strategy + +The tool uses an **adjustable approach** for canonical quotations: + +1. **Always includes** all user commentary and **original content** +2. 
**Conditionally includes** canonical quotes only if the **entire bundle** fits within token limits +and **add_full_comment is set to True** +3. **Truncates intelligently** using **LLM summarization** when content exceeds limits + 4. ***LLM Summarization***: Uses secondary LLM to compress content while preserving key information + 5. ***Reference Preservation***: Maintains all biblical reference tags during compression + 6. ***Character Fallback***: Falls back to character-based truncation if summarization fails + +## Grading Strategy +Processed content is sent to LLM, together with references for grading: + +### Resilient Grading List Processing + +- **Chunking**: Large reference lists are processed in **chunks** to stay within model limits +- **Overlap Handling**: Smart overlap between chunks prevents **reference boundary issues** + +### Resilient Reference Grading + +- **Primary attempt**: Process **all references together** +- **Fallback**: Split reference list in **half** and process **recursively** +- **Final fallback**: Assign **default score of 0** to problematic references + + +### Resilient score extraction + +Uses **OpenAI's function calling** feature with **strict schemas**: + +#### Middle Chunk Scoring Schema +```python +{ + "name": "score_references", + "parameters": { + "ref_levels": { + "Genesis 1:1": {"type": "integer", "minimum": 0, "maximum": 4}, + # ... for each reference + } + } +} +``` + +#### Title Scoring Schema +```python +{ + "name": "score_title", + "parameters": { + "language": {"type": "string"}, + "title_interest_level": {"type": "integer", "minimum": 0, "maximum": 4}, + "title_interest_reason": {"type": "string", "maxLength": 100} + } +} +``` + + +## Database Integration + +Designed for **MongoDB integration** with expected document structure: + +```python +{ + "_id": "unique_sheet_id", + "title": "Sheet Title", + "expandedRefs": ["Genesis 1:1", "Exodus 2:3"], + # Additional sheet content fields... 
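+    # Illustrative only: the block shapes below mirror what
+    # text_utils.sheet_to_text_views reads from "sources"; the values are invented.
+    "sources": [
+        {"ref": "Genesis 1:1", "text": {"en": "In the beginning..."}},  # canonical quote
+        {"outsideText": "<p>User commentary on the verse.</p>"},        # user-written
+        {"comment": "Closing thought written by the sheet author."},    # user-written
+    ],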
+} +``` + +## Output Fields + +| Field | Description | +|-------|-------------| +| **`ref_levels`** | Raw **0-4 scores** for each reference | +| **`ref_scores`** | **Normalized percentage scores** (sum to 100%) | +| **`title_interest_level`** | Title **engagement score** (0-4) | +| **`title_interest_reason`** | **Brief explanation** of title score | +| **`language`** | **Detected language code** | +| **`creativity_score`** | **Percentage** of user-generated content | +| **`processed_at`** | **Processing timestamp** | + +## Logging + +**Comprehensive logging** for monitoring and debugging: + +- **Info**: Processing decisions and **content statistics** +- **Warning**: **Score validation** and fallback usage +- **Error**: **LLM failures** and processing errors + +Configure logging level as needed: +```python +import logging +logging.getLogger('sheet_scorer').setLevel(logging.INFO) +``` + + +## Language Support + +Supports **automatic detection** and processing of: + +- **English** (`en`) - **Default language** +- **Hebrew** (`he`) - Full **RTL support** +- Language detection based on **original user-written content** diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py new file mode 100644 index 0000000..ab7d35e --- /dev/null +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -0,0 +1,722 @@ +import json +import logging +from datetime import datetime +from enum import IntEnum +from typing import Any,Dict,Iterator,List,Optional,Set,Tuple + +import tiktoken +from langchain.schema import HumanMessage +from langchain_openai import ChatOpenAI + +from app.sheet_scoring.text_utils import sheet_to_text_views + +# Configure logging +logger = logging.getLogger(__name__) + + +class IncompleteScoreError(Exception): + """Raised when LLM JSON is valid but doesn’t cover every reference.""" + pass + + +class ScoreLevel(IntEnum): + """Reference discussion and title interest levels.""" + NOT_DISCUSSED = 0 + MINIMAL = 1 + MODERATE = 2 + SIGNIFICANT = 3 + CENTRAL = 4 + + +class LanguageCode: + """Supported language codes.""" + ENGLISH = 'en' + HEBREW = 'he' + DEFAULT = ENGLISH + + +class SheetScorer: + """ + Scores Jewish study sheets for reference relevance and title interest using LLMs, + computes creativity score based on percentage of user generated content. + + This class processes sheets from MongoDB, analyzes their content using OpenAI's GPT models, + and assigns scores for how well each biblical reference is discussed and how interesting + the sheet title is to users. 
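+
+    Minimal usage sketch (illustrative; assumes a valid OpenAI API key and a
+    sheet dict shaped like the MongoDB documents described in the README):
+
+        scorer = SheetScorer(api_key="your-openai-api-key", model="gpt-4o-mini")
+        result = scorer.process_sheet_by_content(sheet_doc)  # dict of scores, or None on failure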
+ """ + + # Configuration constants + DEFAULT_MAX_OUTPUT_TOKENS = 16384 + DEFAULT_CHUNK_SIZE = 80 + DEFAULT_MAX_INPUT_OUTPUT_TOKENS = 128000 + DEFAULT_MAX_REFS_TO_PROCESS = 800 + DEFAULT_TOKEN_CHAR_RATIO = 3 + MAX_CHUNK_OVERLAP = 10 + # Database field names + REF_SCORES_FIELD = "ref_scores" + REF_LEVELS_FIELD = "ref_levels" + TITLE_INTEREST_FIELD = "title_interest_level" + LANGUAGE_FIELD = "language" + TITLE_INTEREST_REASON_FIELD = 'title_interest_reason' + SHEET_ID_FIELD = "_id" + PROCESSED_AT_FIELD = "processed_at" + CREATIVITY_SCORE_FIELD = 'creativity_score' + + # Valid score levels + VALID_LEVELS: Set[int] = {level.value for level in ScoreLevel} + + def __init__( + self, + api_key: Optional[str], + model: str = "gpt-4o-mini", + max_prompt_tokens: int = DEFAULT_MAX_INPUT_OUTPUT_TOKENS, + token_margin: int = DEFAULT_MAX_OUTPUT_TOKENS, + max_ref_to_process: int = DEFAULT_MAX_REFS_TO_PROCESS, + chunk_size: int = DEFAULT_CHUNK_SIZE, + + ): + self.max_prompt_tokens = max_prompt_tokens + self.token_margin = token_margin + self.model = model + self.chunk_size = chunk_size + self.max_ref_to_process = max_ref_to_process + self.llm = self._create_json_llm(api_key,model) + self.summarizer = self._create_text_llm(api_key,model) + + def _create_json_llm(self,api_key: str,model: str) -> ChatOpenAI: + """Create LLM client for JSON responses.""" + return ChatOpenAI( + model=model, + temperature=0, + top_p=0, + frequency_penalty=0, + presence_penalty=0, + seed=42, + api_key=api_key, + ) + + def _create_text_llm(self,api_key: str,model: str) -> ChatOpenAI: + """Create LLM client for text responses.""" + return ChatOpenAI( + model=model, + temperature=0, + model_kwargs={"response_format": {"type": "text"}}, + api_key=api_key, + ) + + def _invoke_llm_with_function(self,prompt: str, + function_schema: Dict[str,Any]) -> Dict[ + str,Any]: + """Invoke LLM using function calling instead of JSON mode.""" + response = self.llm.invoke( + [HumanMessage(content=prompt)], + functions=[function_schema], + function_call={"name": function_schema["name"]} + ) + + # Extract function call arguments + if hasattr( + response,'additional_kwargs' + ) and 'function_call' in response.additional_kwargs: + function_call = response.additional_kwargs['function_call'] + return json.loads(function_call['arguments']) + else: + raise ValueError("No function call in response") + + def _get_reference_scoring_function_schema(self,ref_names: List[str]) -> \ + Dict[str,Any]: + """Create function schema for reference scoring with exact reference names.""" + return { + "name": "score_references", + "description": "Score how well each biblical reference is discussed in the sheet", + "parameters": { + "type": "object", + "properties": { + self.REF_LEVELS_FIELD: { + "type": "object", + "description": "Scores for each reference (0-4 scale)", + "properties": { + ref_name: { + "type": "integer", + "description": f"Discussion level for {ref_name}", + "minimum": 0, + "maximum": 4 + } + for ref_name in ref_names + }, + "required": ref_names, + "additionalProperties": False + } + }, + "required": [self.REF_LEVELS_FIELD], + "additionalProperties": False + } + } + + def _get_title_scoring_schema(self) -> Dict[ + str,Any]: + """Create function schema for both reference and title scoring.""" + return { + "name": "score_title", + "description": "Score title interest for a Jewish study sheet", + "parameters": { + "type": "object", + "properties": { + self.LANGUAGE_FIELD: { + "type": "string", + "description": "ISO-639-1 title language code", + }, + 
self.TITLE_INTEREST_FIELD: { + "type": "integer", + "description": "How interesting the title is to users (0-4 scale)", + "minimum": 0, + "maximum": 4 + }, + self.TITLE_INTEREST_REASON_FIELD: { + "type": "string", + "description": "Brief explanation of title interest score (max 20 words)", + "maxLength": 100 + } + }, + "required": [self.LANGUAGE_FIELD,self.TITLE_INTEREST_FIELD, + self.TITLE_INTEREST_REASON_FIELD], + "additionalProperties": False + } + } + + def _get_full_scoring_function_schema(self,ref_names: List[str]) -> Dict[ + str,Any]: + """Create function schema for both reference and title scoring.""" + return { + "name": "score_sheet", + "description": "Score references and title interest for a Jewish study sheet", + "parameters": { + "type": "object", + "properties": { + self.LANGUAGE_FIELD: { + "type": "string", + "description": "# ISO‑639‑1 code inferred from *original user‑written* content", + }, + self.REF_LEVELS_FIELD: { + "type": "object", + "description": "Scores for each reference (0-4 scale)", + "properties": { + ref_name: { + "type": "integer", + "description": f"Discussion level for {ref_name}", + "minimum": 0, + "maximum": 4 + } + for ref_name in ref_names + }, + "required": ref_names, + "additionalProperties": False + }, + self.TITLE_INTEREST_FIELD: { + "type": "integer", + "description": "How interesting the title is to users (0-4 scale)", + "minimum": 0, + "maximum": 4 + }, + self.TITLE_INTEREST_REASON_FIELD: { + "type": "string", + "description": "Brief explanation of title interest score (max 20 words)", + "maxLength": 100 + } + }, + "required": [self.LANGUAGE_FIELD,self.REF_LEVELS_FIELD, + self.TITLE_INTEREST_FIELD, + self.TITLE_INTEREST_REASON_FIELD], + "additionalProperties": False + } + } + + @staticmethod + def chunk_list(lst: List[Any],n: int) -> Iterator[List[Any]]: + """Yield successive n‑sized chunks from lst.""" + for i in range(0,len(lst),n): + yield lst[i: i + n] + + def _count_tokens(self,text: str) -> int: + """Rough token count; if no encoder, fall back to char heuristic.""" + try: + encoding = tiktoken.encoding_for_model(self.model) + return len(encoding.encode(text)) + except (KeyError,ValueError) as e: + logger.warning( + f"Could not get encoding for model {self.model}: {e}" + ) + return len(text) // self.DEFAULT_TOKEN_CHAR_RATIO + + def _invoke_llm(self,prompt: str) -> Dict[str,Any]: + """Invoke LLM with prompt and parse JSON response.""" + response = self.llm([HumanMessage(content=prompt)]) + return json.loads(response.content) + + def _create_title_only_prompt_function(self,sheet_title: str) -> str: + return f""" + You are scoring THE TITLE of a Jewish study sheet for how interesting it would be to users. + + SHEET TITLE: + {sheet_title} + + TASK: Return JSON with keys `title_interest_level` (0-4) and `title_interest_reason` ( < 20 words). + Choose a higher score when the title: + + Title interest level (int 0–4): + 0: Not interesting / off‑topic for users + 1: Slight relevance, low pull + 2: Somewhat interesting; user might skim + 3: Interesting; user likely to open + 4: Very compelling / must‑open + """ + + def _create_chunk_prompt_for_function(self,sheet_content: str, + ref_names: List[str]) -> str: + """Create prompt for function calling (no JSON format instructions needed).""" + refs_md = "\n".join(f"- {r}" for r in ref_names) + return f""" +You are analyzing a Jewish study sheet. Rate how much each listed reference +is discussed or central in the sheet. 
+ +SHEET CONTENT: +{sheet_content} + +REFERENCES TO EVALUATE: +{refs_md} + +Scoring Scale (0-4): + 0: Quoted only, no discussion + 1: Mentioned only through neighboring verses + 2: Moderate discussion (some commentary) + 3: Significant discussion (substantial commentary) + 4: Central focus of sheet + +Score each reference based on how thoroughly it's discussed in the content. +""" + + def _create_final_chunk_prompt_for_function(self,sheet_content: str, + ref_names: List[str], + sheet_title: str) -> str: + """Create prompt for final chunk with title scoring using function + calling.""" + sheet_title_clean = sheet_title.strip() or "(untitled)" + refs_md = "\n".join(f"- {r}" for r in ref_names) + + return f""" +Analyze this Jewish study sheet and provide two types of scores: + +SHEET TITLE: {sheet_title_clean} + +SHEET CONTENT: +{sheet_content} + +REFERENCES TO EVALUATE: +{refs_md} + +TASKS: +1. Reference Discussion Scoring (0-4): + 0: Quoted only, no discussion + 1: Mentioned only through neighboring verses + 2: Moderate discussion (some commentary) + 3: Significant discussion (substantial commentary) + 4: Central focus of sheet + +2. Title Interest Scoring (0-4): + 0: Not interesting/off-topic + 1: Slight relevance, low appeal + 2: Somewhat interesting; user might skim + 3: Interesting; user likely to open + 4: Very compelling/must-open + +Infer the language from the original user-written content. +""" + + def _validate_score_level(self,score: Any, + field_name: str = "score") -> int: + """Validate and normalize score to valid range.""" + if score not in self.VALID_LEVELS: + try: + score = int(score) + except (ValueError,TypeError): + logger.warning( + f"Invalid {field_name}: {score}, defaulting to 0" + ) + return ScoreLevel.NOT_DISCUSSED + + if score not in self.VALID_LEVELS: + clamped = max( + ScoreLevel.NOT_DISCUSSED, + min(ScoreLevel.CENTRAL,score) + ) + logger.warning( + f"{field_name} {score} out of range, clamping to {clamped}" + ) + return clamped + + return score + + def _sheet_to_text( + self, + no_quotes_content: str, + full_content: str, + max_tokens: int, + add_full_commentary: bool + ) -> str: + """ + Build a text snapshot of the sheet with an *all‑or‑nothing* rule: + • Always include every bit of author commentary. + • Append *all* canonical quotations only if the whole bundle still + fits into `max_tokens`. 
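+        • Otherwise send the commentary-only view; if even the commentary
+          alone exceeds `max_tokens`, summarize/truncate it first via
+          `_truncate_to_token_budget`.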
+ """ + comm_tokens = self._count_tokens(no_quotes_content) + # Commentary alone is already bigger than the budget → truncate & quit + full_tokens = self._count_tokens(full_content) + if add_full_commentary: + if full_tokens <= max_tokens: + logger.info("Sending to LLM sheet with quotations") + return full_content + + if comm_tokens >= max_tokens: + logger.info("Truncating user commentaries") + return self._truncate_to_token_budget(no_quotes_content, max_tokens) + logger.info("Sending to LLM sheet without quotations text") + return no_quotes_content + + + def _get_title_info(self,sheet_title: str) -> Dict[str,Any]: + """Obtain title-interest score ONLY (used when no content).""" + prompt = self._create_title_only_prompt_function(sheet_title) + try: + function_schema = self._get_title_scoring_schema() + data = self._invoke_llm_with_function(prompt,function_schema) + title_level = self._validate_score_level( + data.get(self.TITLE_INTEREST_FIELD), + self.TITLE_INTEREST_FIELD + ) + + return { + self.TITLE_INTEREST_FIELD: + title_level, + self.TITLE_INTEREST_REASON_FIELD: + data.get(self.TITLE_INTEREST_REASON_FIELD,""), + self.LANGUAGE_FIELD: data.get( + self.LANGUAGE_FIELD,LanguageCode.DEFAULT + ), + } + except Exception as e: + logger.error(f"Title-only GPT attempt failed: {e}") + return { + self.TITLE_INTEREST_FIELD: ScoreLevel.NOT_DISCUSSED, + self.TITLE_INTEREST_REASON_FIELD: "LLM error", + self.LANGUAGE_FIELD: LanguageCode.DEFAULT + } + + def _normalize_scores_to_percentages( + self, + sheet_tokens: int, + score_levels: Dict[str,int], + beta: float = 1500 # token mass where no penalty + ) -> Dict[str,float]: + + total_level = sum(score_levels.values()) or 1 + size_factor = min(1.0,sheet_tokens / beta) # clamp to 1 + + # small sheets (few tokens) → size_factor < 1 → percentages shrink + percentages = { + ref: round(level * 100 / total_level * size_factor,2) + for ref,level in score_levels.items() + } + + norm = sum(percentages.values()) or 1 + percentages = {r: round(v * 100 / norm,2) for r,v in + percentages.items()} + return percentages + + def _grade_refs_resilient( + self, + content: str, + refs: List[str], + *, + with_title: bool = False, + sheet_title: str = "" + ) -> Tuple[Optional[Dict[str,Any]],Dict[str,int]]: + """ + Robustly grade a list of refs. + • First try the whole list. + • If the model returns < len(refs) scores (or JSON error), + split the list in two and grade each half recursively. 
+ """ + if not refs: + return {},{} + + try: + if with_title: + prompt = self._create_final_chunk_prompt_for_function( + content,refs,sheet_title + ) + function_schema = self._get_full_scoring_function_schema(refs) + else: + prompt = self._create_chunk_prompt_for_function(content,refs) + function_schema = self._get_reference_scoring_function_schema( + refs + ) + data,scores = self._get_gpt_ref_scores_function( + prompt,function_schema,refs + ) + return data,scores + except Exception: + pass + + # fallback branch + if len(refs) == 1: # nothing left to split + return {},{refs[0]: ScoreLevel.NOT_DISCUSSED} + + mid = len(refs) // 2 + ld,ls = self._grade_refs_resilient( + content,refs[:mid], + with_title=with_title, + sheet_title=sheet_title + ) + rd,rs = self._grade_refs_resilient( + content,refs[mid:], + with_title=with_title, + sheet_title=sheet_title + ) + merged_scores = {**ls,**rs} + merged_data = ld or rd + return merged_data,merged_scores + + def _get_gpt_ref_scores_function(self,prompt: str,function_schema, + expected_refs: List[str]): + try: + data = self._invoke_llm_with_function(prompt,function_schema) + chunk_scores = data.get(self.REF_LEVELS_FIELD,{}) + validated_scores = {} + for ref,score in chunk_scores.items(): + validated_scores[ref] = self._validate_score_level( + score,f"ref_score[{ref}]" + ) + + # Check for missing references and assign default scores (0) + missing_refs = set(expected_refs) - set(validated_scores.keys()) + if missing_refs: + logger.warning( + f"GPT didn't return scores for {len(missing_refs)} references: {list(missing_refs)[:5]}... - defaulting to 0" + ) + if len(missing_refs) < 5: + for ref in missing_refs: + validated_scores[ + ref] = ScoreLevel.NOT_DISCUSSED + else: + raise IncompleteScoreError( + f"Missing {len(missing_refs)} references" + ) + + # Ensure we only include expected references (in case GPT returned extras) + final_scores = { + ref: validated_scores.get(ref,ScoreLevel.NOT_DISCUSSED) for ref + in expected_refs} + + data[self.REF_SCORES_FIELD] = final_scores + return data,final_scores + + except IncompleteScoreError: + raise + + except Exception as e: + logger.error(f"Chunk GPT failed: {e}") + return None + + def _last_regular_start(self,n: int,chunk: int,overlap: int) -> int: + """ + Return the index where the *final* chunk (with title) should start. + If the total length fits into one chunk plus the allowed overlap, + analyse everything together (start = 0). 
+ """ + if n <= chunk + overlap: + return 0 + step = chunk - overlap + return max(0,n - chunk) if step <= 0 else (n - chunk) + + def _process_reference_chunks( + self, + content: str, + ref_names: List[str] + ) -> Optional[Dict[str,int]]: + """Process reference chunks in batches.""" + ref_scores: Dict[str,int] = {} + + last_chunk_start = self._last_regular_start( + len(ref_names),self.chunk_size,self.MAX_CHUNK_OVERLAP + ) + + for chunk in self.chunk_list( + ref_names[:last_chunk_start],self.chunk_size + ): + # prompt = self._create_chunk_prompt(content,chunk) + _,chunk_scores = self._grade_refs_resilient( + content=content, + refs=chunk, + with_title=False + ) + if chunk_scores is None: + return None + ref_scores.update(chunk_scores) + + return ref_scores + + def _process_final_chunk_with_title( + self, + content: str, + ref_names: List[str], + title: str, + ) -> Optional[Dict[str,Any]]: + """Process final chunk and get title scores.""" + start = self._last_regular_start( + len(ref_names),self.chunk_size,self.MAX_CHUNK_OVERLAP + ) + final_chunk = ref_names[start:] + + # prompt = self._create_final_chunk_prompt(content,final_chunk,title) + result = self._grade_refs_resilient( + content=content, + refs=final_chunk, + with_title=True, + sheet_title=title + ) + + if result is None: + return None + + data,_ = result + return data + + def get_gpt_scores( + self, + content: str, + ref_names: List[str], + title: str, + ) -> Optional[Dict[str,Any]]: + """Get GPT scores for references and title.""" + # Process reference chunks + ref_scores = self._process_reference_chunks(content,ref_names) + if ref_scores is None: + return None + + # Process final chunk with title + final_data = self._process_final_chunk_with_title( + content,ref_names,title + ) + if final_data is None: + return None + + # Combine scores + final_chunk_scores = final_data.get(self.REF_SCORES_FIELD,{}) + ref_scores.update(final_chunk_scores) + + # # Normalize to percentages + score_percentages = self._normalize_scores_to_percentages( + score_levels=ref_scores, + sheet_tokens=self._count_tokens(content) + ) + + # Validate title score + title_level = self._validate_score_level( + final_data.get(self.TITLE_INTEREST_FIELD), + self.TITLE_INTEREST_FIELD + ) + + return { + self.LANGUAGE_FIELD: final_data.get( + self.LANGUAGE_FIELD,LanguageCode.DEFAULT + ), + self.REF_LEVELS_FIELD: ref_scores, + self.REF_SCORES_FIELD: score_percentages, + self.TITLE_INTEREST_FIELD: title_level, + self.TITLE_INTEREST_REASON_FIELD: final_data.get( + self.TITLE_INTEREST_REASON_FIELD,"" + ), + } + + def _truncate_to_token_budget(self,text: str,max_tokens: int) -> str: + """Truncate text to fit within token budget using LLM summarization.""" + if self._count_tokens(text) <= max_tokens: + return text + try: + prompt = f""" + Compress the following commentary to ≤ {max_tokens} tokens. + Keep every reference tag like "Genesis 1:1" or "Exodus 2:5". + Use clear sentences; preserve main ideas. 
+ + {text} + """ + summary = self.summarizer( + [HumanMessage(content=prompt)] + ).content.strip() + + if self._count_tokens(summary) <= max_tokens: + return summary + else: + # Fallback: character-based truncation + return summary[:max_tokens * self.DEFAULT_TOKEN_CHAR_RATIO] + + except Exception as e: + logger.error(f"Summarization failed: {e}") + # Fallback: character-based truncation + return text[:max_tokens * self.DEFAULT_TOKEN_CHAR_RATIO] + + def process_sheet_by_content(self,sheet: Dict[str,Any], + add_full_commentary=False) -> Optional[ + Dict[str,Any]]: + """Score a single sheet based on its content.""" + sheet_id = str(sheet.get(self.SHEET_ID_FIELD)) + ref_names = sheet.get("expandedRefs",[]) + sheet_title = sheet.get("title","") + + if not ref_names: + logger.info(f"No expanded refs for sheet {sheet_id}, skipping") + return None + + (quotes_only, + no_quotes_content, + full_content, + has_original, creativity_score) = sheet_to_text_views(sheet, + LanguageCode.DEFAULT) + + # Check for original content and reference limits + if (not has_original or + len(ref_names) > self.max_ref_to_process): + logger.info(f"Sheet {sheet_id}: using equal distribution") + score_percentages = {ref: 0 for ref in ref_names} + title_info = self._get_title_info(sheet_title) + + return { + self.SHEET_ID_FIELD: sheet_id, + self.REF_LEVELS_FIELD: score_percentages, + self.CREATIVITY_SCORE_FIELD: creativity_score, + self.REF_SCORES_FIELD: score_percentages, + self.PROCESSED_AT_FIELD: datetime.utcnow(), + **title_info + } + content = self._sheet_to_text(no_quotes_content=no_quotes_content, + full_content=full_content, + max_tokens=self.max_prompt_tokens- + self.token_margin, + add_full_commentary=add_full_commentary) + # Process with GPT + gpt_analysis = self.get_gpt_scores(content,ref_names,sheet_title) + if not gpt_analysis: + logger.error(f"Failed to get GPT scores for sheet {sheet_id}") + return None + return { + self.SHEET_ID_FIELD: sheet_id, + self.CREATIVITY_SCORE_FIELD: creativity_score, + self.REF_SCORES_FIELD: gpt_analysis[self.REF_SCORES_FIELD], + self.REF_LEVELS_FIELD: gpt_analysis[self.REF_LEVELS_FIELD], + self.PROCESSED_AT_FIELD: datetime.utcnow(), + self.LANGUAGE_FIELD: gpt_analysis[self.LANGUAGE_FIELD], + self.TITLE_INTEREST_FIELD: gpt_analysis[self.TITLE_INTEREST_FIELD], + self.TITLE_INTEREST_REASON_FIELD: + gpt_analysis[self.TITLE_INTEREST_REASON_FIELD], + } diff --git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py new file mode 100644 index 0000000..2d0b02f --- /dev/null +++ b/app/sheet_scoring/sheet_scoring.py @@ -0,0 +1,40 @@ +from .openai_sheets_scorer import SheetScorer +import os +from pathlib import Path +from sefaria_llm_interface.scoring_io import ( + SheetScoringInput, + SheetScoringOutput, +) +from dotenv import load_dotenv + +load_dotenv(Path(__file__).parent / "secrets.env") # adjust path if needed + +def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: + + scorer = SheetScorer( + api_key=os.getenv("OPENAI_API_KEY"), + ) + result = scorer.process_sheet_by_content(sheet=inp.sheet_content) + if not result: + return SheetScoringOutput( + sheet_id=result[scorer.SHEET_ID_FIELD], + ref_scores={}, + ref_levels={}, + title_interest_level=0, + title_interest_reason="", + language="", + creativity_score=0, + processed_at=None, + ) + return SheetScoringOutput( + sheet_id=result[scorer.SHEET_ID_FIELD], + ref_scores=result[scorer.REF_SCORES_FIELD], + ref_levels=result[scorer.REF_LEVELS_FIELD], + 
title_interest_level=result[scorer.TITLE_INTEREST_FIELD], + title_interest_reason=result[scorer.TITLE_INTEREST_REASON_FIELD], + language=result[scorer.LANGUAGE_FIELD], + creativity_score=result[scorer.CREATIVITY_SCORE_FIELD], + processed_at=result["processed_at"].isoformat(), + ) + + diff --git a/app/sheet_scoring/tasks.py b/app/sheet_scoring/tasks.py new file mode 100644 index 0000000..e3a7c30 --- /dev/null +++ b/app/sheet_scoring/tasks.py @@ -0,0 +1,13 @@ +from celery import shared_task +from .sheet_scoring import score_one_sheet +from sefaria_llm_interface.scoring_io import ( + SheetScoringInput +) +from dataclasses import asdict + + +@shared_task(name='llm.score_sheet') +def score_sheet_task(raw_input:dict) -> dict: + inp = SheetScoringInput(**raw_input) + out = score_one_sheet(inp) + return asdict(out) \ No newline at end of file diff --git a/app/sheet_scoring/text_utils.py b/app/sheet_scoring/text_utils.py new file mode 100644 index 0000000..4cdaab2 --- /dev/null +++ b/app/sheet_scoring/text_utils.py @@ -0,0 +1,116 @@ +import re +import html +from typing import Dict, List, Tuple, Any + +TAG_RE = re.compile(r"<[^>]+>") +TOKEN_RE = re.compile(r"\b\w+\b",re.UNICODE) + + +def strip_html(raw: str) -> str: + """Remove tags & entities, collapse whitespace.""" + if not raw: + return "" + text = TAG_RE.sub("",raw) + text = html.unescape(text) + text = re.sub(r"\s+\n","\n",text) # trim spaces before newlines + text = re.sub(r"[ \t]{2,}"," ",text) # collapse runs of blanks + return text.strip() + + +def token_count(text: str) -> int: + """Approximate word tokens (both English & Hebrew).""" + return len(TOKEN_RE.findall(text)) + + +def sheet_to_text_views( + sheet: Dict[str, Any], + default_lang: str = "en", +) -> Tuple[str, str, str, bool, float]: + """ + Build three plain‑text snapshots of a Sefaria sheet **and** compute a + creativity score. 
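+    Creativity is computed as
+        original_tokens / (original_tokens + quoted_tokens)
+    i.e. the share of user-written words relative to all words on the sheet.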
+ + Returns + ------- + quotes_only str – ref + canonical text blocks + no_quotes str – title & user commentary, refs only for quotes + with_quotes str – full sheet (title, commentary, *and* quotes) + has_original bool – True if any user commentary exists + creativity_score float – user_token_count / total_token_count + """ + + quotes: List[str] = [] + no_quotes: List[str] = [] + with_quotes:List[str] = [] + + original_tokens = 0 + quoted_tokens = 0 + has_original = False + + title = strip_html(sheet.get("title", "")).strip() + if title: + tok = token_count(title) + original_tokens += tok + no_quotes.append(title) + with_quotes.append(title) + + for blk in sheet.get("sources", []): + # --- outsideText (single‑lang commentary) + if "outsideText" in blk: + txt = strip_html(blk["outsideText"]).strip() + if txt: + has_original = True + t = token_count(txt) + original_tokens += t + no_quotes.append(txt) + with_quotes.append(txt) + + if "outsideBiText" in blk: + for lang in ("en", "he"): + txt = strip_html(blk["outsideBiText"].get(lang, "")).strip() + if txt: + has_original = True + original_tokens += token_count(txt) + no_quotes.append(txt) + with_quotes.append(txt) + + if "text" in blk: + ref = blk.get("ref", "").strip() + canon = strip_html(blk["text"].get(default_lang, "")).strip() + + # show ref label in all views + if ref: + no_quotes.append(ref) + header = f"{ref}:" + else: + header = "" + + if canon: + # quote tokens count toward quoted_tokens + qtok = token_count(canon) + quoted_tokens += qtok + + # add to quotes‑only and with_quotes + if header: + quotes.append(header) + with_quotes.append(header) + quotes.append(canon) + with_quotes.append(canon) + + if "comment" in blk: + txt = strip_html(blk["comment"]).strip() + if txt: + has_original = True + original_tokens += token_count(txt) + no_quotes.append(txt) + with_quotes.append(txt) + + joiner = "\n\n" + quotes_only = joiner.join(quotes) + commentary = joiner.join(no_quotes) + full_sheet = joiner.join(with_quotes) + + total_tokens = original_tokens + quoted_tokens or 1 # avoid div‑by‑zero + creativity = original_tokens / total_tokens + + return quotes_only, commentary, full_sheet, has_original, creativity \ No newline at end of file From b70c55ffc7a0a676fdc2e8a687f54ebe931bae9c Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Tue, 5 Aug 2025 14:24:27 +0300 Subject: [PATCH 02/36] =?UTF-8?q?style:=20clean=20up=20formatting=20and=20?= =?UTF-8?q?update=20imports=20per=20Yishai=E2=80=99s=20partial=20code=20re?= =?UTF-8?q?view?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace manual multiline f-strings with textwrap.dedent for prompt builders - Simplify _invoke_llm_with_function by using getattr on additional_kwargs - Rename SheetScoringOutput.processed_at → processed_datetime - Move datetime→ISO conversion into __post_init__ instead of custom __init__ - Expand and clarify comments on configuration constants - Switch from relative to absolute imports after adding LLM to PYTHONPATH --- .../scoring_io/scoring_io_input.py | 2 + .../scoring_io/scoring_io_output.py | 31 +- app/sheet_scoring/__init__.py | 0 app/sheet_scoring/openai_sheets_scorer.py | 360 ++++++++++-------- app/sheet_scoring/sheet_scoring.py | 14 +- app/sheet_scoring/tasks.py | 4 +- app/sheet_scoring/text_utils.py | 28 +- 7 files changed, 222 insertions(+), 217 deletions(-) create mode 100644 app/sheet_scoring/__init__.py diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py 
b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py index 55406fe..acff7e8 100644 --- a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py +++ b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py @@ -1,5 +1,7 @@ from dataclasses import dataclass from typing import Any + + @dataclass class SheetScoringInput: sheet_content: dict[str, Any] diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py index 3f3f9ab..14bccd1 100644 --- a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py +++ b/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py @@ -1,36 +1,19 @@ from dataclasses import dataclass -from typing import Dict, Union, List +from typing import Dict, Union from datetime import datetime @dataclass class SheetScoringOutput: - sheet_id:str - processed_at: str + sheet_id: str + processed_datetime: str language: str title_interest_level: int title_interest_reason: str - creativity_score:float + creativity_score: float ref_levels: Dict[str, int] ref_scores: Dict[str, float] - - def __init__(self, - sheet_id: str, - ref_scores: Dict[str, float], - ref_levels:Dict[str, int], - processed_at: Union[str, datetime], - language: str, - creativity_score: float, - title_interest_level: int, - title_interest_reason: str): - self.ref_scores = ref_scores - self.sheet_id = sheet_id - self.processed_at = processed_at.isoformat() if isinstance( - processed_at, datetime - ) else processed_at - self.ref_levels = ref_levels - self.creativity_score = creativity_score - self.language = language - self.title_interest_level = title_interest_level - self.title_interest_reason = title_interest_reason \ No newline at end of file + def __post_init__(self): + if isinstance(self.processed_datetime, datetime): + self.processed_datetime = self.processed_datetime.isoformat() \ No newline at end of file diff --git a/app/sheet_scoring/__init__.py b/app/sheet_scoring/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index ab7d35e..a44e72f 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -2,13 +2,12 @@ import logging from datetime import datetime from enum import IntEnum -from typing import Any,Dict,Iterator,List,Optional,Set,Tuple - +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple +import textwrap import tiktoken from langchain.schema import HumanMessage from langchain_openai import ChatOpenAI - -from app.sheet_scoring.text_utils import sheet_to_text_views +from sheet_scoring.text_utils import sheet_to_text_views # Configure logging logger = logging.getLogger(__name__) @@ -41,11 +40,30 @@ class SheetScorer: computes creativity score based on percentage of user generated content. This class processes sheets from MongoDB, analyzes their content using OpenAI's GPT models, - and assigns scores for how well each biblical reference is discussed and how interesting + and assigns scores for how well each reference is discussed and how interesting the sheet title is to users. """ - # Configuration constants + # Configuration constants - + # DEFAULT_MAX_INPUT_OUTPUT_TOKENS: total + # tokens (prompt+response) we’ll send in one API call. Lowering this + # shrinks your available context; raising it risks exceeding the model’s + # limit. 
+ # DEFAULT_MAX_OUTPUT_TOKENS: cap on how many tokens the model + # may generate. If you set this too low, responses may be cut off; too + # high wastes quota. + # DEFAULT_CHUNK_SIZE: how many references to score + # in each batch. Larger chunks use more context (better global view) but + # may exceed token budgets. + # MAX_CHUNK_OVERLAP: how many refs to repeat + # between chunks. More overlap reduces missing-edge-case errors at the + # cost of redundant API calls. + # DEFAULT_MAX_REFS_TO_PROCESS: total refs + # before falling back to equal-distribution scoring. Hitting this limit + # skips heavy LLM work to avoid runaway costs. - + # DEFAULT_TOKEN_CHAR_RATIO: fallback characters‐per‐token estimate when + # encoding fails. Tweak if you find your actual token counts diverge + # significantly from this estimate. DEFAULT_MAX_OUTPUT_TOKENS = 16384 DEFAULT_CHUNK_SIZE = 80 DEFAULT_MAX_INPUT_OUTPUT_TOKENS = 128000 @@ -59,7 +77,7 @@ class SheetScorer: LANGUAGE_FIELD = "language" TITLE_INTEREST_REASON_FIELD = 'title_interest_reason' SHEET_ID_FIELD = "_id" - PROCESSED_AT_FIELD = "processed_at" + PROCESSED_AT_FIELD = "processed_datetime" CREATIVITY_SCORE_FIELD = 'creativity_score' # Valid score levels @@ -83,7 +101,7 @@ def __init__( self.llm = self._create_json_llm(api_key,model) self.summarizer = self._create_text_llm(api_key,model) - def _create_json_llm(self,api_key: str,model: str) -> ChatOpenAI: + def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for JSON responses.""" return ChatOpenAI( model=model, @@ -95,7 +113,7 @@ def _create_json_llm(self,api_key: str,model: str) -> ChatOpenAI: api_key=api_key, ) - def _create_text_llm(self,api_key: str,model: str) -> ChatOpenAI: + def _create_text_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for text responses.""" return ChatOpenAI( model=model, @@ -104,9 +122,9 @@ def _create_text_llm(self,api_key: str,model: str) -> ChatOpenAI: api_key=api_key, ) - def _invoke_llm_with_function(self,prompt: str, - function_schema: Dict[str,Any]) -> Dict[ - str,Any]: + def _invoke_llm_with_function(self, prompt: str, + function_schema: Dict[str, Any]) -> ( + Dict)[str, Any]: """Invoke LLM using function calling instead of JSON mode.""" response = self.llm.invoke( [HumanMessage(content=prompt)], @@ -114,21 +132,22 @@ def _invoke_llm_with_function(self,prompt: str, function_call={"name": function_schema["name"]} ) - # Extract function call arguments - if hasattr( - response,'additional_kwargs' - ) and 'function_call' in response.additional_kwargs: - function_call = response.additional_kwargs['function_call'] - return json.loads(function_call['arguments']) - else: - raise ValueError("No function call in response") - - def _get_reference_scoring_function_schema(self,ref_names: List[str]) -> \ - Dict[str,Any]: - """Create function schema for reference scoring with exact reference names.""" + function_call = getattr(response, "additional_kwargs", {}).get( + "function_call" + ) + if function_call: + return json.loads(function_call["arguments"]) + + raise ValueError("No function call in response") + + def _get_reference_scoring_function_schema(self, ref_names: List[str]) -> \ + Dict[str, Any]: + """Create function schema for reference scoring with exact reference + names.""" return { "name": "score_references", - "description": "Score how well each biblical reference is discussed in the sheet", + "description": "Score how well each reference is " + "discussed in the sheet", "parameters": { "type": "object", 
"properties": { @@ -153,8 +172,7 @@ def _get_reference_scoring_function_schema(self,ref_names: List[str]) -> \ } } - def _get_title_scoring_schema(self) -> Dict[ - str,Any]: + def _get_title_scoring_schema(self) -> Dict[str, Any]: """Create function schema for both reference and title scoring.""" return { "name": "score_title", @@ -168,34 +186,38 @@ def _get_title_scoring_schema(self) -> Dict[ }, self.TITLE_INTEREST_FIELD: { "type": "integer", - "description": "How interesting the title is to users (0-4 scale)", + "description": "How interesting the title is to " + "users (0-4 scale)", "minimum": 0, "maximum": 4 }, self.TITLE_INTEREST_REASON_FIELD: { "type": "string", - "description": "Brief explanation of title interest score (max 20 words)", + "description": "Brief explanation of title interest " + "score (max 20 words)", "maxLength": 100 } }, - "required": [self.LANGUAGE_FIELD,self.TITLE_INTEREST_FIELD, + "required": [self.LANGUAGE_FIELD, self.TITLE_INTEREST_FIELD, self.TITLE_INTEREST_REASON_FIELD], "additionalProperties": False } } - def _get_full_scoring_function_schema(self,ref_names: List[str]) -> Dict[ - str,Any]: + def _get_full_scoring_function_schema(self, ref_names: List[str]) -> ( + Dict)[str, Any]: """Create function schema for both reference and title scoring.""" return { "name": "score_sheet", - "description": "Score references and title interest for a Jewish study sheet", + "description": "Score references and title interest for a Jewish " + "study sheet", "parameters": { "type": "object", "properties": { self.LANGUAGE_FIELD: { "type": "string", - "description": "# ISO‑639‑1 code inferred from *original user‑written* content", + "description": "# ISO‑639‑1 code inferred from " + "*original user‑written* content", }, self.REF_LEVELS_FIELD: { "type": "object", @@ -214,17 +236,19 @@ def _get_full_scoring_function_schema(self,ref_names: List[str]) -> Dict[ }, self.TITLE_INTEREST_FIELD: { "type": "integer", - "description": "How interesting the title is to users (0-4 scale)", + "description": "How interesting the title is to " + "users (0-4 scale)", "minimum": 0, "maximum": 4 }, self.TITLE_INTEREST_REASON_FIELD: { "type": "string", - "description": "Brief explanation of title interest score (max 20 words)", + "description": "Brief explanation of title interest " + "score (max 20 words)", "maxLength": 100 } }, - "required": [self.LANGUAGE_FIELD,self.REF_LEVELS_FIELD, + "required": [self.LANGUAGE_FIELD, self.REF_LEVELS_FIELD, self.TITLE_INTEREST_FIELD, self.TITLE_INTEREST_REASON_FIELD], "additionalProperties": False @@ -232,70 +256,72 @@ def _get_full_scoring_function_schema(self,ref_names: List[str]) -> Dict[ } @staticmethod - def chunk_list(lst: List[Any],n: int) -> Iterator[List[Any]]: + def chunk_list(lst: List[Any], n: int) -> Iterator[List[Any]]: """Yield successive n‑sized chunks from lst.""" - for i in range(0,len(lst),n): + for i in range(0, len(lst), n): yield lst[i: i + n] - def _count_tokens(self,text: str) -> int: + def _count_tokens(self, text: str) -> int: """Rough token count; if no encoder, fall back to char heuristic.""" try: encoding = tiktoken.encoding_for_model(self.model) return len(encoding.encode(text)) - except (KeyError,ValueError) as e: + except (KeyError, ValueError) as e: logger.warning( f"Could not get encoding for model {self.model}: {e}" ) return len(text) // self.DEFAULT_TOKEN_CHAR_RATIO - def _invoke_llm(self,prompt: str) -> Dict[str,Any]: + def _invoke_llm(self, prompt: str) -> Dict[str, Any]: """Invoke LLM with prompt and parse JSON 
response.""" response = self.llm([HumanMessage(content=prompt)]) return json.loads(response.content) - def _create_title_only_prompt_function(self,sheet_title: str) -> str: - return f""" - You are scoring THE TITLE of a Jewish study sheet for how interesting it would be to users. - - SHEET TITLE: - {sheet_title} - - TASK: Return JSON with keys `title_interest_level` (0-4) and `title_interest_reason` ( < 20 words). - Choose a higher score when the title: - - Title interest level (int 0–4): - 0: Not interesting / off‑topic for users - 1: Slight relevance, low pull - 2: Somewhat interesting; user might skim - 3: Interesting; user likely to open - 4: Very compelling / must‑open - """ - - def _create_chunk_prompt_for_function(self,sheet_content: str, + def _create_title_only_prompt_function(self, sheet_title: str) -> str: + return textwrap.dedent( + f"""You are scoring THE TITLE of a Jewish study sheet for how interesting it would be to users. + + SHEET TITLE: + {sheet_title} + + TASK: Return JSON with keys `title_interest_level` (0-4) and `title_interest_reason` ( < 20 words). + Choose a higher score when the title: + + Title interest level (int 0–4): + 0: Not interesting / off‑topic for users + 1: Slight relevance, low pull + 2: Somewhat interesting; user might skim + 3: Interesting; user likely to open + 4: Very compelling / must‑open + """) + + def _create_chunk_prompt_for_function(self, sheet_content: str, ref_names: List[str]) -> str: - """Create prompt for function calling (no JSON format instructions needed).""" + """Create prompt for function calling (no JSON format instructions + needed).""" refs_md = "\n".join(f"- {r}" for r in ref_names) - return f""" -You are analyzing a Jewish study sheet. Rate how much each listed reference -is discussed or central in the sheet. + return textwrap.dedent( + f""" + You are analyzing a Jewish study sheet. Rate how much each listed reference + is discussed or central in the sheet. -SHEET CONTENT: -{sheet_content} + SHEET CONTENT: + {sheet_content} -REFERENCES TO EVALUATE: -{refs_md} + REFERENCES TO EVALUATE: + {refs_md} -Scoring Scale (0-4): - 0: Quoted only, no discussion - 1: Mentioned only through neighboring verses - 2: Moderate discussion (some commentary) - 3: Significant discussion (substantial commentary) - 4: Central focus of sheet + Scoring Scale (0-4): + 0: Quoted only, no discussion + 1: Mentioned only through neighboring verses + 2: Moderate discussion (some commentary) + 3: Significant discussion (substantial commentary) + 4: Central focus of sheet -Score each reference based on how thoroughly it's discussed in the content. -""" + Score each reference based on how thoroughly it's discussed in the content.""" + ) - def _create_final_chunk_prompt_for_function(self,sheet_content: str, + def _create_final_chunk_prompt_for_function(self, sheet_content: str, ref_names: List[str], sheet_title: str) -> str: """Create prompt for final chunk with title scoring using function @@ -303,42 +329,42 @@ def _create_final_chunk_prompt_for_function(self,sheet_content: str, sheet_title_clean = sheet_title.strip() or "(untitled)" refs_md = "\n".join(f"- {r}" for r in ref_names) - return f""" -Analyze this Jewish study sheet and provide two types of scores: - -SHEET TITLE: {sheet_title_clean} - -SHEET CONTENT: -{sheet_content} - -REFERENCES TO EVALUATE: -{refs_md} - -TASKS: -1. 
Reference Discussion Scoring (0-4): - 0: Quoted only, no discussion - 1: Mentioned only through neighboring verses - 2: Moderate discussion (some commentary) - 3: Significant discussion (substantial commentary) - 4: Central focus of sheet - -2. Title Interest Scoring (0-4): - 0: Not interesting/off-topic - 1: Slight relevance, low appeal - 2: Somewhat interesting; user might skim - 3: Interesting; user likely to open - 4: Very compelling/must-open - -Infer the language from the original user-written content. -""" - - def _validate_score_level(self,score: Any, + return textwrap.dedent(f""" + Analyze this Jewish study sheet and provide two types of scores: + + SHEET TITLE: {sheet_title_clean} + + SHEET CONTENT: + {sheet_content} + + REFERENCES TO EVALUATE: + {refs_md} + + TASKS: + 1. Reference Discussion Scoring (0-4): + 0: Quoted only, no discussion + 1: Mentioned only through neighboring verses + 2: Moderate discussion (some commentary) + 3: Significant discussion (substantial commentary) + 4: Central focus of sheet + + 2. Title Interest Scoring (0-4): + 0: Not interesting/off-topic + 1: Slight relevance, low appeal + 2: Somewhat interesting; user might skim + 3: Interesting; user likely to open + 4: Very compelling/must-open + + Infer the language from the original user-written content. + """) + + def _validate_score_level(self, score: Any, field_name: str = "score") -> int: """Validate and normalize score to valid range.""" if score not in self.VALID_LEVELS: try: score = int(score) - except (ValueError,TypeError): + except (ValueError, TypeError): logger.warning( f"Invalid {field_name}: {score}, defaulting to 0" ) @@ -347,7 +373,7 @@ def _validate_score_level(self,score: Any, if score not in self.VALID_LEVELS: clamped = max( ScoreLevel.NOT_DISCUSSED, - min(ScoreLevel.CENTRAL,score) + min(ScoreLevel.CENTRAL, score) ) logger.warning( f"{field_name} {score} out of range, clamping to {clamped}" @@ -383,13 +409,12 @@ def _sheet_to_text( logger.info("Sending to LLM sheet without quotations text") return no_quotes_content - - def _get_title_info(self,sheet_title: str) -> Dict[str,Any]: + def _get_title_info(self,sheet_title: str) -> Dict[str, Any]: """Obtain title-interest score ONLY (used when no content).""" prompt = self._create_title_only_prompt_function(sheet_title) try: function_schema = self._get_title_scoring_schema() - data = self._invoke_llm_with_function(prompt,function_schema) + data = self._invoke_llm_with_function(prompt, function_schema) title_level = self._validate_score_level( data.get(self.TITLE_INTEREST_FIELD), self.TITLE_INTEREST_FIELD @@ -399,9 +424,9 @@ def _get_title_info(self,sheet_title: str) -> Dict[str,Any]: self.TITLE_INTEREST_FIELD: title_level, self.TITLE_INTEREST_REASON_FIELD: - data.get(self.TITLE_INTEREST_REASON_FIELD,""), + data.get(self.TITLE_INTEREST_REASON_FIELD, ""), self.LANGUAGE_FIELD: data.get( - self.LANGUAGE_FIELD,LanguageCode.DEFAULT + self.LANGUAGE_FIELD, LanguageCode.DEFAULT ), } except Exception as e: @@ -415,21 +440,21 @@ def _get_title_info(self,sheet_title: str) -> Dict[str,Any]: def _normalize_scores_to_percentages( self, sheet_tokens: int, - score_levels: Dict[str,int], + score_levels: Dict[str, int], beta: float = 1500 # token mass where no penalty - ) -> Dict[str,float]: + ) -> Dict[str, float]: total_level = sum(score_levels.values()) or 1 - size_factor = min(1.0,sheet_tokens / beta) # clamp to 1 + size_factor = min(1.0, sheet_tokens / beta) # clamp to 1 # small sheets (few tokens) → size_factor < 1 → percentages shrink percentages = { - 
ref: round(level * 100 / total_level * size_factor,2) - for ref,level in score_levels.items() + ref: round(level * 100 / total_level * size_factor, 2) + for ref, level in score_levels.items() } norm = sum(percentages.values()) or 1 - percentages = {r: round(v * 100 / norm,2) for r,v in + percentages = {r: round(v * 100 / norm, 2) for r, v in percentages.items()} return percentages @@ -440,7 +465,7 @@ def _grade_refs_resilient( *, with_title: bool = False, sheet_title: str = "" - ) -> Tuple[Optional[Dict[str,Any]],Dict[str,int]]: + ) -> Tuple[Optional[Dict[str, Any]], Dict[str, int]]: """ Robustly grade a list of refs. • First try the whole list. @@ -448,52 +473,52 @@ def _grade_refs_resilient( split the list in two and grade each half recursively. """ if not refs: - return {},{} + return {}, {} try: if with_title: prompt = self._create_final_chunk_prompt_for_function( - content,refs,sheet_title + content, refs, sheet_title ) function_schema = self._get_full_scoring_function_schema(refs) else: - prompt = self._create_chunk_prompt_for_function(content,refs) + prompt = self._create_chunk_prompt_for_function(content, refs) function_schema = self._get_reference_scoring_function_schema( refs ) data,scores = self._get_gpt_ref_scores_function( - prompt,function_schema,refs + prompt, function_schema, refs ) - return data,scores + return data, scores except Exception: pass # fallback branch if len(refs) == 1: # nothing left to split - return {},{refs[0]: ScoreLevel.NOT_DISCUSSED} + return {}, {refs[0]: ScoreLevel.NOT_DISCUSSED} mid = len(refs) // 2 ld,ls = self._grade_refs_resilient( - content,refs[:mid], + content, refs[:mid], with_title=with_title, sheet_title=sheet_title ) rd,rs = self._grade_refs_resilient( - content,refs[mid:], + content, refs[mid:], with_title=with_title, sheet_title=sheet_title ) - merged_scores = {**ls,**rs} + merged_scores = {**ls, **rs} merged_data = ld or rd - return merged_data,merged_scores + return merged_data, merged_scores - def _get_gpt_ref_scores_function(self,prompt: str,function_schema, + def _get_gpt_ref_scores_function(self,prompt: str, function_schema, expected_refs: List[str]): try: - data = self._invoke_llm_with_function(prompt,function_schema) - chunk_scores = data.get(self.REF_LEVELS_FIELD,{}) + data = self._invoke_llm_with_function(prompt, function_schema) + chunk_scores = data.get(self.REF_LEVELS_FIELD, {}) validated_scores = {} - for ref,score in chunk_scores.items(): + for ref, score in chunk_scores.items(): validated_scores[ref] = self._validate_score_level( score,f"ref_score[{ref}]" ) @@ -513,13 +538,14 @@ def _get_gpt_ref_scores_function(self,prompt: str,function_schema, f"Missing {len(missing_refs)} references" ) - # Ensure we only include expected references (in case GPT returned extras) + # Ensure we only include expected references (in case GPT + # returned extras) final_scores = { - ref: validated_scores.get(ref,ScoreLevel.NOT_DISCUSSED) for ref + ref: validated_scores.get(ref, ScoreLevel.NOT_DISCUSSED) for ref in expected_refs} data[self.REF_SCORES_FIELD] = final_scores - return data,final_scores + return data, final_scores except IncompleteScoreError: raise @@ -528,7 +554,7 @@ def _get_gpt_ref_scores_function(self,prompt: str,function_schema, logger.error(f"Chunk GPT failed: {e}") return None - def _last_regular_start(self,n: int,chunk: int,overlap: int) -> int: + def _last_regular_start(self, n: int, chunk: int, overlap: int) -> int: """ Return the index where the *final* chunk (with title) should start. 
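# A minimal sketch of the boundary arithmetic above, assuming the defaults cited
# elsewhere in this series (chunk size 80, overlap 10); the helper name and the
# asserts are illustrative only, not part of the committed code.
def last_regular_start_sketch(n, chunk=80, overlap=10):
    if n <= chunk + overlap:      # whole ref list fits one slightly padded chunk
        return 0
    step = chunk - overlap
    return max(0, n - chunk) if step <= 0 else (n - chunk)

assert last_regular_start_sketch(50) == 0    # small sheet: title chunk covers all refs
assert last_regular_start_sketch(90) == 0    # still within chunk + overlap
assert last_regular_start_sketch(150) == 70  # final (title) chunk scores refs[70:150]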
If the total length fits into one chunk plus the allowed overlap, @@ -537,25 +563,25 @@ def _last_regular_start(self,n: int,chunk: int,overlap: int) -> int: if n <= chunk + overlap: return 0 step = chunk - overlap - return max(0,n - chunk) if step <= 0 else (n - chunk) + return max(0, n - chunk) if step <= 0 else (n - chunk) def _process_reference_chunks( self, content: str, ref_names: List[str] - ) -> Optional[Dict[str,int]]: + ) -> Optional[Dict[str, int]]: """Process reference chunks in batches.""" - ref_scores: Dict[str,int] = {} + ref_scores: Dict[str, int] = {} last_chunk_start = self._last_regular_start( - len(ref_names),self.chunk_size,self.MAX_CHUNK_OVERLAP + len(ref_names), self.chunk_size, self.MAX_CHUNK_OVERLAP ) for chunk in self.chunk_list( - ref_names[:last_chunk_start],self.chunk_size + ref_names[:last_chunk_start], self.chunk_size ): # prompt = self._create_chunk_prompt(content,chunk) - _,chunk_scores = self._grade_refs_resilient( + _, chunk_scores = self._grade_refs_resilient( content=content, refs=chunk, with_title=False @@ -571,10 +597,10 @@ def _process_final_chunk_with_title( content: str, ref_names: List[str], title: str, - ) -> Optional[Dict[str,Any]]: + ) -> Optional[Dict[str, Any]]: """Process final chunk and get title scores.""" start = self._last_regular_start( - len(ref_names),self.chunk_size,self.MAX_CHUNK_OVERLAP + len(ref_names), self.chunk_size, self.MAX_CHUNK_OVERLAP ) final_chunk = ref_names[start:] @@ -589,7 +615,7 @@ def _process_final_chunk_with_title( if result is None: return None - data,_ = result + data, _ = result return data def get_gpt_scores( @@ -597,22 +623,22 @@ def get_gpt_scores( content: str, ref_names: List[str], title: str, - ) -> Optional[Dict[str,Any]]: + ) -> Optional[Dict[str, Any]]: """Get GPT scores for references and title.""" # Process reference chunks - ref_scores = self._process_reference_chunks(content,ref_names) + ref_scores = self._process_reference_chunks(content, ref_names) if ref_scores is None: return None # Process final chunk with title final_data = self._process_final_chunk_with_title( - content,ref_names,title + content, ref_names, title ) if final_data is None: return None # Combine scores - final_chunk_scores = final_data.get(self.REF_SCORES_FIELD,{}) + final_chunk_scores = final_data.get(self.REF_SCORES_FIELD, {}) ref_scores.update(final_chunk_scores) # # Normalize to percentages @@ -629,17 +655,17 @@ def get_gpt_scores( return { self.LANGUAGE_FIELD: final_data.get( - self.LANGUAGE_FIELD,LanguageCode.DEFAULT + self.LANGUAGE_FIELD, LanguageCode.DEFAULT ), self.REF_LEVELS_FIELD: ref_scores, self.REF_SCORES_FIELD: score_percentages, self.TITLE_INTEREST_FIELD: title_level, self.TITLE_INTEREST_REASON_FIELD: final_data.get( - self.TITLE_INTEREST_REASON_FIELD,"" + self.TITLE_INTEREST_REASON_FIELD, "" ), } - def _truncate_to_token_budget(self,text: str,max_tokens: int) -> str: + def _truncate_to_token_budget(self, text: str, max_tokens: int) -> str: """Truncate text to fit within token budget using LLM summarization.""" if self._count_tokens(text) <= max_tokens: return text @@ -666,13 +692,13 @@ def _truncate_to_token_budget(self,text: str,max_tokens: int) -> str: # Fallback: character-based truncation return text[:max_tokens * self.DEFAULT_TOKEN_CHAR_RATIO] - def process_sheet_by_content(self,sheet: Dict[str,Any], - add_full_commentary=False) -> Optional[ - Dict[str,Any]]: + def process_sheet_by_content(self, sheet: Dict[str, Any], + add_full_commentary=False) -> ( + Optional)[Dict[str, Any]]: """Score a single sheet 
based on its content.""" sheet_id = str(sheet.get(self.SHEET_ID_FIELD)) - ref_names = sheet.get("expandedRefs",[]) - sheet_title = sheet.get("title","") + ref_names = sheet.get("expandedRefs", []) + sheet_title = sheet.get("title", "") if not ref_names: logger.info(f"No expanded refs for sheet {sheet_id}, skipping") @@ -699,13 +725,13 @@ def process_sheet_by_content(self,sheet: Dict[str,Any], self.PROCESSED_AT_FIELD: datetime.utcnow(), **title_info } - content = self._sheet_to_text(no_quotes_content=no_quotes_content, - full_content=full_content, - max_tokens=self.max_prompt_tokens- - self.token_margin, - add_full_commentary=add_full_commentary) + content = self._sheet_to_text( + no_quotes_content=no_quotes_content, + full_content=full_content, + max_tokens=self.max_prompt_tokens-self.token_margin, + add_full_commentary=add_full_commentary) # Process with GPT - gpt_analysis = self.get_gpt_scores(content,ref_names,sheet_title) + gpt_analysis = self.get_gpt_scores(content, ref_names, sheet_title) if not gpt_analysis: logger.error(f"Failed to get GPT scores for sheet {sheet_id}") return None diff --git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py index 2d0b02f..c719dd7 100644 --- a/app/sheet_scoring/sheet_scoring.py +++ b/app/sheet_scoring/sheet_scoring.py @@ -1,19 +1,15 @@ -from .openai_sheets_scorer import SheetScorer +from sheet_scoring.openai_sheets_scorer import SheetScorer import os from pathlib import Path from sefaria_llm_interface.scoring_io import ( SheetScoringInput, SheetScoringOutput, ) -from dotenv import load_dotenv -load_dotenv(Path(__file__).parent / "secrets.env") # adjust path if needed def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: - scorer = SheetScorer( - api_key=os.getenv("OPENAI_API_KEY"), - ) + api_key=os.getenv("OPENAI_API_KEY")) result = scorer.process_sheet_by_content(sheet=inp.sheet_content) if not result: return SheetScoringOutput( @@ -24,7 +20,7 @@ def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: title_interest_reason="", language="", creativity_score=0, - processed_at=None, + processed_datetime=None, ) return SheetScoringOutput( sheet_id=result[scorer.SHEET_ID_FIELD], @@ -34,7 +30,5 @@ def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: title_interest_reason=result[scorer.TITLE_INTEREST_REASON_FIELD], language=result[scorer.LANGUAGE_FIELD], creativity_score=result[scorer.CREATIVITY_SCORE_FIELD], - processed_at=result["processed_at"].isoformat(), + processed_datetime=result["processed_datetime"].isoformat(), ) - - diff --git a/app/sheet_scoring/tasks.py b/app/sheet_scoring/tasks.py index e3a7c30..84f198f 100644 --- a/app/sheet_scoring/tasks.py +++ b/app/sheet_scoring/tasks.py @@ -1,5 +1,5 @@ from celery import shared_task -from .sheet_scoring import score_one_sheet +from sheet_scoring.sheet_scoring import score_one_sheet from sefaria_llm_interface.scoring_io import ( SheetScoringInput ) @@ -7,7 +7,7 @@ @shared_task(name='llm.score_sheet') -def score_sheet_task(raw_input:dict) -> dict: +def score_sheet_task(raw_input: dict) -> dict: inp = SheetScoringInput(**raw_input) out = score_one_sheet(inp) return asdict(out) \ No newline at end of file diff --git a/app/sheet_scoring/text_utils.py b/app/sheet_scoring/text_utils.py index 4cdaab2..73096d2 100644 --- a/app/sheet_scoring/text_utils.py +++ b/app/sheet_scoring/text_utils.py @@ -3,17 +3,17 @@ from typing import Dict, List, Tuple, Any TAG_RE = re.compile(r"<[^>]+>") -TOKEN_RE = re.compile(r"\b\w+\b",re.UNICODE) +TOKEN_RE = 
re.compile(r"\b\w+\b", re.UNICODE) def strip_html(raw: str) -> str: """Remove tags & entities, collapse whitespace.""" if not raw: return "" - text = TAG_RE.sub("",raw) + text = TAG_RE.sub("", raw) text = html.unescape(text) - text = re.sub(r"\s+\n","\n",text) # trim spaces before newlines - text = re.sub(r"[ \t]{2,}"," ",text) # collapse runs of blanks + text = re.sub(r"\s+\n", "\n", text) # trim spaces before newlines + text = re.sub(r"[ \t]{2,}", " ", text) # collapse runs of blanks return text.strip() @@ -39,13 +39,13 @@ def sheet_to_text_views( creativity_score float – user_token_count / total_token_count """ - quotes: List[str] = [] - no_quotes: List[str] = [] - with_quotes:List[str] = [] + quotes: List[str] = [] + no_quotes: List[str] = [] + with_quotes: List[str] = [] original_tokens = 0 - quoted_tokens = 0 - has_original = False + quoted_tokens = 0 + has_original = False title = strip_html(sheet.get("title", "")).strip() if title: @@ -75,7 +75,7 @@ def sheet_to_text_views( with_quotes.append(txt) if "text" in blk: - ref = blk.get("ref", "").strip() + ref = blk.get("ref", "").strip() canon = strip_html(blk["text"].get(default_lang, "")).strip() # show ref label in all views @@ -106,11 +106,11 @@ def sheet_to_text_views( with_quotes.append(txt) joiner = "\n\n" - quotes_only = joiner.join(quotes) - commentary = joiner.join(no_quotes) - full_sheet = joiner.join(with_quotes) + quotes_only = joiner.join(quotes) + commentary = joiner.join(no_quotes) + full_sheet = joiner.join(with_quotes) total_tokens = original_tokens + quoted_tokens or 1 # avoid div‑by‑zero - creativity = original_tokens / total_tokens + creativity = original_tokens / total_tokens return quotes_only, commentary, full_sheet, has_original, creativity \ No newline at end of file From fb6e509076553abc2f6951db0a4fea198cdc9a40 Mon Sep 17 00:00:00 2001 From: nsantacruz Date: Sun, 10 Aug 2025 10:14:29 +0300 Subject: [PATCH 03/36] chore: update requirements to include langchain_openai package --- app/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/requirements.txt b/app/requirements.txt index 8dde30f..4bef912 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,4 +1,5 @@ -langchain[llms]~=0.2.1 +langchain[llms] +langchain_openai anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 From 2a1833286fc81189601c37dc2909e65c5057d9ad Mon Sep 17 00:00:00 2001 From: nsantacruz Date: Sun, 10 Aug 2025 10:19:12 +0300 Subject: [PATCH 04/36] chore: update tiktoken version specification in requirements --- app/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/requirements.txt b/app/requirements.txt index 4bef912..2802a31 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -10,7 +10,7 @@ tqdm~=4.66.1 celery[redis]~=5.2.7 diff-match-patch dnspython~=2.5.0 -tiktoken~=0.4.0 +tiktoken readability_lxml tenacity==8.3.0 requests From 42f505d8d180824eb81fff0e8a626e99dd0d6f6f Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Sun, 10 Aug 2025 10:45:33 +0300 Subject: [PATCH 05/36] style: changed input/output class names of sheet_scoring app to be consistent with topic_prompt and other llm apps scoring_io_input.py -> sheet_scoring_input.py scoring_io_output.py -> sheet_scoring_output.py package name scoring_io -> sheet_scoring consequently renamed all the inputs --- app/commentary_scoring/__init__.py | 0 .../sefaria_llm_interface/scoring_io/__init__.py | 4 ---- .../sefaria_llm_interface/sheet_scoring/__init__.py | 4 ++++ .../sheet_scoring_input.py} | 0 
.../sheet_scoring_output.py} | 0 app/sheet_scoring/openai_sheets_scorer.py | 6 +++--- app/sheet_scoring/sheet_scoring.py | 7 +++++-- app/sheet_scoring/tasks.py | 2 +- 8 files changed, 13 insertions(+), 10 deletions(-) create mode 100644 app/commentary_scoring/__init__.py delete mode 100644 app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py create mode 100644 app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py rename app/llm_interface/sefaria_llm_interface/{scoring_io/scoring_io_input.py => sheet_scoring/sheet_scoring_input.py} (100%) rename app/llm_interface/sefaria_llm_interface/{scoring_io/scoring_io_output.py => sheet_scoring/sheet_scoring_output.py} (100%) diff --git a/app/commentary_scoring/__init__.py b/app/commentary_scoring/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py b/app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py deleted file mode 100644 index 2c39c73..0000000 --- a/app/llm_interface/sefaria_llm_interface/scoring_io/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .scoring_io_input import SheetScoringInput -from .scoring_io_output import SheetScoringOutput - -__all__ = ["SheetScoringInput", "SheetScoringOutput"] \ No newline at end of file diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py new file mode 100644 index 0000000..5758e1d --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py @@ -0,0 +1,4 @@ +from .sheet_scoring_input import SheetScoringInput +from .sheet_scoring_output import SheetScoringOutput + +__all__ = ["SheetScoringInput", "SheetScoringOutput"] \ No newline at end of file diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py similarity index 100% rename from app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_input.py rename to app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py diff --git a/app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py similarity index 100% rename from app/llm_interface/sefaria_llm_interface/scoring_io/scoring_io_output.py rename to app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index a44e72f..e442b2b 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -77,7 +77,7 @@ class SheetScorer: LANGUAGE_FIELD = "language" TITLE_INTEREST_REASON_FIELD = 'title_interest_reason' SHEET_ID_FIELD = "_id" - PROCESSED_AT_FIELD = "processed_datetime" + PROCESSED_DATETIME_FIELD = "processed_datetime" CREATIVITY_SCORE_FIELD = 'creativity_score' # Valid score levels @@ -722,7 +722,7 @@ def process_sheet_by_content(self, sheet: Dict[str, Any], self.REF_LEVELS_FIELD: score_percentages, self.CREATIVITY_SCORE_FIELD: creativity_score, self.REF_SCORES_FIELD: score_percentages, - self.PROCESSED_AT_FIELD: datetime.utcnow(), + self.PROCESSED_DATETIME_FIELD: datetime.utcnow(), **title_info } content = self._sheet_to_text( @@ -740,7 +740,7 @@ def process_sheet_by_content(self, sheet: Dict[str, Any], self.CREATIVITY_SCORE_FIELD: creativity_score, self.REF_SCORES_FIELD: 
gpt_analysis[self.REF_SCORES_FIELD], self.REF_LEVELS_FIELD: gpt_analysis[self.REF_LEVELS_FIELD], - self.PROCESSED_AT_FIELD: datetime.utcnow(), + self.PROCESSED_DATETIME_FIELD: datetime.utcnow(), self.LANGUAGE_FIELD: gpt_analysis[self.LANGUAGE_FIELD], self.TITLE_INTEREST_FIELD: gpt_analysis[self.TITLE_INTEREST_FIELD], self.TITLE_INTEREST_REASON_FIELD: diff --git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py index c719dd7..e32960f 100644 --- a/app/sheet_scoring/sheet_scoring.py +++ b/app/sheet_scoring/sheet_scoring.py @@ -1,11 +1,14 @@ from sheet_scoring.openai_sheets_scorer import SheetScorer import os from pathlib import Path -from sefaria_llm_interface.scoring_io import ( +from sefaria_llm_interface.sheet_scoring import ( SheetScoringInput, SheetScoringOutput, ) +from dotenv import load_dotenv +import openai +load_dotenv("/Users/home/PycharmProjects/LLM/secrets.env") def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: scorer = SheetScorer( @@ -30,5 +33,5 @@ def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: title_interest_reason=result[scorer.TITLE_INTEREST_REASON_FIELD], language=result[scorer.LANGUAGE_FIELD], creativity_score=result[scorer.CREATIVITY_SCORE_FIELD], - processed_datetime=result["processed_datetime"].isoformat(), + processed_datetime=result[scorer.PRCOESSED_DATETIME_FIELD].isoformat(), ) diff --git a/app/sheet_scoring/tasks.py b/app/sheet_scoring/tasks.py index 84f198f..eb4aa55 100644 --- a/app/sheet_scoring/tasks.py +++ b/app/sheet_scoring/tasks.py @@ -1,6 +1,6 @@ from celery import shared_task from sheet_scoring.sheet_scoring import score_one_sheet -from sefaria_llm_interface.scoring_io import ( +from sefaria_llm_interface.sheet_scoring import ( SheetScoringInput ) from dataclasses import asdict From e52e9386d5dd744a50d8fc249ebcc0188162d2d1 Mon Sep 17 00:00:00 2001 From: nsantacruz Date: Sun, 10 Aug 2025 13:06:37 +0300 Subject: [PATCH 06/36] chore: update sefaria_llm_interface version to 1.3.3 in requirements --- app/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/requirements.txt b/app/requirements.txt index 2802a31..9d45ce3 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -15,4 +15,4 @@ readability_lxml tenacity==8.3.0 requests numpy -git+https://github.com/Sefaria/LLM@v1.0.3#egg=sefaria_llm_interface&subdirectory=app/llm_interface +git+https://github.com/Sefaria/LLM@v1.3.3#egg=sefaria_llm_interface&subdirectory=app/llm_interface From 21c91ad709ca7e2b3795bd8c89c45fe6e3678c53 Mon Sep 17 00:00:00 2001 From: nsantacruz Date: Sun, 10 Aug 2025 14:01:59 +0300 Subject: [PATCH 07/36] chore: reduce celery worker concurrency from 50 to 4 --- build/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/entrypoint.sh b/build/entrypoint.sh index fbb070e..7c5136d 100644 --- a/build/entrypoint.sh +++ b/build/entrypoint.sh @@ -1,3 +1,3 @@ #!/bin/bash -celery -A celery_setup.app worker -Q ${QUEUE_NAME} -l INFO --concurrency 50 \ No newline at end of file +celery -A celery_setup.app worker -Q ${QUEUE_NAME} -l INFO --concurrency 4 \ No newline at end of file From 2f5c6e3f0256ff7b4e3bd0301fa4718a893b1c49 Mon Sep 17 00:00:00 2001 From: nsantacruz Date: Sun, 10 Aug 2025 14:02:08 +0300 Subject: [PATCH 08/36] fix: correct wording in system message for sentence extraction --- app/util/sentencizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/util/sentencizer.py b/app/util/sentencizer.py index 363876c..d76da17 100644 --- 
a/app/util/sentencizer.py +++ b/app/util/sentencizer.py @@ -74,7 +74,7 @@ def claude_sentencizer_first_sentence(text): from basic_langchain.chat_models import ChatAnthropic from basic_langchain.schema import SystemMessage, HumanMessage from util.general import get_by_xml_tag - system = SystemMessage(content="Given a text discussing Torah topics will little to no punctuation, " + system = SystemMessage(content="Given a text discussing Torah topics with little to no punctuation, " "output the first sentence. Input is in tags. The first sentence " "should be output verbatim as it appears in wrapped in " " tags. Since the input text has no punctuation, use your judgement as to where the first sentence ends. Prefer smaller sentences.") From b09725b86cd3539a7f6b17d10fcc97b1b71f0fce Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Mon, 11 Aug 2025 13:34:23 +0300 Subject: [PATCH 09/36] style: changed imports inside sefaria-llm-main from local to global --- .../sefaria_llm_interface/sheet_scoring/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py index 5758e1d..f0aaabe 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py @@ -1,4 +1,3 @@ -from .sheet_scoring_input import SheetScoringInput -from .sheet_scoring_output import SheetScoringOutput +from sefaria_llm_interface.sheet_scoring.sheet_scoring_input import * +from sefaria_llm_interface.sheet_scoring.sheet_scoring_output import * -__all__ = ["SheetScoringInput", "SheetScoringOutput"] \ No newline at end of file From 380201d1eaa5a81ff494434c73775d04b65bbdb5 Mon Sep 17 00:00:00 2001 From: Margo Levin <89296464+morganizzzm@users.noreply.github.com> Date: Mon, 11 Aug 2025 14:11:11 +0300 Subject: [PATCH 10/36] Delete app/commentary_scoring directory added this by mistake in one of the commits --- app/commentary_scoring/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 app/commentary_scoring/__init__.py diff --git a/app/commentary_scoring/__init__.py b/app/commentary_scoring/__init__.py deleted file mode 100644 index e69de29..0000000 From e95874d41aa167cfc80c8e67f85e1b5932cf7ac8 Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Mon, 11 Aug 2025 15:36:40 +0300 Subject: [PATCH 11/36] style: fixed spelling mistake in PROCESSED_DATETIME_FIELD field and removed import of env variables --- app/commentary_scoring/__init__.py | 0 app/sheet_scoring/sheet_scoring.py | 6 +----- 2 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 app/commentary_scoring/__init__.py diff --git a/app/commentary_scoring/__init__.py b/app/commentary_scoring/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py index e32960f..30fcd51 100644 --- a/app/sheet_scoring/sheet_scoring.py +++ b/app/sheet_scoring/sheet_scoring.py @@ -1,14 +1,10 @@ from sheet_scoring.openai_sheets_scorer import SheetScorer import os -from pathlib import Path from sefaria_llm_interface.sheet_scoring import ( SheetScoringInput, SheetScoringOutput, ) -from dotenv import load_dotenv -import openai -load_dotenv("/Users/home/PycharmProjects/LLM/secrets.env") def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: scorer = SheetScorer( @@ -33,5 +29,5 @@ def score_one_sheet(inp: SheetScoringInput) -> 
SheetScoringOutput: title_interest_reason=result[scorer.TITLE_INTEREST_REASON_FIELD], language=result[scorer.LANGUAGE_FIELD], creativity_score=result[scorer.CREATIVITY_SCORE_FIELD], - processed_datetime=result[scorer.PRCOESSED_DATETIME_FIELD].isoformat(), + processed_datetime=result[scorer.PROCESSED_DATETIME_FIELD].isoformat(), ) From c67ead66bd236cb46f9867cbb3e9e460d2b20066 Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Tue, 12 Aug 2025 12:41:05 +0300 Subject: [PATCH 12/36] feat: updated requirements.txt to use sefraia-llm-interface of the version v1.3.5 --- app/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/requirements.txt b/app/requirements.txt index 9d45ce3..3783ac1 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -15,4 +15,4 @@ readability_lxml tenacity==8.3.0 requests numpy -git+https://github.com/Sefaria/LLM@v1.3.3#egg=sefaria_llm_interface&subdirectory=app/llm_interface +git+https://github.com/Sefaria/LLM@v1.3.5#egg=sefaria_llm_interface&subdirectory=app/llm_interface From 642db8aff0d45a9eca97c2418e19434a10ea10cb Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Wed, 13 Aug 2025 14:33:45 +0300 Subject: [PATCH 13/36] feat(llm/sheet_scoring): refactor scoring pipeline to use typed I/O and unified output - Updated SheetScoringInput to have only the data used by SheetScorer: sheet_id, title, sources, expanded_refs - refactored SheetScorer to return SheetScoringOutput directly instead of a dict - Changed score_one_sheet to call process_sheet_by_content with explicit args (title, sources etc.) - Added request_status and request_status_message fields to SheetScoringOutput - Modified sheet_to_text_views to accept title/sources and return dict with quotes_only, no_quotes, with_quotes, has_original, creativity_score - Simplified strip_html to only collapse whitespace/newlines, removed HTML entity decoding - Added create_failure_output for standardized error handling --- .../sheet_scoring/sheet_scoring_input.py | 8 +- .../sheet_scoring/sheet_scoring_output.py | 2 + app/sheet_scoring/README.md | 25 ++- app/sheet_scoring/openai_sheets_scorer.py | 190 +++++++++++------- app/sheet_scoring/sheet_scoring.py | 26 +-- app/sheet_scoring/text_utils.py | 28 ++- 6 files changed, 155 insertions(+), 124 deletions(-) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py index acff7e8..e64a3d6 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py @@ -1,8 +1,12 @@ from dataclasses import dataclass -from typing import Any +from typing import List, Dict, Union @dataclass class SheetScoringInput: - sheet_content: dict[str, Any] + # str version of _id + sheet_id: str + title: str + sources: List[Dict[str, Union[str, Dict[str, str]]]] + expanded_refs: str diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py index 14bccd1..d66c18e 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py @@ -13,6 +13,8 @@ class SheetScoringOutput: creativity_score: float ref_levels: Dict[str, int] ref_scores: Dict[str, float] + request_status: int + request_status_message: str def __post_init__(self): if 
isinstance(self.processed_datetime, datetime): diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md index 40f8c0d..9377bd9 100644 --- a/app/sheet_scoring/README.md +++ b/app/sheet_scoring/README.md @@ -6,7 +6,7 @@ - **Reference Discussion Scoring**: Analyzes how thoroughly each biblical reference is discussed (**0-4 scale**) - **Title Interest Scoring**: Evaluates how engaging sheet titles are to potential readers (**0-4 scale**) -- **Creativity Assessment**: Computes creativity scores based on percentage of **user-generated content** +- **Creativity Assessment**: Computes creativity scores based on percentage of **user-generated content**. - **Title Interest Reason**: Explanation of title scoring. ## Quick Start @@ -193,15 +193,20 @@ Designed for **MongoDB integration** with expected document structure: ## Output Fields -| Field | Description | -|-------|-------------| -| **`ref_levels`** | Raw **0-4 scores** for each reference | -| **`ref_scores`** | **Normalized percentage scores** (sum to 100%) | -| **`title_interest_level`** | Title **engagement score** (0-4) | -| **`title_interest_reason`** | **Brief explanation** of title score | -| **`language`** | **Detected language code** | -| **`creativity_score`** | **Percentage** of user-generated content | -| **`processed_at`** | **Processing timestamp** | +| Field | Description | +|-----------------------------|------------------------------------------------| +| **`ref_levels`** | Raw **0-4 scores** for each reference | +| **`ref_scores`** | **Normalized percentage scores** (sum to 100%) | +| **`title_interest_level`** | Title **engagement score** (0-4) | +| **`title_interest_reason`** | **Brief explanation** of title score | +| **`language`** | **Detected language code** | +| **`creativity_score`** | **Percentage** of user-generated content | +| **`processed_datetime`** | **Processing timestamp** | +| **`request_status`** | **Whether scoring succeded/failed** | +| **`request_status_message`** | **The reason why scoring failed** | + + + ## Logging diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index e442b2b..c7803d7 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -2,13 +2,13 @@ import logging from datetime import datetime from enum import IntEnum -from typing import Any, Dict, Iterator, List, Optional, Set, Tuple +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union import textwrap import tiktoken from langchain.schema import HumanMessage from langchain_openai import ChatOpenAI from sheet_scoring.text_utils import sheet_to_text_views - +from sefaria_llm_interface.sheet_scoring import SheetScoringOutput # Configure logging logger = logging.getLogger(__name__) @@ -18,6 +18,12 @@ class IncompleteScoreError(Exception): pass +class RequestStatusOptions(IntEnum): + """Enumeration for tracking the status of LLM processing requests.""" + SUCCESS = 1 + FAILURE = 0 + + class ScoreLevel(IntEnum): """Reference discussion and title interest levels.""" NOT_DISCUSSED = 0 @@ -91,7 +97,6 @@ def __init__( token_margin: int = DEFAULT_MAX_OUTPUT_TOKENS, max_ref_to_process: int = DEFAULT_MAX_REFS_TO_PROCESS, chunk_size: int = DEFAULT_CHUNK_SIZE, - ): self.max_prompt_tokens = max_prompt_tokens self.token_margin = token_margin @@ -140,7 +145,7 @@ def _invoke_llm_with_function(self, prompt: str, raise ValueError("No function call in response") - def _get_reference_scoring_function_schema(self, ref_names: List[str]) 
-> \ + def _get_reference_scoring_function_schema(self, expanded_refs: List[str]) -> \ Dict[str, Any]: """Create function schema for reference scoring with exact reference names.""" @@ -161,9 +166,9 @@ def _get_reference_scoring_function_schema(self, ref_names: List[str]) -> \ "minimum": 0, "maximum": 4 } - for ref_name in ref_names + for ref_name in expanded_refs }, - "required": ref_names, + "required": expanded_refs, "additionalProperties": False } }, @@ -204,7 +209,7 @@ def _get_title_scoring_schema(self) -> Dict[str, Any]: } } - def _get_full_scoring_function_schema(self, ref_names: List[str]) -> ( + def _get_full_scoring_function_schema(self, expanded_refs: List[str]) -> ( Dict)[str, Any]: """Create function schema for both reference and title scoring.""" return { @@ -229,9 +234,9 @@ def _get_full_scoring_function_schema(self, ref_names: List[str]) -> ( "minimum": 0, "maximum": 4 } - for ref_name in ref_names + for ref_name in expanded_refs }, - "required": ref_names, + "required": expanded_refs, "additionalProperties": False }, self.TITLE_INTEREST_FIELD: { @@ -296,10 +301,10 @@ def _create_title_only_prompt_function(self, sheet_title: str) -> str: """) def _create_chunk_prompt_for_function(self, sheet_content: str, - ref_names: List[str]) -> str: + expanded_refs: List[str]) -> str: """Create prompt for function calling (no JSON format instructions needed).""" - refs_md = "\n".join(f"- {r}" for r in ref_names) + refs_md = "\n".join(f"- {r}" for r in expanded_refs) return textwrap.dedent( f""" You are analyzing a Jewish study sheet. Rate how much each listed reference @@ -322,12 +327,12 @@ def _create_chunk_prompt_for_function(self, sheet_content: str, ) def _create_final_chunk_prompt_for_function(self, sheet_content: str, - ref_names: List[str], + expanded_refs: List[str], sheet_title: str) -> str: """Create prompt for final chunk with title scoring using function calling.""" sheet_title_clean = sheet_title.strip() or "(untitled)" - refs_md = "\n".join(f"- {r}" for r in ref_names) + refs_md = "\n".join(f"- {r}" for r in expanded_refs) return textwrap.dedent(f""" Analyze this Jewish study sheet and provide two types of scores: @@ -443,6 +448,8 @@ def _normalize_scores_to_percentages( score_levels: Dict[str, int], beta: float = 1500 # token mass where no penalty ) -> Dict[str, float]: + """Convert reference scores to percentages with size penalty + for shorter sheets.""" total_level = sum(score_levels.values()) or 1 size_factor = min(1.0, sheet_tokens / beta) # clamp to 1 @@ -467,10 +474,14 @@ def _grade_refs_resilient( sheet_title: str = "" ) -> Tuple[Optional[Dict[str, Any]], Dict[str, int]]: """ - Robustly grade a list of refs. - • First try the whole list. - • If the model returns < len(refs) scores (or JSON error), - split the list in two and grade each half recursively. + Fault-tolerant reference scoring using divide-and-conquer strategy. + Attempts to score all references at once via LLM. If that fails + (due to incomplete responses), + recursively splits the reference list in half and scores each + subset separately until all references have scores. + This prevents total failure when the LLM struggles with large + reference lists or encounters transient errors. 
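# A self-contained sketch of the divide-and-conquer fallback the docstring above
# describes: try the whole batch, split it in half whenever the model returns an
# incomplete result, and default a single stubborn ref to 0. `grade_fn` stands in
# for the LLM call; it is not the actual _grade_refs_resilient signature.
from typing import Callable, Dict, List

def grade_resilient(refs: List[str],
                    grade_fn: Callable[[List[str]], Dict[str, int]]) -> Dict[str, int]:
    if not refs:
        return {}
    try:
        scores = grade_fn(refs)                # first attempt: the whole list
        if set(refs) - set(scores):
            raise ValueError("incomplete scores")
        return {r: scores[r] for r in refs}
    except Exception:
        if len(refs) == 1:                     # nothing left to split
            return {refs[0]: 0}
        mid = len(refs) // 2                   # otherwise recurse on both halves
        return {**grade_resilient(refs[:mid], grade_fn),
                **grade_resilient(refs[mid:], grade_fn)}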
+ """ if not refs: return {}, {} @@ -486,7 +497,7 @@ def _grade_refs_resilient( function_schema = self._get_reference_scoring_function_schema( refs ) - data,scores = self._get_gpt_ref_scores_function( + data, scores = self._get_gpt_ref_scores_function( prompt, function_schema, refs ) return data, scores @@ -498,12 +509,12 @@ def _grade_refs_resilient( return {}, {refs[0]: ScoreLevel.NOT_DISCUSSED} mid = len(refs) // 2 - ld,ls = self._grade_refs_resilient( + ld, ls = self._grade_refs_resilient( content, refs[:mid], with_title=with_title, sheet_title=sheet_title ) - rd,rs = self._grade_refs_resilient( + rd, rs = self._grade_refs_resilient( content, refs[mid:], with_title=with_title, sheet_title=sheet_title @@ -512,27 +523,31 @@ def _grade_refs_resilient( merged_data = ld or rd return merged_data, merged_scores - def _get_gpt_ref_scores_function(self,prompt: str, function_schema, + def _get_gpt_ref_scores_function(self, prompt: str, function_schema, expected_refs: List[str]): + """Calls the LLM with structured function schema, validates all + returned scores are in valid range (0-4), handles missing references, + and ensures exactly the expected references are scored.""" try: data = self._invoke_llm_with_function(prompt, function_schema) chunk_scores = data.get(self.REF_LEVELS_FIELD, {}) validated_scores = {} for ref, score in chunk_scores.items(): validated_scores[ref] = self._validate_score_level( - score,f"ref_score[{ref}]" + score, f"ref_score[{ref}]" ) # Check for missing references and assign default scores (0) missing_refs = set(expected_refs) - set(validated_scores.keys()) if missing_refs: logger.warning( - f"GPT didn't return scores for {len(missing_refs)} references: {list(missing_refs)[:5]}... - defaulting to 0" + f"GPT didn't return scores for {len(missing_refs)} " ) if len(missing_refs) < 5: + logger.warning(f"Defaulting missing scores to zeros") for ref in missing_refs: - validated_scores[ - ref] = ScoreLevel.NOT_DISCUSSED + validated_scores[ref] = ScoreLevel.NOT_DISCUSSED + else: raise IncompleteScoreError( f"Missing {len(missing_refs)} references" @@ -568,17 +583,17 @@ def _last_regular_start(self, n: int, chunk: int, overlap: int) -> int: def _process_reference_chunks( self, content: str, - ref_names: List[str] + expanded_refs: List[str] ) -> Optional[Dict[str, int]]: """Process reference chunks in batches.""" ref_scores: Dict[str, int] = {} last_chunk_start = self._last_regular_start( - len(ref_names), self.chunk_size, self.MAX_CHUNK_OVERLAP + len(expanded_refs), self.chunk_size, self.MAX_CHUNK_OVERLAP ) for chunk in self.chunk_list( - ref_names[:last_chunk_start], self.chunk_size + expanded_refs[:last_chunk_start], self.chunk_size ): # prompt = self._create_chunk_prompt(content,chunk) _, chunk_scores = self._grade_refs_resilient( @@ -595,14 +610,14 @@ def _process_reference_chunks( def _process_final_chunk_with_title( self, content: str, - ref_names: List[str], + expanded_refs: List[str], title: str, ) -> Optional[Dict[str, Any]]: """Process final chunk and get title scores.""" start = self._last_regular_start( - len(ref_names), self.chunk_size, self.MAX_CHUNK_OVERLAP + len(expanded_refs), self.chunk_size, self.MAX_CHUNK_OVERLAP ) - final_chunk = ref_names[start:] + final_chunk = expanded_refs[start:] # prompt = self._create_final_chunk_prompt(content,final_chunk,title) result = self._grade_refs_resilient( @@ -621,18 +636,18 @@ def _process_final_chunk_with_title( def get_gpt_scores( self, content: str, - ref_names: List[str], + expanded_refs: List[str], title: str, ) -> 
Optional[Dict[str, Any]]: """Get GPT scores for references and title.""" # Process reference chunks - ref_scores = self._process_reference_chunks(content, ref_names) + ref_scores = self._process_reference_chunks(content, expanded_refs) if ref_scores is None: return None # Process final chunk with title final_data = self._process_final_chunk_with_title( - content, ref_names, title + content, expanded_refs, title ) if final_data is None: return None @@ -692,57 +707,82 @@ def _truncate_to_token_budget(self, text: str, max_tokens: int) -> str: # Fallback: character-based truncation return text[:max_tokens * self.DEFAULT_TOKEN_CHAR_RATIO] - def process_sheet_by_content(self, sheet: Dict[str, Any], - add_full_commentary=False) -> ( - Optional)[Dict[str, Any]]: - """Score a single sheet based on its content.""" - sheet_id = str(sheet.get(self.SHEET_ID_FIELD)) - ref_names = sheet.get("expandedRefs", []) - sheet_title = sheet.get("title", "") - - if not ref_names: - logger.info(f"No expanded refs for sheet {sheet_id}, skipping") - return None + def create_failure_output(self, sheet_id: str, request_status_message: str) -> ( + SheetScoringOutput): + """Create a standardized failure output when sheet processing cannot + be completed.""" + return SheetScoringOutput( + sheet_id=sheet_id, + processed_datetime=str(datetime.utcnow()), + language="", + title_interest_level=0, + title_interest_reason="", + creativity_score=0, + ref_levels={}, + ref_scores={}, + request_status=RequestStatusOptions.FAILURE, + request_status_message=request_status_message + ) - (quotes_only, - no_quotes_content, - full_content, - has_original, creativity_score) = sheet_to_text_views(sheet, - LanguageCode.DEFAULT) + def process_sheet_by_content(self, + sheet_id: str, + expanded_refs: List[str], + title: str, + sources: List[Dict[str, Union[str, Dict[str, str]]]], + add_full_commentary=False) -> SheetScoringOutput: + """Score a single sheet based on its content.""" + if not expanded_refs: + request_status_message = f"No expanded refs for sheet {sheet_id}, skipping" + logger.info(request_status_message) + return self.create_failure_output(sheet_id, + request_status_message=request_status_message) + text_views = sheet_to_text_views(title=title, sources=sources, default_lang=LanguageCode.DEFAULT) + no_quotes_content = text_views["no_quotes"] + full_content = text_views["with_quotes"] + has_original = text_views["has_original"] + creativity_score = text_views["creativity_score"] # Check for original content and reference limits if (not has_original or - len(ref_names) > self.max_ref_to_process): + len(expanded_refs) > self.max_ref_to_process): logger.info(f"Sheet {sheet_id}: using equal distribution") - score_percentages = {ref: 0 for ref in ref_names} - title_info = self._get_title_info(sheet_title) + score_percentages = {ref: 0 for ref in expanded_refs} + title_info = self._get_title_info(title) + + return SheetScoringOutput(sheet_id=sheet_id, + ref_levels=score_percentages, + ref_scores=score_percentages, + processed_datetime=str(datetime.utcnow()), + creativity_score=creativity_score, + title_interest_level=title_info[self.TITLE_INTEREST_FIELD], + title_interest_reason=title_info[self.TITLE_INTEREST_REASON_FIELD], + language=title_info[self.LANGUAGE_FIELD], + request_status=RequestStatusOptions.SUCCESS, + request_status_message="The sheet has no user generated content" + ) - return { - self.SHEET_ID_FIELD: sheet_id, - self.REF_LEVELS_FIELD: score_percentages, - self.CREATIVITY_SCORE_FIELD: creativity_score, - 
self.REF_SCORES_FIELD: score_percentages, - self.PROCESSED_DATETIME_FIELD: datetime.utcnow(), - **title_info - } content = self._sheet_to_text( no_quotes_content=no_quotes_content, full_content=full_content, max_tokens=self.max_prompt_tokens-self.token_margin, add_full_commentary=add_full_commentary) # Process with GPT - gpt_analysis = self.get_gpt_scores(content, ref_names, sheet_title) + gpt_analysis = self.get_gpt_scores(content, expanded_refs, title) if not gpt_analysis: - logger.error(f"Failed to get GPT scores for sheet {sheet_id}") - return None - return { - self.SHEET_ID_FIELD: sheet_id, - self.CREATIVITY_SCORE_FIELD: creativity_score, - self.REF_SCORES_FIELD: gpt_analysis[self.REF_SCORES_FIELD], - self.REF_LEVELS_FIELD: gpt_analysis[self.REF_LEVELS_FIELD], - self.PROCESSED_DATETIME_FIELD: datetime.utcnow(), - self.LANGUAGE_FIELD: gpt_analysis[self.LANGUAGE_FIELD], - self.TITLE_INTEREST_FIELD: gpt_analysis[self.TITLE_INTEREST_FIELD], - self.TITLE_INTEREST_REASON_FIELD: - gpt_analysis[self.TITLE_INTEREST_REASON_FIELD], - } + request_status_message=f"Failed to get GPT scores for sheet {sheet_id}" + logger.error(request_status_message) + return self.create_failure_output(sheet_id=sheet_id, + request_status_message=request_status_message) + + return SheetScoringOutput( + sheet_id=sheet_id, + ref_levels=gpt_analysis[self.REF_LEVELS_FIELD], + ref_scores=gpt_analysis[self.REF_SCORES_FIELD], + processed_datetime=str(datetime.utcnow()), + creativity_score=creativity_score, + title_interest_level=gpt_analysis[self.TITLE_INTEREST_FIELD], + title_interest_reason=gpt_analysis[self.TITLE_INTEREST_REASON_FIELD], + language=gpt_analysis[self.LANGUAGE_FIELD], + request_status=RequestStatusOptions.SUCCESS, + request_status_message="" + ) diff --git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py index 30fcd51..62431f3 100644 --- a/app/sheet_scoring/sheet_scoring.py +++ b/app/sheet_scoring/sheet_scoring.py @@ -9,25 +9,7 @@ def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: scorer = SheetScorer( api_key=os.getenv("OPENAI_API_KEY")) - result = scorer.process_sheet_by_content(sheet=inp.sheet_content) - if not result: - return SheetScoringOutput( - sheet_id=result[scorer.SHEET_ID_FIELD], - ref_scores={}, - ref_levels={}, - title_interest_level=0, - title_interest_reason="", - language="", - creativity_score=0, - processed_datetime=None, - ) - return SheetScoringOutput( - sheet_id=result[scorer.SHEET_ID_FIELD], - ref_scores=result[scorer.REF_SCORES_FIELD], - ref_levels=result[scorer.REF_LEVELS_FIELD], - title_interest_level=result[scorer.TITLE_INTEREST_FIELD], - title_interest_reason=result[scorer.TITLE_INTEREST_REASON_FIELD], - language=result[scorer.LANGUAGE_FIELD], - creativity_score=result[scorer.CREATIVITY_SCORE_FIELD], - processed_datetime=result[scorer.PROCESSED_DATETIME_FIELD].isoformat(), - ) + return scorer.process_sheet_by_content(sheet_id=inp.sheet_id, + title=inp.title, + sources=inp.sources, + expanded_refs=inp.expanded_refs) \ No newline at end of file diff --git a/app/sheet_scoring/text_utils.py b/app/sheet_scoring/text_utils.py index 73096d2..551e282 100644 --- a/app/sheet_scoring/text_utils.py +++ b/app/sheet_scoring/text_utils.py @@ -1,8 +1,6 @@ import re -import html -from typing import Dict, List, Tuple, Any +from typing import Dict, List, Union, Any -TAG_RE = re.compile(r"<[^>]+>") TOKEN_RE = re.compile(r"\b\w+\b", re.UNICODE) @@ -10,11 +8,7 @@ def strip_html(raw: str) -> str: """Remove tags & entities, collapse whitespace.""" if not raw: 
return "" - text = TAG_RE.sub("", raw) - text = html.unescape(text) - text = re.sub(r"\s+\n", "\n", text) # trim spaces before newlines - text = re.sub(r"[ \t]{2,}", " ", text) # collapse runs of blanks - return text.strip() + return '\n'.join([' '.join(line.split()) for line in raw.split('\n')]) def token_count(text: str) -> int: @@ -22,10 +16,9 @@ def token_count(text: str) -> int: return len(TOKEN_RE.findall(text)) -def sheet_to_text_views( - sheet: Dict[str, Any], - default_lang: str = "en", -) -> Tuple[str, str, str, bool, float]: +def sheet_to_text_views(title: str, + sources: List[Dict[str, Union[str, Dict[str, str]]]], + default_lang: str = "en") -> Dict[str, Any]: """ Build three plain‑text snapshots of a Sefaria sheet **and** compute a creativity score. @@ -47,14 +40,13 @@ def sheet_to_text_views( quoted_tokens = 0 has_original = False - title = strip_html(sheet.get("title", "")).strip() if title: tok = token_count(title) original_tokens += tok no_quotes.append(title) with_quotes.append(title) - for blk in sheet.get("sources", []): + for blk in sources: # --- outsideText (single‑lang commentary) if "outsideText" in blk: txt = strip_html(blk["outsideText"]).strip() @@ -113,4 +105,10 @@ def sheet_to_text_views( total_tokens = original_tokens + quoted_tokens or 1 # avoid div‑by‑zero creativity = original_tokens / total_tokens - return quotes_only, commentary, full_sheet, has_original, creativity \ No newline at end of file + return { + "quotes_only": quotes_only, + "no_quotes": commentary, + "with_quotes": full_sheet, + "has_original": has_original, + "creativity_score": creativity + } \ No newline at end of file From ac0e7801fbf93e769e32c1405626b60e2131532b Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Wed, 13 Aug 2025 14:53:00 +0300 Subject: [PATCH 14/36] feat: released new package v1.3.6 and updated the requirements.txt --- app/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/requirements.txt b/app/requirements.txt index 3783ac1..0889b97 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -15,4 +15,4 @@ readability_lxml tenacity==8.3.0 requests numpy -git+https://github.com/Sefaria/LLM@v1.3.5#egg=sefaria_llm_interface&subdirectory=app/llm_interface +git+https://github.com/Sefaria/LLM@v1.3.6#egg=sefaria_llm_interface&subdirectory=app/llm_interface From ee5e14d42e65a6feeb6321c172aba89442e3c547 Mon Sep 17 00:00:00 2001 From: morganizzzm Date: Tue, 19 Aug 2025 14:02:09 +0300 Subject: [PATCH 15/36] feat: - updated README - deleted from openai_sheets_scorer.py unused field SHEET_ID_FIELD - changed the explanation commentary of id field in SheetScoringInput --- .../sheet_scoring/sheet_scoring_input.py | 2 +- app/sheet_scoring/README.md | 140 +++++++++--------- app/sheet_scoring/openai_sheets_scorer.py | 21 ++- 3 files changed, 80 insertions(+), 83 deletions(-) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py index e64a3d6..18583f4 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py @@ -4,7 +4,7 @@ @dataclass class SheetScoringInput: - # str version of _id + # str version of id sheet_id: str title: str sources: List[Dict[str, Union[str, Dict[str, str]]]] diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md index 9377bd9..bcf209d 100644 --- a/app/sheet_scoring/README.md +++ 
b/app/sheet_scoring/README.md @@ -1,42 +1,54 @@ # SheetScorer - Jewish Study Sheet Analysis Tool -**SheetScorer** is a Python tool that uses **LLMs** to automatically analyze and score Jewish study sheets for reference relevance and title interest. It processes sheets from **MongoDB**, evaluates how well biblical references are discussed, and assigns engagement scores to sheet titles. +**SheetScorer** is a Python tool that uses **LLMs** to automatically analyze +and score Jewish study sheets for reference relevance and title interest. +It processes sheets, evaluates how well each cited reference +is discussed, and assigns engagement scores to sheet titles. ## Scores Extracted -- **Reference Discussion Scoring**: Analyzes how thoroughly each biblical reference is discussed (**0-4 scale**) +- **Reference Discussion Scoring**: Analyzes how thoroughly each reference is discussed (**0-4 scale**) - **Title Interest Scoring**: Evaluates how engaging sheet titles are to potential readers (**0-4 scale**) - **Creativity Assessment**: Computes creativity scores based on percentage of **user-generated content**. - **Title Interest Reason**: Explanation of title scoring. +- **Language**: Language of the sheet [all the languages are supported not only he and en]. ## Quick Start ```python -from sheet_scorer import SheetScorer - -# Initialize scorer -scorer = SheetScorer( - api_key="your-openai-api-key", - model="gpt-4o-mini" +from sheet_scoring.sheet_scoring import score_one_sheet +from sefaria_llm_interface.sheet_scoring import SheetScoringInput + +input_data = SheetScoringInput( + sheet_id="123", + title="Understanding Genesis Creation", + expanded_refs=["Genesis 1:1", "Genesis 1:2"], + sources=[ + {"outsideText": "This commentary explores..."}, + {"ref": "Genesis 1:1", "text": {"en": "In the beginning..."}, "comment": "Analysis here..."} + ] ) -# Process a sheet -sheet_data = { - "_id": "sheet123", - "title": "Understanding Genesis Creation", - "expandedRefs": ["Genesis 1:1", "Genesis 1:2", "Genesis 1:3"], - # ... other sheet content -} - -result = scorer.process_sheet_by_content(sheet_data) +result = score_one_sheet(input_data) +print(f"Title score: {result.title_interest_level}") +print(f"Ref scores: {result.ref_scores}") print(result) ``` ## Scoring System +### Architecture + +#### sheet_scoring (package) +- sheet_scoring.py - Main API with score_one_sheet() function +- tasks.py - Celery task wrapper for async processing +- text_utils.py - Content parsing and token counting utilities +- openai_sheets_scorer.py - Core LLM scoring engine +- README.md + ### Reference Discussion Levels -The tool evaluates how well each biblical reference is discussed using a **0-4 scale**: +The tool evaluates how well each reference is discussed using a **0-4 scale**: | Level | Description | |-------|-------------| @@ -60,66 +72,59 @@ Sheet titles are scored for **user engagement** on a **0-4 scale**: ### Creativity Score -Calculated as the **percentage of user-generated content** versus all text (including quoted canonical text). Higher scores indicate more **original commentary** and analysis. +user_tokens / total_tokens - Higher = more original content vs canonical quotes. -## Configuration Options +### Language +ISO-639-1 language code of the sheet, and in case sheet sheet has no user generated content language code of the title. 
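As a rough sketch (not the exact `text_utils` implementation), the creativity score above reduces to a word-token ratio between user-written text and quoted canonical text:

```python
import re

TOKEN_RE = re.compile(r"\b\w+\b", re.UNICODE)

def creativity_score(user_text: str, quoted_text: str) -> float:
    """Share of word tokens contributed by user-written content."""
    user_tokens = len(TOKEN_RE.findall(user_text))
    quoted_tokens = len(TOKEN_RE.findall(quoted_text))
    total = (user_tokens + quoted_tokens) or 1   # guard against empty sheets
    return user_tokens / total

# e.g. 30 words of commentary around a 120-word quotation -> 0.2
```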
-### Initialization Parameters +## Data Structures +#### Input (SheetScoringInput) ```python -scorer = SheetScorer( - api_key="your-api-key", # OpenAI API key - model="gpt-4o-mini", # Model to use - max_prompt_tokens=128000, # Maximum input tokens - token_margin=16384, # Reserved tokens for output - max_ref_to_process=800, # Maximum references to process - chunk_size=80 # References per chunk -) +{ + "sheet_id": "123", + "title": "Sheet title", + "expanded_refs": ["Genesis 1:1", "Exodus 2:3"], + "sources": [ + {"outsideText": "User commentary"}, + {"outsideBiText": {"en": "English", "he": "Hebrew"}}, + {"ref": "Genesis 1:1", "text": {"en": "Quote"}, "comment": "Analysis"} + ] +} ``` - -### Key Constants - -- **DEFAULT_MAX_OUTPUT_TOKENS**: **16384** -- **DEFAULT_CHUNK_SIZE**: **80** references per processing chunk -- **DEFAULT_MAX_INPUT_OUTPUT_TOKENS**: **128000** total token limit -- **MAX_CHUNK_OVERLAP**: **10** references overlap between chunks - -## Core Methods - -### **process_sheet_by_content(sheet, add_full_comment)** - -**Main method** to process a complete sheet and return scores. - -**Parameters:** -- `sheet` (**Dict**): **MongoDB** sheet document containing title, references, and content -- `add_full_comment` (**bool**): parameter that allows to add quotations text to input that LLM receives - -**Returns:** -- **Dictionary** with scoring results or **None** if processing fails - -**Example Output:** +#### Output (SheetScoringOutput) ```python { - "_id": "sheet123", - "ref_levels": {"Genesis 1:1": 3, "Genesis 1:2": 2}, - "ref_scores": {"Genesis 1:1": 60.0, "Genesis 1:2": 40.0}, + "sheet_id": "123", + "ref_levels": {"Genesis 1:1": 3, "Exodus 2:3": 2}, # Raw 0-4 scores + "ref_scores": {"Genesis 1:1": 60.0, "Exodus 2:3": 40.0}, # Normalized % "title_interest_level": 3, "title_interest_reason": "Compelling theological question", "language": "en", "creativity_score": 0.75, - "processed_at": "2025-01-31T10:30:00Z" + "processed_datetime": "2025-01-31T10:30:00Z", + "request_status": 1, # 1=success, 0=failure + "request_status_message": "" } ``` -! ref_scores is normalized version of ref_levels -### **get_gpt_scores(content, ref_names, title)** +## Configuration Options + +### Initialization Parameters -**Core scoring method** that processes content and returns analysis. +```python +scorer = SheetScorer( + api_key=os.getenv("OPENAI_API_KEY"), + model="gpt-4o-mini", # Default model + max_prompt_tokens=128000, # Input token budget + token_margin=16384, # Reserved for output + max_ref_to_process=800, # Max num of refs that can be processed + chunk_size=80 # Refs per LLM call +) +``` -**Parameters:** -- `content` (**str**): Sheet text content to analyze -- `ref_names` (**List[str]**): List of biblical references to score -- `title` (**str**): Sheet title to evaluate +The constants DEFAULT_MAX_OUTPUT_TOKENS, DEFAULT_MAX_INPUT_OUTPUT_TOKENS are model specific +and can be found on the internet. ## Content Processing Strategy @@ -127,7 +132,7 @@ The tool uses an **adjustable approach** for canonical quotations: 1. **Always includes** all user commentary and **original content** 2. **Conditionally includes** canonical quotes only if the **entire bundle** fits within token limits -and **add_full_comment is set to True** +and **add_full_commentary is set to True** 3. **Truncates intelligently** using **LLM summarization** when content exceeds limits 4. ***LLM Summarization***: Uses secondary LLM to compress content while preserving key information 5. 
***Reference Preservation***: Maintains all biblical reference tags during compression @@ -184,7 +189,7 @@ Designed for **MongoDB integration** with expected document structure: ```python { - "_id": "unique_sheet_id", + "id": "unique id", "title": "Sheet Title", "expandedRefs": ["Genesis 1:1", "Exodus 2:3"], # Additional sheet content fields... @@ -223,10 +228,3 @@ logging.getLogger('sheet_scorer').setLevel(logging.INFO) ``` -## Language Support - -Supports **automatic detection** and processing of: - -- **English** (`en`) - **Default language** -- **Hebrew** (`he`) - Full **RTL support** -- Language detection based on **original user-written content** diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index c7803d7..be05e2e 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -82,7 +82,6 @@ class SheetScorer: TITLE_INTEREST_FIELD = "title_interest_level" LANGUAGE_FIELD = "language" TITLE_INTEREST_REASON_FIELD = 'title_interest_reason' - SHEET_ID_FIELD = "_id" PROCESSED_DATETIME_FIELD = "processed_datetime" CREATIVITY_SCORE_FIELD = 'creativity_score' @@ -317,11 +316,11 @@ def _create_chunk_prompt_for_function(self, sheet_content: str, {refs_md} Scoring Scale (0-4): - 0: Quoted only, no discussion - 1: Mentioned only through neighboring verses - 2: Moderate discussion (some commentary) - 3: Significant discussion (substantial commentary) - 4: Central focus of sheet + {ScoreLevel.NOT_DISCUSSED}: Quoted only, no discussion + {ScoreLevel.MINIMAL}: Mentioned only through neighboring verses + {ScoreLevel.MODERATE}: Moderate discussion (some commentary) + {ScoreLevel.SIGNIFICANT}: Significant discussion (substantial commentary) + {ScoreLevel.CENTRAL}: Central focus of sheet Score each reference based on how thoroughly it's discussed in the content.""" ) @@ -347,11 +346,11 @@ def _create_final_chunk_prompt_for_function(self, sheet_content: str, TASKS: 1. Reference Discussion Scoring (0-4): - 0: Quoted only, no discussion - 1: Mentioned only through neighboring verses - 2: Moderate discussion (some commentary) - 3: Significant discussion (substantial commentary) - 4: Central focus of sheet + {ScoreLevel.NOT_DISCUSSED}: Quoted only, no discussion + {ScoreLevel.MINIMAL}: Mentioned only through neighboring verses + {ScoreLevel.MODERATE}: Moderate discussion (some commentary) + {ScoreLevel.SIGNIFICANT}: Significant discussion (substantial commentary) + {ScoreLevel.CENTRAL}: Central focus of sheet 2. 
Title Interest Scoring (0-4): 0: Not interesting/off-topic From 4eee4d3b93d57f2d073cccabee48557375bd9b2a Mon Sep 17 00:00:00 2001 From: yodem Date: Wed, 10 Dec 2025 10:34:35 +0200 Subject: [PATCH 16/36] fix(style): updated comment for clarity in SheetScoringInput class --- .../sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py index 18583f4..11bc9fb 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py @@ -4,7 +4,7 @@ @dataclass class SheetScoringInput: - # str version of id + # str version of id sheet_id: str title: str sources: List[Dict[str, Union[str, Dict[str, str]]]] From 921ee5321ac837d04d0f19aa18c6801ffa3f89cb Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 30 Dec 2025 14:59:14 +0200 Subject: [PATCH 17/36] chore: spacing --- app/sheet_scoring/README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md index bcf209d..cf20ae3 100644 --- a/app/sheet_scoring/README.md +++ b/app/sheet_scoring/README.md @@ -198,16 +198,16 @@ Designed for **MongoDB integration** with expected document structure: ## Output Fields -| Field | Description | -|-----------------------------|------------------------------------------------| -| **`ref_levels`** | Raw **0-4 scores** for each reference | -| **`ref_scores`** | **Normalized percentage scores** (sum to 100%) | -| **`title_interest_level`** | Title **engagement score** (0-4) | -| **`title_interest_reason`** | **Brief explanation** of title score | -| **`language`** | **Detected language code** | -| **`creativity_score`** | **Percentage** of user-generated content | -| **`processed_datetime`** | **Processing timestamp** | -| **`request_status`** | **Whether scoring succeded/failed** | +| Field | Description | +|------------------------------|------------------------------------------------| +| **`ref_levels`** | Raw **0-4 scores** for each reference | +| **`ref_scores`** | **Normalized percentage scores** (sum to 100%) | +| **`title_interest_level`** | Title **engagement score** (0-4) | +| **`title_interest_reason`** | **Brief explanation** of title score | +| **`language`** | **Detected language code** | +| **`creativity_score`** | **Percentage** of user-generated content | +| **`processed_datetime`** | **Processing timestamp** | +| **`request_status`** | **Whether scoring succeded/failed** | | **`request_status_message`** | **The reason why scoring failed** | From 72c0de6e95a613b76e5cc12fcff92777001a3371 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 10:48:08 +0200 Subject: [PATCH 18/36] upgrade langchain --- app/requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index 0889b97..28a71b3 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,5 +1,6 @@ -langchain[llms] -langchain_openai +langchain>=0.2.17 +langchain-core>=0.2.43 +langchain-openai>=0.2.0 anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 From bcf6cee57b126015b92468fe0257c3f7088348cd Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 11:06:22 +0200 Subject: [PATCH 19/36] upgrade langchain --- app/requirements.txt | 6 
+++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index 28a71b3..c1e107e 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,6 +1,6 @@ -langchain>=0.2.17 -langchain-core>=0.2.43 -langchain-openai>=0.2.0 +langchain>=0.1.20 +langchain-core>=0.1.53 +langchain-openai>=0.1.10 anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 From 3246b06a54d594463e135cb36257f4f39c058f43 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 14:34:57 +0200 Subject: [PATCH 20/36] downgrade langchain --- app/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index c1e107e..52b90d7 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,6 +1,6 @@ -langchain>=0.1.20 -langchain-core>=0.1.53 -langchain-openai>=0.1.10 +langchain==0.1.20 +langchain-core==0.1.53 +langchain-openai==0.1.10 anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 From a0c853ef4647c6acd1c7a0af647f0bb140c4ad41 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 15:10:04 +0200 Subject: [PATCH 21/36] langchain versions --- app/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index 52b90d7..0592471 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,5 +1,5 @@ -langchain==0.1.20 -langchain-core==0.1.53 +langchain>=0.2.17 +langchain-core>=0.2.43 langchain-openai==0.1.10 anthropic~=0.26.1 stanza~=1.5.0 From 0fe819c346a370ca6b98dde15489f7dd9c1301ac Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 15:42:24 +0200 Subject: [PATCH 22/36] langchain versions --- app/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index 0592471..bf52fc1 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,6 +1,6 @@ -langchain>=0.2.17 -langchain-core>=0.2.43 -langchain-openai==0.1.10 +langchain==0.1.20 +langchain-core==0.1.52 +langchain-openai==0.1.8 anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 From 6d5c9e54fcf70d9c42623e5f15d08a6dce59e808 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 15:48:37 +0200 Subject: [PATCH 23/36] langchain versions --- app/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index bf52fc1..9e81f06 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,5 +1,5 @@ -langchain==0.1.20 -langchain-core==0.1.52 +langchain==0.2.1 +langchain-core==0.2.2 langchain-openai==0.1.8 anthropic~=0.26.1 stanza~=1.5.0 From 5c7cefe50e86bb61e8e724b3dcc5a5e43f711307 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Tue, 6 Jan 2026 16:11:11 +0200 Subject: [PATCH 24/36] use httpx to prevent ChatOpenAI from getting proxies --- app/requirements.txt | 1 + app/sheet_scoring/openai_sheets_scorer.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/app/requirements.txt b/app/requirements.txt index 9e81f06..3a75641 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -4,6 +4,7 @@ langchain-openai==0.1.8 anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 +httpx~=0.27.0 typer~=0.4.1 pydantic~=2.7.1 loguru~=0.7.2 diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index be05e2e..76f6a5b 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -5,6 +5,7 @@ from typing import 
Any, Dict, Iterator, List, Optional, Set, Tuple, Union import textwrap import tiktoken +import httpx from langchain.schema import HumanMessage from langchain_openai import ChatOpenAI from sheet_scoring.text_utils import sheet_to_text_views @@ -107,6 +108,7 @@ def __init__( def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for JSON responses.""" + http_client = httpx.Client() return ChatOpenAI( model=model, temperature=0, @@ -115,15 +117,18 @@ def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: presence_penalty=0, seed=42, api_key=api_key, + http_client=http_client, ) def _create_text_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for text responses.""" + http_client = httpx.Client() return ChatOpenAI( model=model, temperature=0, model_kwargs={"response_format": {"type": "text"}}, api_key=api_key, + http_client=http_client, ) def _invoke_llm_with_function(self, prompt: str, From 1e6247ded4b2c83fdcbba5ce8cd614b1086edc6f Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:01:47 +0200 Subject: [PATCH 25/36] fix: right data type Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py index 11bc9fb..e6fcb9d 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py @@ -8,5 +8,5 @@ class SheetScoringInput: sheet_id: str title: str sources: List[Dict[str, Union[str, Dict[str, str]]]] - expanded_refs: str + expanded_refs: List[str] From 96e4ab62fcbb2136b1b8f355ced62772cbfd216e Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:02:11 +0200 Subject: [PATCH 26/36] chore: remove import Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py index d66c18e..22f5ba6 100644 --- a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, Union +from typing import Dict from datetime import datetime From d8e42276480f768b3a59ca075d0d6320a1650665 Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:07:26 +0200 Subject: [PATCH 27/36] docs: fix Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md index cf20ae3..b74bcc0 100644 --- a/app/sheet_scoring/README.md +++ b/app/sheet_scoring/README.md @@ -75,7 +75,7 @@ Sheet titles are scored for **user engagement** on a **0-4 scale**: user_tokens / total_tokens - Higher = more original content vs canonical quotes. 
### Language -ISO-639-1 language code of the sheet, and in case sheet sheet has no user generated content language code of the title. +ISO-639-1 language code of the sheet, and in case the sheet has no user generated content, the language code of the title. ## Data Structures #### Input (SheetScoringInput) From 1b38a697b7889a19ed941b27c2e4106cc571648c Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:08:12 +0200 Subject: [PATCH 28/36] docs: fix numbers Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md index b74bcc0..6af1b86 100644 --- a/app/sheet_scoring/README.md +++ b/app/sheet_scoring/README.md @@ -134,10 +134,10 @@ The tool uses an **adjustable approach** for canonical quotations: 2. **Conditionally includes** canonical quotes only if the **entire bundle** fits within token limits and **add_full_commentary is set to True** 3. **Truncates intelligently** using **LLM summarization** when content exceeds limits - 4. ***LLM Summarization***: Uses secondary LLM to compress content while preserving key information - 5. ***Reference Preservation***: Maintains all biblical reference tags during compression - 6. ***Character Fallback***: Falls back to character-based truncation if summarization fails + 1. ***LLM Summarization***: Uses secondary LLM to compress content while preserving key information + 2. ***Reference Preservation***: Maintains all biblical reference tags during compression + 3. ***Character Fallback***: Falls back to character-based truncation if summarization fails ## Grading Strategy Processed content is sent to LLM, together with references for grading: From af5155ec3b34913423337ea02f81e4f5a16445cd Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:08:54 +0200 Subject: [PATCH 29/36] chore: add spaces Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/openai_sheets_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index 76f6a5b..18e2bbd 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -103,8 +103,8 @@ def __init__( self.model = model self.chunk_size = chunk_size self.max_ref_to_process = max_ref_to_process - self.llm = self._create_json_llm(api_key,model) - self.summarizer = self._create_text_llm(api_key,model) + self.llm = self._create_json_llm(api_key, model) + self.summarizer = self._create_text_llm(api_key, model) def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for JSON responses.""" From c16caa5f3de2994ecec9c890a63715d2e43a01c9 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Thu, 8 Jan 2026 10:23:32 +0200 Subject: [PATCH 30/36] use Client with a "with" --- app/sheet_scoring/README.md | 7 ++++--- app/sheet_scoring/openai_sheets_scorer.py | 20 ++++++++++++++++---- app/sheet_scoring/sheet_scoring.py | 11 +++++------ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md index 6af1b86..1c789fe 100644 --- a/app/sheet_scoring/README.md +++ b/app/sheet_scoring/README.md @@ -113,14 +113,15 @@ ISO-639-1 language code of the sheet, and in case the
sheet has no user generate ### Initialization Parameters ```python -scorer = SheetScorer( +with SheetScorer( api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini", # Default model max_prompt_tokens=128000, # Input token budget token_margin=16384, # Reserved for output - max_ref_to_process=800, # Max num of refs that can be processed + max_ref_to_process=800, # Max num of refs that can be processed chunk_size=80 # Refs per LLM call -) +) as scorer: + result = scorer.process_sheet_by_content(...) ``` The constants DEFAULT_MAX_OUTPUT_TOKENS, DEFAULT_MAX_INPUT_OUTPUT_TOKENS are model specific diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index 18e2bbd..b52b5da 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -103,12 +103,25 @@ def __init__( self.model = model self.chunk_size = chunk_size self.max_ref_to_process = max_ref_to_process + self._http_client_json = httpx.Client() + self._http_client_text = httpx.Client() self.llm = self._create_json_llm(api_key, model) self.summarizer = self._create_text_llm(api_key, model) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + return False + + def close(self): + """Close HTTP clients to release resources.""" + self._http_client_json.close() + self._http_client_text.close() + def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for JSON responses.""" - http_client = httpx.Client() return ChatOpenAI( model=model, temperature=0, @@ -117,18 +130,17 @@ def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: presence_penalty=0, seed=42, api_key=api_key, - http_client=http_client, + http_client=self._http_client_json, ) def _create_text_llm(self, api_key: str, model: str) -> ChatOpenAI: """Create LLM client for text responses.""" - http_client = httpx.Client() return ChatOpenAI( model=model, temperature=0, model_kwargs={"response_format": {"type": "text"}}, api_key=api_key, - http_client=http_client, + http_client=self._http_client_text, ) def _invoke_llm_with_function(self, prompt: str, diff --git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py index 62431f3..2fbb481 100644 --- a/app/sheet_scoring/sheet_scoring.py +++ b/app/sheet_scoring/sheet_scoring.py @@ -7,9 +7,8 @@ def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: - scorer = SheetScorer( - api_key=os.getenv("OPENAI_API_KEY")) - return scorer.process_sheet_by_content(sheet_id=inp.sheet_id, - title=inp.title, - sources=inp.sources, - expanded_refs=inp.expanded_refs) \ No newline at end of file + with SheetScorer(api_key=os.getenv("OPENAI_API_KEY")) as scorer: + return scorer.process_sheet_by_content(sheet_id=inp.sheet_id, + title=inp.title, + sources=inp.sources, + expanded_refs=inp.expanded_refs) \ No newline at end of file From 74e1f6fe83b8dab822400ef8dbc93b89b99f430c Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:24:39 +0200 Subject: [PATCH 31/36] chore: space Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/openai_sheets_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index b52b5da..0341281 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -430,7 +430,7 @@ def 
_sheet_to_text( logger.info("Sending to LLM sheet without quotations text") return no_quotes_content - def _get_title_info(self,sheet_title: str) -> Dict[str, Any]: + def _get_title_info(self, sheet_title: str) -> Dict[str, Any]: """Obtain title-interest score ONLY (used when no content).""" prompt = self._create_title_only_prompt_function(sheet_title) try: From 4c146af5804061fdffdb13e64c64e7527aae99ec Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:26:16 +0200 Subject: [PATCH 32/36] chore: remove double comment Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/openai_sheets_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index 0341281..2fc8932 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -672,7 +672,7 @@ def get_gpt_scores( final_chunk_scores = final_data.get(self.REF_SCORES_FIELD, {}) ref_scores.update(final_chunk_scores) - # # Normalize to percentages + # Normalize to percentages score_percentages = self._normalize_scores_to_percentages( score_levels=ref_scores, sheet_tokens=self._count_tokens(content) From c02503aa099c3b8e258ffd489a5ea1a75f5d8ef2 Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:27:17 +0200 Subject: [PATCH 33/36] chore: spaces Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/openai_sheets_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index 2fc8932..e8d2393 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -780,12 +780,12 @@ def process_sheet_by_content(self, content = self._sheet_to_text( no_quotes_content=no_quotes_content, full_content=full_content, - max_tokens=self.max_prompt_tokens-self.token_margin, + max_tokens=self.max_prompt_tokens - self.token_margin, add_full_commentary=add_full_commentary) # Process with GPT gpt_analysis = self.get_gpt_scores(content, expanded_refs, title) if not gpt_analysis: - request_status_message=f"Failed to get GPT scores for sheet {sheet_id}" + request_status_message = f"Failed to get GPT scores for sheet {sheet_id}" logger.error(request_status_message) return self.create_failure_output(sheet_id=sheet_id, request_status_message=request_status_message) From ec614290c3981bdb58c0fabdcdfcfde67372ed44 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Thu, 8 Jan 2026 10:47:35 +0200 Subject: [PATCH 34/36] temporarily raise error to kill tasks --- app/sheet_scoring/openai_sheets_scorer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index e8d2393..6deb2ec 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -98,6 +98,7 @@ def __init__( max_ref_to_process: int = DEFAULT_MAX_REFS_TO_PROCESS, chunk_size: int = DEFAULT_CHUNK_SIZE, ): + raise self.max_prompt_tokens = max_prompt_tokens self.token_margin = token_margin self.model = model From 0c3bbe95cff9ff7e2ffe43ab4951b890c4927d99 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Thu, 8 Jan 2026 11:15:41 +0200 Subject: [PATCH 35/36] revert --- app/sheet_scoring/openai_sheets_scorer.py | 1
- 1 file changed, 1 deletion(-) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index 6deb2ec..e8d2393 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -98,7 +98,6 @@ def __init__( max_ref_to_process: int = DEFAULT_MAX_REFS_TO_PROCESS, chunk_size: int = DEFAULT_CHUNK_SIZE, ): - raise self.max_prompt_tokens = max_prompt_tokens self.token_margin = token_margin self.model = model From 4b6c693b7afd436367a242b83a068b5f8a5a3d76 Mon Sep 17 00:00:00 2001 From: YishaiGlasner <60393023+YishaiGlasner@users.noreply.github.com> Date: Thu, 8 Jan 2026 11:20:16 +0200 Subject: [PATCH 36/36] fix: remove mistaken line from prompt Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/sheet_scoring/openai_sheets_scorer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py index e8d2393..740146e 100644 --- a/app/sheet_scoring/openai_sheets_scorer.py +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -306,7 +306,6 @@ def _create_title_only_prompt_function(self, sheet_title: str) -> str: {sheet_title} TASK: Return JSON with keys `title_interest_level` (0-4) and `title_interest_reason` ( < 20 words). - Choose a higher score when the title: Title interest level (int 0–4): 0: Not interesting / off‑topic for users