diff --git a/app/celery_setup/app.py b/app/celery_setup/app.py index 0b50834..d123eeb 100644 --- a/app/celery_setup/app.py +++ b/app/celery_setup/app.py @@ -3,4 +3,4 @@ app = Celery('llm') app.conf.update(**generate_config_from_env()) -app.autodiscover_tasks(packages=['topic_prompt']) +app.autodiscover_tasks(packages=['topic_prompt', 'sheet_scoring']) diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py new file mode 100644 index 0000000..f0aaabe --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/__init__.py @@ -0,0 +1,3 @@ +from sefaria_llm_interface.sheet_scoring.sheet_scoring_input import * +from sefaria_llm_interface.sheet_scoring.sheet_scoring_output import * + diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py new file mode 100644 index 0000000..e6fcb9d --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_input.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass +from typing import List, Dict, Union + + +@dataclass +class SheetScoringInput: + # str version of id + sheet_id: str + title: str + sources: List[Dict[str, Union[str, Dict[str, str]]]] + expanded_refs: List[str] + diff --git a/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py new file mode 100644 index 0000000..22f5ba6 --- /dev/null +++ b/app/llm_interface/sefaria_llm_interface/sheet_scoring/sheet_scoring_output.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass +from typing import Dict +from datetime import datetime + + +@dataclass +class SheetScoringOutput: + sheet_id: str + processed_datetime: str + language: str + title_interest_level: int + title_interest_reason: str + creativity_score: float + ref_levels: Dict[str, int] + ref_scores: Dict[str, float] + request_status: int + request_status_message: str + + def __post_init__(self): + if isinstance(self.processed_datetime, datetime): + self.processed_datetime = self.processed_datetime.isoformat() \ No newline at end of file diff --git a/app/requirements.txt b/app/requirements.txt index 9d11f97..83e0398 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,8 +1,11 @@ -langchain[llms]~=0.2.1 +langchain==0.2.1 +langchain-core==0.2.2 +langchain-openai==0.1.8 langsmith~=0.1.0 anthropic~=0.26.1 stanza~=1.5.0 openai~=1.30.0 +httpx~=0.27.0 typer~=0.4.1 pydantic~=2.7.1 loguru~=0.7.2 @@ -10,9 +13,9 @@ tqdm~=4.66.1 celery[redis]~=5.2.7 diff-match-patch dnspython~=2.5.0 -tiktoken~=0.4.0 +tiktoken readability_lxml tenacity==8.3.0 requests numpy -git+https://github.com/Sefaria/LLM@v1.0.3#egg=sefaria_llm_interface&subdirectory=app/llm_interface +git+https://github.com/Sefaria/LLM@v1.3.6#egg=sefaria_llm_interface&subdirectory=app/llm_interface diff --git a/app/sheet_scoring/README.md b/app/sheet_scoring/README.md new file mode 100644 index 0000000..1c789fe --- /dev/null +++ b/app/sheet_scoring/README.md @@ -0,0 +1,231 @@ +# SheetScorer - Jewish Study Sheet Analysis Tool + +**SheetScorer** is a Python tool that uses **LLMs** to automatically analyze +and score Jewish study sheets for reference relevance and title interest. +It processes sheets, evaluates how well each cited reference +is discussed, and assigns engagement scores to sheet titles. 
+
+## Scores Extracted
+
+- **Reference Discussion Scoring**: Analyzes how thoroughly each reference is discussed (**0-4 scale**)
+- **Title Interest Scoring**: Evaluates how engaging sheet titles are to potential readers (**0-4 scale**)
+- **Creativity Assessment**: Computes a creativity score based on the percentage of **user-generated content**.
+- **Title Interest Reason**: Explanation of the title score.
+- **Language**: Language of the sheet (all languages are supported, not only `he` and `en`).
+
+## Quick Start
+
+```python
+from sheet_scoring.sheet_scoring import score_one_sheet
+from sefaria_llm_interface.sheet_scoring import SheetScoringInput
+
+input_data = SheetScoringInput(
+    sheet_id="123",
+    title="Understanding Genesis Creation",
+    expanded_refs=["Genesis 1:1", "Genesis 1:2"],
+    sources=[
+        {"outsideText": "This commentary explores..."},
+        {"ref": "Genesis 1:1", "text": {"en": "In the beginning..."}, "comment": "Analysis here..."}
+    ]
+)
+
+result = score_one_sheet(input_data)
+print(f"Title score: {result.title_interest_level}")
+print(f"Ref scores: {result.ref_scores}")
+print(result)
+```
+
+## Scoring System
+
+### Architecture
+
+#### sheet_scoring (package)
+- sheet_scoring.py - Main API with score_one_sheet() function
+- tasks.py - Celery task wrapper for async processing
+- text_utils.py - Content parsing and token counting utilities
+- openai_sheets_scorer.py - Core LLM scoring engine
+- README.md
+
+### Reference Discussion Levels
+
+The tool evaluates how well each reference is discussed using a **0-4 scale**:
+
+| Level | Description |
+|-------|-------------|
+| **0 - Not Discussed** | Reference is **quoted only**, no discussion or commentary |
+| **1 - Minimal** | Mentioned only through **neighboring verses**, minimal engagement |
+| **2 - Moderate** | Some discussion present with **basic commentary** |
+| **3 - Significant** | **Substantial discussion** with detailed commentary |
+| **4 - Central** | Reference is a **central focus** of the entire sheet |
+
+### Title Interest Levels
+
+Sheet titles are scored for **user engagement** on a **0-4 scale**:
+
+| Level | Description |
+|-------|-------------|
+| **0 - Not Interesting** | **Off-topic** or unengaging for target users |
+| **1 - Slight Relevance** | **Low appeal**, users unlikely to engage |
+| **2 - Somewhat Interesting** | Users might **skim**, moderate appeal |
+| **3 - Interesting** | Users **likely to open** and read |
+| **4 - Very Compelling** | **Must-read content**, high engagement expected |
+
+### Creativity Score
+
+Computed as user_tokens / total_tokens; higher values mean more original content relative to canonical quotes.
+
+### Language
+ISO-639-1 language code of the sheet; if the sheet has no user-generated content, the language code of the title is used instead.
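+
+### Async Scoring via Celery (sketch)
+
+Besides the synchronous `score_one_sheet()` call shown in Quick Start, this package
+registers a Celery task named `llm.score_sheet` (see `tasks.py`). The sketch below
+shows how another service might queue a sheet for scoring; the broker/backend URLs
+and the queue name are deployment-specific assumptions (the worker consumes the
+queue named by the `QUEUE_NAME` environment variable in `build/entrypoint.sh`).
+
+```python
+from dataclasses import asdict
+
+from celery import Celery
+from sefaria_llm_interface.sheet_scoring import SheetScoringInput
+
+# Assumed broker/backend URLs and queue name; adjust to your deployment.
+client = Celery(broker="redis://localhost:6379/0", backend="redis://localhost:6379/1")
+
+payload = asdict(SheetScoringInput(
+    sheet_id="123",
+    title="Understanding Genesis Creation",
+    expanded_refs=["Genesis 1:1", "Genesis 1:2"],
+    sources=[{"outsideText": "This commentary explores..."}],
+))
+
+# The task takes a plain dict and returns the SheetScoringOutput as a dict.
+async_result = client.send_task("llm.score_sheet", args=[payload], queue="llm")
+print(async_result.get(timeout=300))
+```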
+
+## Data Structures
+#### Input (SheetScoringInput)
+
+```python
+{
+    "sheet_id": "123",
+    "title": "Sheet title",
+    "expanded_refs": ["Genesis 1:1", "Exodus 2:3"],
+    "sources": [
+        {"outsideText": "User commentary"},
+        {"outsideBiText": {"en": "English", "he": "Hebrew"}},
+        {"ref": "Genesis 1:1", "text": {"en": "Quote"}, "comment": "Analysis"}
+    ]
+}
+```
+#### Output (SheetScoringOutput)
+```python
+{
+    "sheet_id": "123",
+    "ref_levels": {"Genesis 1:1": 3, "Exodus 2:3": 2},        # Raw 0-4 scores
+    "ref_scores": {"Genesis 1:1": 60.0, "Exodus 2:3": 40.0},  # Normalized %
+    "title_interest_level": 3,
+    "title_interest_reason": "Compelling theological question",
+    "language": "en",
+    "creativity_score": 0.75,
+    "processed_datetime": "2025-01-31T10:30:00Z",
+    "request_status": 1,  # 1=success, 0=failure
+    "request_status_message": ""
+}
+```
+
+## Configuration Options
+
+### Initialization Parameters
+
+```python
+import os
+
+with SheetScorer(
+    api_key=os.getenv("OPENAI_API_KEY"),
+    model="gpt-4o-mini",          # Default model
+    max_prompt_tokens=128000,     # Input token budget
+    token_margin=16384,           # Reserved for output
+    max_ref_to_process=800,       # Max number of refs that can be processed
+    chunk_size=80                 # Refs per LLM call
+) as scorer:
+    result = scorer.process_sheet_by_content(...)
+```
+
+The constants DEFAULT_MAX_OUTPUT_TOKENS and DEFAULT_MAX_INPUT_OUTPUT_TOKENS are model-specific;
+consult the model provider's documentation for the correct values.
+
+## Content Processing Strategy
+
+The tool uses a **configurable, all-or-nothing approach** for canonical quotations:
+
+1. **Always includes** all user commentary and **original content**
+2. **Conditionally includes** canonical quotes only if the **entire bundle** fits within token limits and **add_full_commentary is set to True**
+3. **Truncates intelligently** using **LLM summarization** when content exceeds limits
+
+   1. ***LLM Summarization***: Uses a secondary LLM to compress content while preserving key information
+   2. ***Reference Preservation***: Maintains all biblical reference tags during compression
+   3. ***Character Fallback***: Falls back to character-based truncation if summarization fails
+
+## Grading Strategy
+Processed content is sent to the LLM together with the references for grading:
+
+### Resilient Grading List Processing
+
+- **Chunking**: Large reference lists are processed in **chunks** to stay within model limits
+- **Overlap Handling**: Smart overlap between chunks prevents **reference boundary issues**
+
+### Resilient Reference Grading
+
+- **Primary attempt**: Process **all references together**
+- **Fallback**: Split the reference list in **half** and process **recursively**
+- **Final fallback**: Assign a **default score of 0** to problematic references
+
+### Resilient Score Extraction
+
+Uses **OpenAI's function calling** feature with **strict schemas**:
+
+#### Middle Chunk Scoring Schema
+```python
+{
+    "name": "score_references",
+    "parameters": {
+        "ref_levels": {
+            "Genesis 1:1": {"type": "integer", "minimum": 0, "maximum": 4},
+            # ... for each reference
+        }
+    }
+}
+```
+
+#### Title Scoring Schema
+```python
+{
+    "name": "score_title",
+    "parameters": {
+        "language": {"type": "string"},
+        "title_interest_level": {"type": "integer", "minimum": 0, "maximum": 4},
+        "title_interest_reason": {"type": "string", "maxLength": 100}
+    }
+}
+```
+
+## Database Integration
+
+Designed for **MongoDB integration** with the expected document structure:
+
+```python
+{
+    "id": "unique id",
+    "title": "Sheet Title",
+    "expandedRefs": ["Genesis 1:1", "Exodus 2:3"],
+    # Additional sheet content fields...
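+    # Hypothetical mapping sketch (the exact MongoDB field names are assumptions):
+    # such a document would feed the scorer as
+    #   SheetScoringInput(sheet_id=str(doc["id"]), title=doc["title"],
+    #                     expanded_refs=doc["expandedRefs"], sources=doc["sources"])
+    # where "sources" holds the source blocks in the format shown under
+    # "Data Structures" above.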
+} +``` + +## Output Fields + +| Field | Description | +|------------------------------|------------------------------------------------| +| **`ref_levels`** | Raw **0-4 scores** for each reference | +| **`ref_scores`** | **Normalized percentage scores** (sum to 100%) | +| **`title_interest_level`** | Title **engagement score** (0-4) | +| **`title_interest_reason`** | **Brief explanation** of title score | +| **`language`** | **Detected language code** | +| **`creativity_score`** | **Percentage** of user-generated content | +| **`processed_datetime`** | **Processing timestamp** | +| **`request_status`** | **Whether scoring succeded/failed** | +| **`request_status_message`** | **The reason why scoring failed** | + + + + +## Logging + +**Comprehensive logging** for monitoring and debugging: + +- **Info**: Processing decisions and **content statistics** +- **Warning**: **Score validation** and fallback usage +- **Error**: **LLM failures** and processing errors + +Configure logging level as needed: +```python +import logging +logging.getLogger('sheet_scorer').setLevel(logging.INFO) +``` + + diff --git a/app/sheet_scoring/__init__.py b/app/sheet_scoring/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/sheet_scoring/openai_sheets_scorer.py b/app/sheet_scoring/openai_sheets_scorer.py new file mode 100644 index 0000000..740146e --- /dev/null +++ b/app/sheet_scoring/openai_sheets_scorer.py @@ -0,0 +1,803 @@ +import json +import logging +from datetime import datetime +from enum import IntEnum +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union +import textwrap +import tiktoken +import httpx +from langchain.schema import HumanMessage +from langchain_openai import ChatOpenAI +from sheet_scoring.text_utils import sheet_to_text_views +from sefaria_llm_interface.sheet_scoring import SheetScoringOutput +# Configure logging +logger = logging.getLogger(__name__) + + +class IncompleteScoreError(Exception): + """Raised when LLM JSON is valid but doesn’t cover every reference.""" + pass + + +class RequestStatusOptions(IntEnum): + """Enumeration for tracking the status of LLM processing requests.""" + SUCCESS = 1 + FAILURE = 0 + + +class ScoreLevel(IntEnum): + """Reference discussion and title interest levels.""" + NOT_DISCUSSED = 0 + MINIMAL = 1 + MODERATE = 2 + SIGNIFICANT = 3 + CENTRAL = 4 + + +class LanguageCode: + """Supported language codes.""" + ENGLISH = 'en' + HEBREW = 'he' + DEFAULT = ENGLISH + + +class SheetScorer: + """ + Scores Jewish study sheets for reference relevance and title interest using LLMs, + computes creativity score based on percentage of user generated content. + + This class processes sheets from MongoDB, analyzes their content using OpenAI's GPT models, + and assigns scores for how well each reference is discussed and how interesting + the sheet title is to users. + """ + + # Configuration constants - + # DEFAULT_MAX_INPUT_OUTPUT_TOKENS: total + # tokens (prompt+response) we’ll send in one API call. Lowering this + # shrinks your available context; raising it risks exceeding the model’s + # limit. + # DEFAULT_MAX_OUTPUT_TOKENS: cap on how many tokens the model + # may generate. If you set this too low, responses may be cut off; too + # high wastes quota. + # DEFAULT_CHUNK_SIZE: how many references to score + # in each batch. Larger chunks use more context (better global view) but + # may exceed token budgets. + # MAX_CHUNK_OVERLAP: how many refs to repeat + # between chunks. 
More overlap reduces missing-edge-case errors at the + # cost of redundant API calls. + # DEFAULT_MAX_REFS_TO_PROCESS: total refs + # before falling back to equal-distribution scoring. Hitting this limit + # skips heavy LLM work to avoid runaway costs. - + # DEFAULT_TOKEN_CHAR_RATIO: fallback characters‐per‐token estimate when + # encoding fails. Tweak if you find your actual token counts diverge + # significantly from this estimate. + DEFAULT_MAX_OUTPUT_TOKENS = 16384 + DEFAULT_CHUNK_SIZE = 80 + DEFAULT_MAX_INPUT_OUTPUT_TOKENS = 128000 + DEFAULT_MAX_REFS_TO_PROCESS = 800 + DEFAULT_TOKEN_CHAR_RATIO = 3 + MAX_CHUNK_OVERLAP = 10 + # Database field names + REF_SCORES_FIELD = "ref_scores" + REF_LEVELS_FIELD = "ref_levels" + TITLE_INTEREST_FIELD = "title_interest_level" + LANGUAGE_FIELD = "language" + TITLE_INTEREST_REASON_FIELD = 'title_interest_reason' + PROCESSED_DATETIME_FIELD = "processed_datetime" + CREATIVITY_SCORE_FIELD = 'creativity_score' + + # Valid score levels + VALID_LEVELS: Set[int] = {level.value for level in ScoreLevel} + + def __init__( + self, + api_key: Optional[str], + model: str = "gpt-4o-mini", + max_prompt_tokens: int = DEFAULT_MAX_INPUT_OUTPUT_TOKENS, + token_margin: int = DEFAULT_MAX_OUTPUT_TOKENS, + max_ref_to_process: int = DEFAULT_MAX_REFS_TO_PROCESS, + chunk_size: int = DEFAULT_CHUNK_SIZE, + ): + self.max_prompt_tokens = max_prompt_tokens + self.token_margin = token_margin + self.model = model + self.chunk_size = chunk_size + self.max_ref_to_process = max_ref_to_process + self._http_client_json = httpx.Client() + self._http_client_text = httpx.Client() + self.llm = self._create_json_llm(api_key, model) + self.summarizer = self._create_text_llm(api_key, model) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + return False + + def close(self): + """Close HTTP clients to release resources.""" + self._http_client_json.close() + self._http_client_text.close() + + def _create_json_llm(self, api_key: str, model: str) -> ChatOpenAI: + """Create LLM client for JSON responses.""" + return ChatOpenAI( + model=model, + temperature=0, + top_p=0, + frequency_penalty=0, + presence_penalty=0, + seed=42, + api_key=api_key, + http_client=self._http_client_json, + ) + + def _create_text_llm(self, api_key: str, model: str) -> ChatOpenAI: + """Create LLM client for text responses.""" + return ChatOpenAI( + model=model, + temperature=0, + model_kwargs={"response_format": {"type": "text"}}, + api_key=api_key, + http_client=self._http_client_text, + ) + + def _invoke_llm_with_function(self, prompt: str, + function_schema: Dict[str, Any]) -> ( + Dict)[str, Any]: + """Invoke LLM using function calling instead of JSON mode.""" + response = self.llm.invoke( + [HumanMessage(content=prompt)], + functions=[function_schema], + function_call={"name": function_schema["name"]} + ) + + function_call = getattr(response, "additional_kwargs", {}).get( + "function_call" + ) + if function_call: + return json.loads(function_call["arguments"]) + + raise ValueError("No function call in response") + + def _get_reference_scoring_function_schema(self, expanded_refs: List[str]) -> \ + Dict[str, Any]: + """Create function schema for reference scoring with exact reference + names.""" + return { + "name": "score_references", + "description": "Score how well each reference is " + "discussed in the sheet", + "parameters": { + "type": "object", + "properties": { + self.REF_LEVELS_FIELD: { + "type": "object", + "description": "Scores for each reference (0-4 
scale)", + "properties": { + ref_name: { + "type": "integer", + "description": f"Discussion level for {ref_name}", + "minimum": 0, + "maximum": 4 + } + for ref_name in expanded_refs + }, + "required": expanded_refs, + "additionalProperties": False + } + }, + "required": [self.REF_LEVELS_FIELD], + "additionalProperties": False + } + } + + def _get_title_scoring_schema(self) -> Dict[str, Any]: + """Create function schema for both reference and title scoring.""" + return { + "name": "score_title", + "description": "Score title interest for a Jewish study sheet", + "parameters": { + "type": "object", + "properties": { + self.LANGUAGE_FIELD: { + "type": "string", + "description": "ISO-639-1 title language code", + }, + self.TITLE_INTEREST_FIELD: { + "type": "integer", + "description": "How interesting the title is to " + "users (0-4 scale)", + "minimum": 0, + "maximum": 4 + }, + self.TITLE_INTEREST_REASON_FIELD: { + "type": "string", + "description": "Brief explanation of title interest " + "score (max 20 words)", + "maxLength": 100 + } + }, + "required": [self.LANGUAGE_FIELD, self.TITLE_INTEREST_FIELD, + self.TITLE_INTEREST_REASON_FIELD], + "additionalProperties": False + } + } + + def _get_full_scoring_function_schema(self, expanded_refs: List[str]) -> ( + Dict)[str, Any]: + """Create function schema for both reference and title scoring.""" + return { + "name": "score_sheet", + "description": "Score references and title interest for a Jewish " + "study sheet", + "parameters": { + "type": "object", + "properties": { + self.LANGUAGE_FIELD: { + "type": "string", + "description": "# ISO‑639‑1 code inferred from " + "*original user‑written* content", + }, + self.REF_LEVELS_FIELD: { + "type": "object", + "description": "Scores for each reference (0-4 scale)", + "properties": { + ref_name: { + "type": "integer", + "description": f"Discussion level for {ref_name}", + "minimum": 0, + "maximum": 4 + } + for ref_name in expanded_refs + }, + "required": expanded_refs, + "additionalProperties": False + }, + self.TITLE_INTEREST_FIELD: { + "type": "integer", + "description": "How interesting the title is to " + "users (0-4 scale)", + "minimum": 0, + "maximum": 4 + }, + self.TITLE_INTEREST_REASON_FIELD: { + "type": "string", + "description": "Brief explanation of title interest " + "score (max 20 words)", + "maxLength": 100 + } + }, + "required": [self.LANGUAGE_FIELD, self.REF_LEVELS_FIELD, + self.TITLE_INTEREST_FIELD, + self.TITLE_INTEREST_REASON_FIELD], + "additionalProperties": False + } + } + + @staticmethod + def chunk_list(lst: List[Any], n: int) -> Iterator[List[Any]]: + """Yield successive n‑sized chunks from lst.""" + for i in range(0, len(lst), n): + yield lst[i: i + n] + + def _count_tokens(self, text: str) -> int: + """Rough token count; if no encoder, fall back to char heuristic.""" + try: + encoding = tiktoken.encoding_for_model(self.model) + return len(encoding.encode(text)) + except (KeyError, ValueError) as e: + logger.warning( + f"Could not get encoding for model {self.model}: {e}" + ) + return len(text) // self.DEFAULT_TOKEN_CHAR_RATIO + + def _invoke_llm(self, prompt: str) -> Dict[str, Any]: + """Invoke LLM with prompt and parse JSON response.""" + response = self.llm([HumanMessage(content=prompt)]) + return json.loads(response.content) + + def _create_title_only_prompt_function(self, sheet_title: str) -> str: + return textwrap.dedent( + f"""You are scoring THE TITLE of a Jewish study sheet for how interesting it would be to users. 
+ + SHEET TITLE: + {sheet_title} + + TASK: Return JSON with keys `title_interest_level` (0-4) and `title_interest_reason` ( < 20 words). + + Title interest level (int 0–4): + 0: Not interesting / off‑topic for users + 1: Slight relevance, low pull + 2: Somewhat interesting; user might skim + 3: Interesting; user likely to open + 4: Very compelling / must‑open + """) + + def _create_chunk_prompt_for_function(self, sheet_content: str, + expanded_refs: List[str]) -> str: + """Create prompt for function calling (no JSON format instructions + needed).""" + refs_md = "\n".join(f"- {r}" for r in expanded_refs) + return textwrap.dedent( + f""" + You are analyzing a Jewish study sheet. Rate how much each listed reference + is discussed or central in the sheet. + + SHEET CONTENT: + {sheet_content} + + REFERENCES TO EVALUATE: + {refs_md} + + Scoring Scale (0-4): + {ScoreLevel.NOT_DISCUSSED}: Quoted only, no discussion + {ScoreLevel.MINIMAL}: Mentioned only through neighboring verses + {ScoreLevel.MODERATE}: Moderate discussion (some commentary) + {ScoreLevel.SIGNIFICANT}: Significant discussion (substantial commentary) + {ScoreLevel.CENTRAL}: Central focus of sheet + + Score each reference based on how thoroughly it's discussed in the content.""" + ) + + def _create_final_chunk_prompt_for_function(self, sheet_content: str, + expanded_refs: List[str], + sheet_title: str) -> str: + """Create prompt for final chunk with title scoring using function + calling.""" + sheet_title_clean = sheet_title.strip() or "(untitled)" + refs_md = "\n".join(f"- {r}" for r in expanded_refs) + + return textwrap.dedent(f""" + Analyze this Jewish study sheet and provide two types of scores: + + SHEET TITLE: {sheet_title_clean} + + SHEET CONTENT: + {sheet_content} + + REFERENCES TO EVALUATE: + {refs_md} + + TASKS: + 1. Reference Discussion Scoring (0-4): + {ScoreLevel.NOT_DISCUSSED}: Quoted only, no discussion + {ScoreLevel.MINIMAL}: Mentioned only through neighboring verses + {ScoreLevel.MODERATE}: Moderate discussion (some commentary) + {ScoreLevel.SIGNIFICANT}: Significant discussion (substantial commentary) + {ScoreLevel.CENTRAL}: Central focus of sheet + + 2. Title Interest Scoring (0-4): + 0: Not interesting/off-topic + 1: Slight relevance, low appeal + 2: Somewhat interesting; user might skim + 3: Interesting; user likely to open + 4: Very compelling/must-open + + Infer the language from the original user-written content. + """) + + def _validate_score_level(self, score: Any, + field_name: str = "score") -> int: + """Validate and normalize score to valid range.""" + if score not in self.VALID_LEVELS: + try: + score = int(score) + except (ValueError, TypeError): + logger.warning( + f"Invalid {field_name}: {score}, defaulting to 0" + ) + return ScoreLevel.NOT_DISCUSSED + + if score not in self.VALID_LEVELS: + clamped = max( + ScoreLevel.NOT_DISCUSSED, + min(ScoreLevel.CENTRAL, score) + ) + logger.warning( + f"{field_name} {score} out of range, clamping to {clamped}" + ) + return clamped + + return score + + def _sheet_to_text( + self, + no_quotes_content: str, + full_content: str, + max_tokens: int, + add_full_commentary: bool + ) -> str: + """ + Build a text snapshot of the sheet with an *all‑or‑nothing* rule: + • Always include every bit of author commentary. + • Append *all* canonical quotations only if the whole bundle still + fits into `max_tokens`. 
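+
+        Illustrative example (hypothetical token counts, with
+        add_full_commentary=True): given max_tokens=1000, 400 tokens of
+        commentary and 900 tokens of quotations, the combined 1300-token
+        bundle does not fit, so only the 400-token commentary is sent;
+        if the commentary alone were 1200 tokens, it would be summarized
+        or truncated down to the budget instead.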
+ """ + comm_tokens = self._count_tokens(no_quotes_content) + # Commentary alone is already bigger than the budget → truncate & quit + full_tokens = self._count_tokens(full_content) + if add_full_commentary: + if full_tokens <= max_tokens: + logger.info("Sending to LLM sheet with quotations") + return full_content + + if comm_tokens >= max_tokens: + logger.info("Truncating user commentaries") + return self._truncate_to_token_budget(no_quotes_content, max_tokens) + logger.info("Sending to LLM sheet without quotations text") + return no_quotes_content + + def _get_title_info(self, sheet_title: str) -> Dict[str, Any]: + """Obtain title-interest score ONLY (used when no content).""" + prompt = self._create_title_only_prompt_function(sheet_title) + try: + function_schema = self._get_title_scoring_schema() + data = self._invoke_llm_with_function(prompt, function_schema) + title_level = self._validate_score_level( + data.get(self.TITLE_INTEREST_FIELD), + self.TITLE_INTEREST_FIELD + ) + + return { + self.TITLE_INTEREST_FIELD: + title_level, + self.TITLE_INTEREST_REASON_FIELD: + data.get(self.TITLE_INTEREST_REASON_FIELD, ""), + self.LANGUAGE_FIELD: data.get( + self.LANGUAGE_FIELD, LanguageCode.DEFAULT + ), + } + except Exception as e: + logger.error(f"Title-only GPT attempt failed: {e}") + return { + self.TITLE_INTEREST_FIELD: ScoreLevel.NOT_DISCUSSED, + self.TITLE_INTEREST_REASON_FIELD: "LLM error", + self.LANGUAGE_FIELD: LanguageCode.DEFAULT + } + + def _normalize_scores_to_percentages( + self, + sheet_tokens: int, + score_levels: Dict[str, int], + beta: float = 1500 # token mass where no penalty + ) -> Dict[str, float]: + """Convert reference scores to percentages with size penalty + for shorter sheets.""" + + total_level = sum(score_levels.values()) or 1 + size_factor = min(1.0, sheet_tokens / beta) # clamp to 1 + + # small sheets (few tokens) → size_factor < 1 → percentages shrink + percentages = { + ref: round(level * 100 / total_level * size_factor, 2) + for ref, level in score_levels.items() + } + + norm = sum(percentages.values()) or 1 + percentages = {r: round(v * 100 / norm, 2) for r, v in + percentages.items()} + return percentages + + def _grade_refs_resilient( + self, + content: str, + refs: List[str], + *, + with_title: bool = False, + sheet_title: str = "" + ) -> Tuple[Optional[Dict[str, Any]], Dict[str, int]]: + """ + Fault-tolerant reference scoring using divide-and-conquer strategy. + Attempts to score all references at once via LLM. If that fails + (due to incomplete responses), + recursively splits the reference list in half and scores each + subset separately until all references have scores. + This prevents total failure when the LLM struggles with large + reference lists or encounters transient errors. 
+ + """ + if not refs: + return {}, {} + + try: + if with_title: + prompt = self._create_final_chunk_prompt_for_function( + content, refs, sheet_title + ) + function_schema = self._get_full_scoring_function_schema(refs) + else: + prompt = self._create_chunk_prompt_for_function(content, refs) + function_schema = self._get_reference_scoring_function_schema( + refs + ) + data, scores = self._get_gpt_ref_scores_function( + prompt, function_schema, refs + ) + return data, scores + except Exception: + pass + + # fallback branch + if len(refs) == 1: # nothing left to split + return {}, {refs[0]: ScoreLevel.NOT_DISCUSSED} + + mid = len(refs) // 2 + ld, ls = self._grade_refs_resilient( + content, refs[:mid], + with_title=with_title, + sheet_title=sheet_title + ) + rd, rs = self._grade_refs_resilient( + content, refs[mid:], + with_title=with_title, + sheet_title=sheet_title + ) + merged_scores = {**ls, **rs} + merged_data = ld or rd + return merged_data, merged_scores + + def _get_gpt_ref_scores_function(self, prompt: str, function_schema, + expected_refs: List[str]): + """Calls the LLM with structured function schema, validates all + returned scores are in valid range (0-4), handles missing references, + and ensures exactly the expected references are scored.""" + try: + data = self._invoke_llm_with_function(prompt, function_schema) + chunk_scores = data.get(self.REF_LEVELS_FIELD, {}) + validated_scores = {} + for ref, score in chunk_scores.items(): + validated_scores[ref] = self._validate_score_level( + score, f"ref_score[{ref}]" + ) + + # Check for missing references and assign default scores (0) + missing_refs = set(expected_refs) - set(validated_scores.keys()) + if missing_refs: + logger.warning( + f"GPT didn't return scores for {len(missing_refs)} " + ) + if len(missing_refs) < 5: + logger.warning(f"Defaulting missing scores to zeros") + for ref in missing_refs: + validated_scores[ref] = ScoreLevel.NOT_DISCUSSED + + else: + raise IncompleteScoreError( + f"Missing {len(missing_refs)} references" + ) + + # Ensure we only include expected references (in case GPT + # returned extras) + final_scores = { + ref: validated_scores.get(ref, ScoreLevel.NOT_DISCUSSED) for ref + in expected_refs} + + data[self.REF_SCORES_FIELD] = final_scores + return data, final_scores + + except IncompleteScoreError: + raise + + except Exception as e: + logger.error(f"Chunk GPT failed: {e}") + return None + + def _last_regular_start(self, n: int, chunk: int, overlap: int) -> int: + """ + Return the index where the *final* chunk (with title) should start. + If the total length fits into one chunk plus the allowed overlap, + analyse everything together (start = 0). 
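+
+        Examples (hypothetical sizes, chunk=80, overlap=10): with n=85 refs,
+        n <= chunk + overlap, so start=0 and all refs are scored in the single
+        final (title-bearing) chunk; with n=200, start=120, so regular chunks
+        cover refs[:120] and the final chunk covers refs[120:200].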
+ """ + if n <= chunk + overlap: + return 0 + step = chunk - overlap + return max(0, n - chunk) if step <= 0 else (n - chunk) + + def _process_reference_chunks( + self, + content: str, + expanded_refs: List[str] + ) -> Optional[Dict[str, int]]: + """Process reference chunks in batches.""" + ref_scores: Dict[str, int] = {} + + last_chunk_start = self._last_regular_start( + len(expanded_refs), self.chunk_size, self.MAX_CHUNK_OVERLAP + ) + + for chunk in self.chunk_list( + expanded_refs[:last_chunk_start], self.chunk_size + ): + # prompt = self._create_chunk_prompt(content,chunk) + _, chunk_scores = self._grade_refs_resilient( + content=content, + refs=chunk, + with_title=False + ) + if chunk_scores is None: + return None + ref_scores.update(chunk_scores) + + return ref_scores + + def _process_final_chunk_with_title( + self, + content: str, + expanded_refs: List[str], + title: str, + ) -> Optional[Dict[str, Any]]: + """Process final chunk and get title scores.""" + start = self._last_regular_start( + len(expanded_refs), self.chunk_size, self.MAX_CHUNK_OVERLAP + ) + final_chunk = expanded_refs[start:] + + # prompt = self._create_final_chunk_prompt(content,final_chunk,title) + result = self._grade_refs_resilient( + content=content, + refs=final_chunk, + with_title=True, + sheet_title=title + ) + + if result is None: + return None + + data, _ = result + return data + + def get_gpt_scores( + self, + content: str, + expanded_refs: List[str], + title: str, + ) -> Optional[Dict[str, Any]]: + """Get GPT scores for references and title.""" + # Process reference chunks + ref_scores = self._process_reference_chunks(content, expanded_refs) + if ref_scores is None: + return None + + # Process final chunk with title + final_data = self._process_final_chunk_with_title( + content, expanded_refs, title + ) + if final_data is None: + return None + + # Combine scores + final_chunk_scores = final_data.get(self.REF_SCORES_FIELD, {}) + ref_scores.update(final_chunk_scores) + + # Normalize to percentages + score_percentages = self._normalize_scores_to_percentages( + score_levels=ref_scores, + sheet_tokens=self._count_tokens(content) + ) + + # Validate title score + title_level = self._validate_score_level( + final_data.get(self.TITLE_INTEREST_FIELD), + self.TITLE_INTEREST_FIELD + ) + + return { + self.LANGUAGE_FIELD: final_data.get( + self.LANGUAGE_FIELD, LanguageCode.DEFAULT + ), + self.REF_LEVELS_FIELD: ref_scores, + self.REF_SCORES_FIELD: score_percentages, + self.TITLE_INTEREST_FIELD: title_level, + self.TITLE_INTEREST_REASON_FIELD: final_data.get( + self.TITLE_INTEREST_REASON_FIELD, "" + ), + } + + def _truncate_to_token_budget(self, text: str, max_tokens: int) -> str: + """Truncate text to fit within token budget using LLM summarization.""" + if self._count_tokens(text) <= max_tokens: + return text + try: + prompt = f""" + Compress the following commentary to ≤ {max_tokens} tokens. + Keep every reference tag like "Genesis 1:1" or "Exodus 2:5". + Use clear sentences; preserve main ideas. 
+ + {text} + """ + summary = self.summarizer( + [HumanMessage(content=prompt)] + ).content.strip() + + if self._count_tokens(summary) <= max_tokens: + return summary + else: + # Fallback: character-based truncation + return summary[:max_tokens * self.DEFAULT_TOKEN_CHAR_RATIO] + + except Exception as e: + logger.error(f"Summarization failed: {e}") + # Fallback: character-based truncation + return text[:max_tokens * self.DEFAULT_TOKEN_CHAR_RATIO] + + def create_failure_output(self, sheet_id: str, request_status_message: str) -> ( + SheetScoringOutput): + """Create a standardized failure output when sheet processing cannot + be completed.""" + return SheetScoringOutput( + sheet_id=sheet_id, + processed_datetime=str(datetime.utcnow()), + language="", + title_interest_level=0, + title_interest_reason="", + creativity_score=0, + ref_levels={}, + ref_scores={}, + request_status=RequestStatusOptions.FAILURE, + request_status_message=request_status_message + ) + + def process_sheet_by_content(self, + sheet_id: str, + expanded_refs: List[str], + title: str, + sources: List[Dict[str, Union[str, Dict[str, str]]]], + add_full_commentary=False) -> SheetScoringOutput: + """Score a single sheet based on its content.""" + if not expanded_refs: + request_status_message = f"No expanded refs for sheet {sheet_id}, skipping" + logger.info(request_status_message) + return self.create_failure_output(sheet_id, + request_status_message=request_status_message) + text_views = sheet_to_text_views(title=title, sources=sources, default_lang=LanguageCode.DEFAULT) + no_quotes_content = text_views["no_quotes"] + full_content = text_views["with_quotes"] + has_original = text_views["has_original"] + creativity_score = text_views["creativity_score"] + + # Check for original content and reference limits + if (not has_original or + len(expanded_refs) > self.max_ref_to_process): + logger.info(f"Sheet {sheet_id}: using equal distribution") + score_percentages = {ref: 0 for ref in expanded_refs} + title_info = self._get_title_info(title) + + return SheetScoringOutput(sheet_id=sheet_id, + ref_levels=score_percentages, + ref_scores=score_percentages, + processed_datetime=str(datetime.utcnow()), + creativity_score=creativity_score, + title_interest_level=title_info[self.TITLE_INTEREST_FIELD], + title_interest_reason=title_info[self.TITLE_INTEREST_REASON_FIELD], + language=title_info[self.LANGUAGE_FIELD], + request_status=RequestStatusOptions.SUCCESS, + request_status_message="The sheet has no user generated content" + ) + + content = self._sheet_to_text( + no_quotes_content=no_quotes_content, + full_content=full_content, + max_tokens=self.max_prompt_tokens - self.token_margin, + add_full_commentary=add_full_commentary) + # Process with GPT + gpt_analysis = self.get_gpt_scores(content, expanded_refs, title) + if not gpt_analysis: + request_status_message = f"Failed to get GPT scores for sheet {sheet_id}" + logger.error(request_status_message) + return self.create_failure_output(sheet_id=sheet_id, + request_status_message=request_status_message) + + return SheetScoringOutput( + sheet_id=sheet_id, + ref_levels=gpt_analysis[self.REF_LEVELS_FIELD], + ref_scores=gpt_analysis[self.REF_SCORES_FIELD], + processed_datetime=str(datetime.utcnow()), + creativity_score=creativity_score, + title_interest_level=gpt_analysis[self.TITLE_INTEREST_FIELD], + title_interest_reason=gpt_analysis[self.TITLE_INTEREST_REASON_FIELD], + language=gpt_analysis[self.LANGUAGE_FIELD], + request_status=RequestStatusOptions.SUCCESS, + request_status_message="" + ) diff 
--git a/app/sheet_scoring/sheet_scoring.py b/app/sheet_scoring/sheet_scoring.py new file mode 100644 index 0000000..2fbb481 --- /dev/null +++ b/app/sheet_scoring/sheet_scoring.py @@ -0,0 +1,14 @@ +from sheet_scoring.openai_sheets_scorer import SheetScorer +import os +from sefaria_llm_interface.sheet_scoring import ( + SheetScoringInput, + SheetScoringOutput, +) + + +def score_one_sheet(inp: SheetScoringInput) -> SheetScoringOutput: + with SheetScorer(api_key=os.getenv("OPENAI_API_KEY")) as scorer: + return scorer.process_sheet_by_content(sheet_id=inp.sheet_id, + title=inp.title, + sources=inp.sources, + expanded_refs=inp.expanded_refs) \ No newline at end of file diff --git a/app/sheet_scoring/tasks.py b/app/sheet_scoring/tasks.py new file mode 100644 index 0000000..eb4aa55 --- /dev/null +++ b/app/sheet_scoring/tasks.py @@ -0,0 +1,13 @@ +from celery import shared_task +from sheet_scoring.sheet_scoring import score_one_sheet +from sefaria_llm_interface.sheet_scoring import ( + SheetScoringInput +) +from dataclasses import asdict + + +@shared_task(name='llm.score_sheet') +def score_sheet_task(raw_input: dict) -> dict: + inp = SheetScoringInput(**raw_input) + out = score_one_sheet(inp) + return asdict(out) \ No newline at end of file diff --git a/app/sheet_scoring/text_utils.py b/app/sheet_scoring/text_utils.py new file mode 100644 index 0000000..551e282 --- /dev/null +++ b/app/sheet_scoring/text_utils.py @@ -0,0 +1,114 @@ +import re +from typing import Dict, List, Union, Any + +TOKEN_RE = re.compile(r"\b\w+\b", re.UNICODE) + + +def strip_html(raw: str) -> str: + """Remove tags & entities, collapse whitespace.""" + if not raw: + return "" + return '\n'.join([' '.join(line.split()) for line in raw.split('\n')]) + + +def token_count(text: str) -> int: + """Approximate word tokens (both English & Hebrew).""" + return len(TOKEN_RE.findall(text)) + + +def sheet_to_text_views(title: str, + sources: List[Dict[str, Union[str, Dict[str, str]]]], + default_lang: str = "en") -> Dict[str, Any]: + """ + Build three plain‑text snapshots of a Sefaria sheet **and** compute a + creativity score. 
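+
+    The creativity score is original_tokens / (original_tokens + quoted_tokens),
+    counted with a simple word-token regex. For example (hypothetical counts),
+    a sheet with 300 words of title/commentary and 700 words of quoted
+    canonical text scores 0.3.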
+ + Returns + ------- + quotes_only str – ref + canonical text blocks + no_quotes str – title & user commentary, refs only for quotes + with_quotes str – full sheet (title, commentary, *and* quotes) + has_original bool – True if any user commentary exists + creativity_score float – user_token_count / total_token_count + """ + + quotes: List[str] = [] + no_quotes: List[str] = [] + with_quotes: List[str] = [] + + original_tokens = 0 + quoted_tokens = 0 + has_original = False + + if title: + tok = token_count(title) + original_tokens += tok + no_quotes.append(title) + with_quotes.append(title) + + for blk in sources: + # --- outsideText (single‑lang commentary) + if "outsideText" in blk: + txt = strip_html(blk["outsideText"]).strip() + if txt: + has_original = True + t = token_count(txt) + original_tokens += t + no_quotes.append(txt) + with_quotes.append(txt) + + if "outsideBiText" in blk: + for lang in ("en", "he"): + txt = strip_html(blk["outsideBiText"].get(lang, "")).strip() + if txt: + has_original = True + original_tokens += token_count(txt) + no_quotes.append(txt) + with_quotes.append(txt) + + if "text" in blk: + ref = blk.get("ref", "").strip() + canon = strip_html(blk["text"].get(default_lang, "")).strip() + + # show ref label in all views + if ref: + no_quotes.append(ref) + header = f"{ref}:" + else: + header = "" + + if canon: + # quote tokens count toward quoted_tokens + qtok = token_count(canon) + quoted_tokens += qtok + + # add to quotes‑only and with_quotes + if header: + quotes.append(header) + with_quotes.append(header) + quotes.append(canon) + with_quotes.append(canon) + + if "comment" in blk: + txt = strip_html(blk["comment"]).strip() + if txt: + has_original = True + original_tokens += token_count(txt) + no_quotes.append(txt) + with_quotes.append(txt) + + joiner = "\n\n" + quotes_only = joiner.join(quotes) + commentary = joiner.join(no_quotes) + full_sheet = joiner.join(with_quotes) + + total_tokens = original_tokens + quoted_tokens or 1 # avoid div‑by‑zero + creativity = original_tokens / total_tokens + + return { + "quotes_only": quotes_only, + "no_quotes": commentary, + "with_quotes": full_sheet, + "has_original": has_original, + "creativity_score": creativity + } \ No newline at end of file diff --git a/app/util/sentencizer.py b/app/util/sentencizer.py index 363876c..d76da17 100644 --- a/app/util/sentencizer.py +++ b/app/util/sentencizer.py @@ -74,7 +74,7 @@ def claude_sentencizer_first_sentence(text): from basic_langchain.chat_models import ChatAnthropic from basic_langchain.schema import SystemMessage, HumanMessage from util.general import get_by_xml_tag - system = SystemMessage(content="Given a text discussing Torah topics will little to no punctuation, " + system = SystemMessage(content="Given a text discussing Torah topics with little to no punctuation, " "output the first sentence. Input is in tags. The first sentence " "should be output verbatim as it appears in wrapped in " " tags. Since the input text has no punctuation, use your judgement as to where the first sentence ends. Prefer smaller sentences.") diff --git a/build/entrypoint.sh b/build/entrypoint.sh index fbb070e..7c5136d 100644 --- a/build/entrypoint.sh +++ b/build/entrypoint.sh @@ -1,3 +1,3 @@ #!/bin/bash -celery -A celery_setup.app worker -Q ${QUEUE_NAME} -l INFO --concurrency 50 \ No newline at end of file +celery -A celery_setup.app worker -Q ${QUEUE_NAME} -l INFO --concurrency 4 \ No newline at end of file