From 84eadf5be51a9eb39fd2c0195ce283f4a0f596b2 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 3 Nov 2025 23:48:49 +0000 Subject: [PATCH 01/93] codex generated changes, need to review and refactor --- .../0003_subscription_output_fields.py | 72 +++++++++++++++++++ radis/extractions/models.py | 28 +++++++- 2 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 radis/extractions/migrations/0003_subscription_output_fields.py diff --git a/radis/extractions/migrations/0003_subscription_output_fields.py b/radis/extractions/migrations/0003_subscription_output_fields.py new file mode 100644 index 00000000..7c6acf18 --- /dev/null +++ b/radis/extractions/migrations/0003_subscription_output_fields.py @@ -0,0 +1,72 @@ +# Generated by Django 5.2.7 on 2025-11-03 22:58 + +import django.db.models.deletion +from django.db import migrations, models +from django.db.models import Q + + +class Migration(migrations.Migration): + + dependencies = [ + ("extractions", "0002_procrastinate_on_delete"), + ("subscriptions", "0008_rename_filter_fields_result_subscribeditem_answers_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="outputfield", + name="job", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="output_fields", + to="extractions.extractionjob", + ), + ), + migrations.RemoveConstraint( + model_name="outputfield", + name="unique_output_field_name_per_job", + ), + migrations.RemoveField( + model_name="outputfield", + name="optional", + ), + migrations.AddField( + model_name="outputfield", + name="subscription", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="extraction_fields", + to="subscriptions.subscription", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.UniqueConstraint( + condition=Q(job__isnull=False), + fields=("name", "job_id"), + name="unique_output_field_name_per_job", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.UniqueConstraint( + condition=Q(subscription__isnull=False), + fields=("name", "subscription_id"), + name="unique_output_field_name_per_subscription", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.CheckConstraint( + check=( + Q(job__isnull=False, subscription__isnull=True) + | Q(job__isnull=True, subscription__isnull=False) + ), + name="output_field_exactly_one_parent", + ), + ), + ] diff --git a/radis/extractions/models.py b/radis/extractions/models.py index dc761b2f..f778d61a 100644 --- a/radis/extractions/models.py +++ b/radis/extractions/models.py @@ -4,6 +4,7 @@ from django.conf import settings from django.contrib.auth.models import Group from django.db import models +from django.db.models import Q from django.urls import reverse from procrastinate.contrib.django import app from procrastinate.contrib.django.models import ProcrastinateJob @@ -82,16 +83,37 @@ class OutputField(models.Model): max_length=1, choices=OutputType.choices, default=OutputType.TEXT ) get_output_type_display: Callable[[], str] - optional = models.BooleanField(default=False) - job = models.ForeignKey[ExtractionJob]( - ExtractionJob, on_delete=models.CASCADE, related_name="output_fields" + job = models.ForeignKey[ + ExtractionJob + ](ExtractionJob, null=True, blank=True, on_delete=models.CASCADE, related_name="output_fields") + subscription = models.ForeignKey[ + "subscriptions.Subscription" + ]( + "subscriptions.Subscription", + null=True, + blank=True, + on_delete=models.CASCADE, + related_name="extraction_fields", ) class Meta: constraints = [ models.UniqueConstraint( fields=["name", "job_id"], + condition=Q(job__isnull=False), name="unique_output_field_name_per_job", + ), + models.UniqueConstraint( + fields=["name", "subscription_id"], + condition=Q(subscription__isnull=False), + name="unique_output_field_name_per_subscription", + ), + models.CheckConstraint( + check=( + Q(job__isnull=False, subscription__isnull=True) + | Q(job__isnull=True, subscription__isnull=False) + ), + name="output_field_exactly_one_parent", ) ] From f39195d191ebcf6a2e6690caa4a4654534e35914 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 3 Nov 2025 23:50:06 +0000 Subject: [PATCH 02/93] forgotten files to stage --- radis/settings/base.py | 24 +++-- radis/subscriptions/forms.py | 89 +++++++++++++++++-- .../0009_separate_filter_and_extraction.py | 49 ++++++++++ radis/subscriptions/models.py | 54 +++++++++-- radis/subscriptions/processors.py | 76 +++++++++++----- .../_subscribed_item_preview.html | 28 ++++++ .../subscriptions/subscription_detail.html | 37 +++++++- radis/subscriptions/tests/__init__.py | 0 .../tests/test_processor_utils.py | 83 +++++++++++++++++ radis/subscriptions/utils/processor_utils.py | 85 +++++++++++++++--- radis/subscriptions/views.py | 75 ++++++++++++---- 11 files changed, 527 insertions(+), 73 deletions(-) create mode 100644 radis/subscriptions/migrations/0009_separate_filter_and_extraction.py create mode 100644 radis/subscriptions/tests/__init__.py create mode 100644 radis/subscriptions/tests/test_processor_utils.py diff --git a/radis/settings/base.py b/radis/settings/base.py index dea0bb53..439c84fb 100644 --- a/radis/settings/base.py +++ b/radis/settings/base.py @@ -356,20 +356,34 @@ """ # Subscription -QUESTIONS_SYSTEM_PROMPT = """ +SUBSCRIPTION_FILTER_PROMPT = """ You are an AI medical assistant with extensive knowledge in radiology and general medicine. You have been trained on a wide range of medical literature, including the latest research and guidelines in radiological practices. -Answer the following questions from the given radiology report. The report and questions can -be given in any language. -Base your answers only on the information provided in the report. Don't hallucinate. -Return the answer in JSON format. Answer with 'true' for 'yes' and 'false' for 'no'. +Answer the following filter questions about the radiology report. The questions can be in any +language. Base your answers strictly on the contents of the report. Return the answers in JSON +format using the provided field identifiers. Answer with `true` for "yes" and `false` for "no". Radiology Report: $report Questions: $questions + +""" + +SUBSCRIPTION_EXTRACTION_PROMPT = """ +You are an AI medical assistant with extensive knowledge in radiology and general medicine. +Extract the requested information from the radiology report. Only provide data that is explicitly +mentioned in the report and respect the expected data type. If the report does not contain the +requested information, respond with null. Return the extracted information in JSON format using +the provided field identifiers. + +Radiology Report: +$report + +Fields to extract: +$fields """ # Extraction diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index 59e32c29..cb7af395 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -9,7 +9,9 @@ from radis.reports.models import Language, Modality from radis.search.forms import AGE_STEP, MAX_AGE, MIN_AGE -from .models import Question, Subscription +from radis.extractions.models import OutputField + +from .models import FilterQuestion, Subscription from .site import subscription_retrieval_providers @@ -95,7 +97,16 @@ def build_layout(self): "provider", "query", "send_finished_mail", - Formset("formset", legend="Questions", add_form_label="Add Question"), + Formset( + "filter_formset", + legend="Filter Questions", + add_form_label="Add Filter Question", + ), + Formset( + "extraction_formset", + legend="Extraction Fields", + add_form_label="Add Extraction Field", + ), ), Column( "patient_id", @@ -139,15 +150,21 @@ def clean(self) -> dict[str, Any] | None: return super().clean() -class QuestionForm(forms.ModelForm): +class FilterQuestionForm(forms.ModelForm): class Meta: - model = Question - fields = ["question"] + model = FilterQuestion + fields = ["question", "expected_answer"] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fields["question"].required = False + self.fields["expected_answer"].required = False + self.fields["expected_answer"].choices = [ + ("", "Select expected answer"), + *FilterQuestion.ExpectedAnswer.choices, + ] + self.fields["expected_answer"].label = "Accept when answer is" self.helper = FormHelper() self.helper.form_tag = False @@ -157,17 +174,73 @@ def __init__(self, *args, **kwargs): Field("id", type="hidden"), Field("DELETE", type="hidden"), "question", + "expected_answer", ), ) + def clean(self) -> dict[str, Any]: + cleaned_data = super().clean() + question = cleaned_data.get("question") + expected_answer = cleaned_data.get("expected_answer") + + if not question and not expected_answer: + return cleaned_data + + if question and not expected_answer: + cleaned_data["expected_answer"] = FilterQuestion.ExpectedAnswer.YES + + if not question: + raise forms.ValidationError("Question text is required when specifying a filter.") + + return cleaned_data + + +class ExtractionFieldForm(forms.ModelForm): + class Meta: + model = OutputField + fields = ["name", "description", "output_type"] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.fields["name"].required = True + self.fields["description"].required = False + + self.helper = FormHelper() + self.helper.form_tag = False + self.helper.disable_csrf = True + self.helper.layout = Layout( + Div( + Field("id", type="hidden"), + Field("DELETE", type="hidden"), + Row( + Column("name", css_class="col-6"), + Column("output_type", css_class="col-4"), + ), + "description", + ) + ) + -QuestionFormSet = forms.inlineformset_factory( +FilterQuestionFormSet = forms.inlineformset_factory( Subscription, - Question, - form=QuestionForm, + FilterQuestion, + form=FilterQuestionForm, extra=1, min_num=0, max_num=3, validate_max=True, can_delete=False, ) + +ExtractionFieldFormSet = forms.inlineformset_factory( + Subscription, + OutputField, + form=ExtractionFieldForm, + fk_name="subscription", + extra=1, + min_num=0, + max_num=10, + validate_max=True, + can_delete=False, +) diff --git a/radis/subscriptions/migrations/0009_separate_filter_and_extraction.py b/radis/subscriptions/migrations/0009_separate_filter_and_extraction.py new file mode 100644 index 00000000..7c923225 --- /dev/null +++ b/radis/subscriptions/migrations/0009_separate_filter_and_extraction.py @@ -0,0 +1,49 @@ +# Generated by Django 5.2.7 on 2025-11-03 21:25 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ( + "subscriptions", + "0008_rename_filter_fields_result_subscribeditem_answers_and_more", + ), + ] + + operations = [ + migrations.RenameModel( + old_name="Question", + new_name="FilterQuestion", + ), + migrations.AddField( + model_name="filterquestion", + name="expected_answer", + field=models.CharField( + choices=[("Y", "Yes"), ("N", "No")], + default="Y", + max_length=1, + ), + ), + migrations.AlterField( + model_name="filterquestion", + name="subscription", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="filter_questions", + to="subscriptions.subscription", + ), + ), + migrations.RenameField( + model_name="subscribeditem", + old_name="answers", + new_name="filter_results", + ), + migrations.AddField( + model_name="subscribeditem", + name="extraction_results", + field=models.JSONField(blank=True, null=True), + ), + ] diff --git a/radis/subscriptions/models.py b/radis/subscriptions/models.py index 71a680c8..115583b2 100644 --- a/radis/subscriptions/models.py +++ b/radis/subscriptions/models.py @@ -8,6 +8,7 @@ from procrastinate.contrib.django.models import ProcrastinateJob from radis.core.models import AnalysisJob, AnalysisTask +from radis.extractions.models import OutputField from radis.reports.models import Language, Modality, Report @@ -50,7 +51,8 @@ class Subscription(models.Model): created_at = models.DateTimeField(auto_now_add=True) last_refreshed = models.DateTimeField(auto_now_add=True) - questions: models.QuerySet["Question"] + filter_questions: models.QuerySet["FilterQuestion"] + extraction_fields: models.QuerySet[OutputField] items: models.QuerySet["SubscribedItem"] send_finished_mail = models.BooleanField(default=False) @@ -67,17 +69,30 @@ def __str__(self): return f"Subscription {self.name} [{self.pk}]" -class Question(models.Model): +class FilterQuestion(models.Model): + class ExpectedAnswer(models.TextChoices): + YES = "Y", "Yes" + NO = "N", "No" + subscription = models.ForeignKey[Subscription]( - Subscription, on_delete=models.CASCADE, related_name="questions" + Subscription, on_delete=models.CASCADE, related_name="filter_questions" ) question = models.CharField(max_length=300) + expected_answer = models.CharField( + max_length=1, + choices=ExpectedAnswer.choices, + default=ExpectedAnswer.YES, + ) def __str__(self) -> str: max_length = 30 - if len(self.question) > max_length: - return f'Question "{self.question[:max_length]}..." [{self.pk}]' - return f'Question "{self.question}" [{self.pk}]' + truncated = self.question[:max_length] + suffix = "..." if len(self.question) > max_length else "" + return f'Filter Question "{truncated}{suffix}" [{self.pk}]' + + @property + def expected_answer_bool(self) -> bool: + return self.expected_answer == self.ExpectedAnswer.YES class SubscribedItem(models.Model): @@ -88,12 +103,37 @@ class SubscribedItem(models.Model): "SubscriptionJob", null=True, on_delete=models.SET_NULL, related_name="items" ) report = models.ForeignKey[Report](Report, on_delete=models.CASCADE, related_name="+") - answers = models.JSONField(null=True, blank=True) + filter_results = models.JSONField(null=True, blank=True) + extraction_results = models.JSONField(null=True, blank=True) created_at = models.DateTimeField(auto_now_add=True) def __str__(self): return f"SubscribedItem of {self.subscription} [{self.pk}]" + def iter_filter_results(self) -> list[tuple[FilterQuestion, bool]]: + if not self.filter_results: + return [] + + results: list[tuple[FilterQuestion, bool]] = [] + subscription_questions = {str(q.pk): q for q in self.subscription.filter_questions.all()} + for key, value in self.filter_results.items(): + question = subscription_questions.get(str(key)) + if question is not None: + results.append((question, bool(value))) + return results + + def iter_extraction_results(self) -> list[tuple[OutputField, object]]: + if not self.extraction_results: + return [] + + results: list[tuple[OutputField, object]] = [] + subscription_fields = {str(f.pk): f for f in self.subscription.extraction_fields.all()} + for key, value in self.extraction_results.items(): + field = subscription_fields.get(str(key)) + if field is not None: + results.append((field, value)) + return results + class SubscriptionJob(AnalysisJob): default_priority = settings.SUBSCRIPTION_DEFAULT_PRIORITY diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index 002371a3..0f6b0a9e 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -1,4 +1,5 @@ import logging +from typing import Any from concurrent.futures import Future, ThreadPoolExecutor from string import Template @@ -16,8 +17,10 @@ SubscriptionTask, ) from .utils.processor_utils import ( - generate_questions_for_prompt, - generate_questions_schema, + build_extraction_schema, + build_filter_schema, + generate_extraction_fields_prompt, + generate_filter_questions_prompt, ) logger = logging.getLogger(__name__) @@ -47,25 +50,58 @@ def process_task(self, task: SubscriptionTask) -> None: def process_report(self, report: Report, task: SubscriptionTask) -> None: subscription: Subscription = task.job.subscription - Schema = generate_questions_schema(subscription.questions) - prompt = Template(settings.QUESTION_SYSTEM_PROMPT).substitute( - { - "report": report.body, - "questions": generate_questions_for_prompt(subscription.questions), - } - ) - result = self.client.extract_data(prompt, Schema) + filter_bundle = build_filter_schema(subscription.filter_questions) - is_accepted = all( - [getattr(result, field_name) for field_name in result.__pydantic_fields__] - ) - if is_accepted: - SubscribedItem.objects.create( - subscription=task.job.subscription, - job=task.job, - report=report, - filter_fields_results=result.model_dump(), + filter_results: dict[str, bool] = {} + is_accepted = True + + if filter_bundle.mapping: + filter_prompt = Template(settings.SUBSCRIPTION_FILTER_PROMPT).substitute( + { + "report": report.body, + "questions": generate_filter_questions_prompt(filter_bundle.mapping), + } ) - logger.debug(f"Report {report.pk} was accepted by subscription {subscription.pk}") + filter_response = self.client.extract_data(filter_prompt, filter_bundle.schema) + + for field_name, question in filter_bundle.mapping: + answer = bool(getattr(filter_response, field_name)) + filter_results[str(question.pk)] = answer + if answer != question.expected_answer_bool: + is_accepted = False else: + logger.debug( + "Subscription %s has no filter questions; accepting report %s by default", + subscription.pk, + report.pk, + ) + + if not is_accepted: logger.debug(f"Report {report.pk} was rejected by subscription {subscription.pk}") + return + + extraction_bundle = build_extraction_schema(subscription.extraction_fields) + extraction_results: dict[str, Any] = {} + + if extraction_bundle.mapping: + extraction_prompt = Template(settings.SUBSCRIPTION_EXTRACTION_PROMPT).substitute( + { + "report": report.body, + "fields": generate_extraction_fields_prompt(extraction_bundle.mapping), + } + ) + extraction_response = self.client.extract_data( + extraction_prompt, extraction_bundle.schema + ) + + for field_name, field in extraction_bundle.mapping: + extraction_results[str(field.pk)] = getattr(extraction_response, field_name) + + SubscribedItem.objects.create( + subscription=task.job.subscription, + job=task.job, + report=report, + filter_results=filter_results or None, + extraction_results=extraction_results or None, + ) + logger.debug(f"Report {report.pk} was accepted by subscription {subscription.pk}") diff --git a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html index 432891f5..fbf65f99 100644 --- a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html +++ b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html @@ -4,6 +4,34 @@
{% include "reports/_report_header.html" with report=subscribed_item.report %}
+ {% with filter_results=subscribed_item.iter_filter_results %} + {% if filter_results %} +
+
Filter results
+
    + {% for question, value in filter_results %} +
  • + {{ question.question }} → {{ value|yesno:"Yes,No" }} +
  • + {% endfor %} +
+
+ {% endif %} + {% endwith %} + {% with extraction_results=subscribed_item.iter_extraction_results %} + {% if extraction_results %} +
+
Extraction results
+
    + {% for field, value in extraction_results %} +
  • + {{ field.name }} → {{ value|default_if_none:"—" }} +
  • + {% endfor %} +
+
+ {% endif %} + {% endwith %}
{{ subscribed_item.report.body }}
diff --git a/radis/subscriptions/templates/subscriptions/subscription_detail.html b/radis/subscriptions/templates/subscriptions/subscription_detail.html index 7002a2b1..dc6cdb46 100644 --- a/radis/subscriptions/templates/subscriptions/subscription_detail.html +++ b/radis/subscriptions/templates/subscriptions/subscription_detail.html @@ -118,8 +118,37 @@

Filter Details

{% endif %} -

Questions

-
    - {% for question in subscription.questions.all %}
  • {{ question.question }}
  • {% endfor %} -
+

Filter Questions

+ {% with questions=subscription.filter_questions.all %} + {% if questions %} +
    + {% for question in questions %} +
  • + {{ question.question }} — accepts reports when answer is + {{ question.get_expected_answer_display|lower }} +
  • + {% endfor %} +
+ {% else %} +

No filter questions defined.

+ {% endif %} + {% endwith %} +

Extraction Fields

+ {% with fields=subscription.extraction_fields.all %} + {% if fields %} +
+ {% for field in fields %} +
{{ field.name }}
+
+ {{ field.description|default:"No description provided." }} +
+ Type: {{ field.get_output_type_display }} +
+
+ {% endfor %} +
+ {% else %} +

No extraction fields configured.

+ {% endif %} + {% endwith %} {% endblock content %} diff --git a/radis/subscriptions/tests/__init__.py b/radis/subscriptions/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/radis/subscriptions/tests/test_processor_utils.py b/radis/subscriptions/tests/test_processor_utils.py new file mode 100644 index 00000000..f2be9b21 --- /dev/null +++ b/radis/subscriptions/tests/test_processor_utils.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass + +from radis.extractions.models import OutputType +from radis.subscriptions.utils.processor_utils import ( + build_extraction_schema, + build_filter_schema, + generate_extraction_fields_prompt, + generate_filter_questions_prompt, +) + + +@dataclass +class DummyFilterQuestion: + pk: int + question: str + expected_answer_bool: bool + + +class DummyFilterQuestionSet: + def __init__(self, *questions: DummyFilterQuestion): + self._questions = list(questions) + + def all(self): + return list(self._questions) + + +@dataclass +class DummyExtractionField: + pk: int + name: str + description: str + output_type: str + + def get_output_type_display(self) -> str: + return { + OutputType.TEXT: "Text", + OutputType.NUMERIC: "Numeric", + OutputType.BOOLEAN: "Boolean", + }[self.output_type] + + +class DummyExtractionFieldSet: + def __init__(self, *fields: DummyExtractionField): + self._fields = list(fields) + + def all(self): + return list(self._fields) + + +def test_build_subscription_schema_and_prompts_without_database(): + questions = DummyFilterQuestionSet( + DummyFilterQuestion(pk=1, question="Contains pneumonia?", expected_answer_bool=True) + ) + fields = DummyExtractionFieldSet( + DummyExtractionField( + pk=10, + name="diagnosis", + description="Primary diagnosis mentioned in the report", + output_type=OutputType.TEXT, + ) + ) + + filter_bundle = build_filter_schema(questions) + extraction_bundle = build_extraction_schema(fields) + + assert len(filter_bundle.mapping) == 1 + assert len(extraction_bundle.mapping) == 1 + + filter_field_name, mapped_question = filter_bundle.mapping[0] + assert filter_field_name.startswith("filter_") + assert mapped_question.pk == 1 + + extraction_field_name, mapped_field = extraction_bundle.mapping[0] + assert extraction_field_name.startswith("extraction_") + assert mapped_field.pk == 10 + + filter_prompt = generate_filter_questions_prompt(filter_bundle.mapping) + assert "filter_" in filter_prompt + assert "Contains pneumonia?" in filter_prompt + + extraction_prompt = generate_extraction_fields_prompt(extraction_bundle.mapping) + assert "extraction_" in extraction_prompt + assert "diagnosis" in extraction_prompt diff --git a/radis/subscriptions/utils/processor_utils.py b/radis/subscriptions/utils/processor_utils.py index ab24ba1c..c728e1e8 100644 --- a/radis/subscriptions/utils/processor_utils.py +++ b/radis/subscriptions/utils/processor_utils.py @@ -1,22 +1,87 @@ +from __future__ import annotations + +from dataclasses import dataclass from typing import Any from django.db.models import QuerySet from pydantic import BaseModel, create_model -from ..models import Question +from radis.extractions.models import OutputField, OutputType + +from ..models import FilterQuestion + +type Numeric = float | int + + +@dataclass(slots=True) +class FilterSchemaBundle: + schema: type[BaseModel] + mapping: list[tuple[str, FilterQuestion]] + + +@dataclass(slots=True) +class ExtractionSchemaBundle: + schema: type[BaseModel] + mapping: list[tuple[str, OutputField]] + + +def build_filter_schema(questions: QuerySet[FilterQuestion]) -> FilterSchemaBundle: + field_definitions: dict[str, Any] = {} + mapping: list[tuple[str, FilterQuestion]] = [] + + for index, question in enumerate(questions.all()): + field_name = f"filter_{index}" + field_definitions[field_name] = (bool, ...) + mapping.append((field_name, question)) + + model_name = "SubscriptionFilterResultsModel" + schema = create_model(model_name, **field_definitions) if field_definitions else create_model(model_name) + return FilterSchemaBundle(schema, mapping) -def generate_questions_schema(questions: QuerySet[Question]) -> type[BaseModel]: +def build_extraction_schema(fields: QuerySet[OutputField]) -> ExtractionSchemaBundle: field_definitions: dict[str, Any] = {} - for index, _ in enumerate(questions.all()): - field_definitions[f"question_{index}"] = (bool, ...) + mapping: list[tuple[str, OutputField]] = [] + + for index, field in enumerate(fields.all()): + field_name = f"extraction_{index}" + if field.output_type == OutputType.TEXT: + output_type = str + elif field.output_type == OutputType.NUMERIC: + output_type = Numeric + elif field.output_type == OutputType.BOOLEAN: + output_type = bool + else: + raise ValueError(f"Unknown output type: {field.output_type}") + + field_definitions[field_name] = (output_type, ...) + + mapping.append((field_name, field)) + + model_name = "SubscriptionExtractionResultsModel" + schema = create_model(model_name, **field_definitions) if field_definitions else create_model(model_name) + return ExtractionSchemaBundle(schema, mapping) + + +def generate_filter_questions_prompt(mapping: list[tuple[str, FilterQuestion]]) -> str: + if not mapping: + return "None" - return create_model("QuestionsModel", field_definitions=field_definitions) + lines: list[str] = [] + for field_name, question in mapping: + lines.append(f"{field_name}: {question.question}") + return "\n".join(lines) -def generate_questions_for_prompt(fields: QuerySet[Question]) -> str: - prompt = "" - for index, question in enumerate(fields.all()): - prompt += f"question_{index}: {question.question}\n" +def generate_extraction_fields_prompt(mapping: list[tuple[str, OutputField]]) -> str: + if not mapping: + return "None" - return prompt + lines: list[str] = [] + for field_name, field in mapping: + description = field.description or "No description provided." + lines.append( + f"{field_name}: {field.name} — {description} " + f"[type: {field.get_output_type_display()}]" + ) + return "\n".join(lines) diff --git a/radis/subscriptions/views.py b/radis/subscriptions/views.py index e04fb74f..066ec0e3 100644 --- a/radis/subscriptions/views.py +++ b/radis/subscriptions/views.py @@ -20,8 +20,12 @@ from radis.subscriptions.filters import SubscriptionFilter from radis.subscriptions.tables import SubscriptionTable -from .forms import QuestionForm, QuestionFormSet, SubscriptionForm -from .models import Question, SubscribedItem, Subscription +from .forms import ( + ExtractionFieldFormSet, + FilterQuestionFormSet, + SubscriptionForm, +) +from .models import SubscribedItem, Subscription logger = getLogger(__name__) @@ -48,7 +52,12 @@ class SubscriptionDetailView(LoginRequiredMixin, DetailView): template_name = "subscriptions/subscription_detail.html" def get_queryset(self): - return super().get_queryset().filter(owner=self.request.user).prefetch_related("questions") + return ( + super() + .get_queryset() + .filter(owner=self.request.user) + .prefetch_related("filter_questions", "extraction_fields") + ) class SubscriptionCreateView(LoginRequiredMixin, CreateView): # TODO: Add PermissionRequiredMixin @@ -60,15 +69,18 @@ class SubscriptionCreateView(LoginRequiredMixin, CreateView): # TODO: Add Permi def get_context_data(self, **kwargs: Any) -> dict[str, Any]: ctx = super().get_context_data(**kwargs) if self.request.POST: - ctx["formset"] = QuestionFormSet(self.request.POST) + ctx["filter_formset"] = FilterQuestionFormSet(self.request.POST) + ctx["extraction_formset"] = ExtractionFieldFormSet(self.request.POST) else: - ctx["formset"] = QuestionFormSet() + ctx["filter_formset"] = FilterQuestionFormSet() + ctx["extraction_formset"] = ExtractionFieldFormSet() return ctx def form_valid(self, form) -> HttpResponse: ctx = self.get_context_data() - formset: BaseInlineFormSet[Question, Subscription, QuestionForm] = ctx["formset"] - if formset.is_valid(): + filter_formset: BaseInlineFormSet = ctx["filter_formset"] + extraction_formset: BaseInlineFormSet = ctx["extraction_formset"] + if filter_formset.is_valid() and extraction_formset.is_valid(): user = self.request.user form.instance.owner = user active_group = user.active_group @@ -82,8 +94,11 @@ def form_valid(self, form) -> HttpResponse: return self.form_invalid(form) raise e - formset.instance = self.object - formset.save() + filter_formset.instance = self.object + filter_formset.save() + + extraction_formset.instance = self.object + extraction_formset.save() return HttpResponseRedirect(self.get_success_url()) else: return self.form_invalid(form) @@ -99,21 +114,34 @@ def get_success_url(self): return reverse("subscription_detail", kwargs={"pk": self.object.pk}) def get_queryset(self) -> QuerySet[Subscription]: - return super().get_queryset().filter(owner=self.request.user).prefetch_related("questions") + return ( + super() + .get_queryset() + .filter(owner=self.request.user) + .prefetch_related("filter_questions", "extraction_fields") + ) def get_context_data(self, **kwargs: Any) -> dict[str, Any]: ctx = super().get_context_data(**kwargs) if self.request.POST: - ctx["formset"] = QuestionFormSet(self.request.POST, instance=self.object) + ctx["filter_formset"] = FilterQuestionFormSet( + self.request.POST, instance=self.object + ) + ctx["extraction_formset"] = ExtractionFieldFormSet( + self.request.POST, instance=self.object + ) else: - ctx["formset"] = QuestionFormSet(instance=self.object) - ctx["formset"].extra = 0 # no additional empty form when editing + ctx["filter_formset"] = FilterQuestionFormSet(instance=self.object) + ctx["extraction_formset"] = ExtractionFieldFormSet(instance=self.object) + ctx["filter_formset"].extra = 0 # no additional empty form when editing + ctx["extraction_formset"].extra = 0 return ctx def form_valid(self, form) -> HttpResponse: ctx = self.get_context_data() - formset: BaseInlineFormSet[Question, Subscription, QuestionForm] = ctx["formset"] - if formset.is_valid(): + filter_formset = ctx["filter_formset"] + extraction_formset = ctx["extraction_formset"] + if filter_formset.is_valid() and extraction_formset.is_valid(): try: self.object = form.save() except IntegrityError as e: @@ -122,8 +150,11 @@ def form_valid(self, form) -> HttpResponse: return self.form_invalid(form) raise e - formset.instance = self.object - formset.save() + filter_formset.instance = self.object + filter_formset.save() + + extraction_formset.instance = self.object + extraction_formset.save() return super().form_valid(form) else: @@ -159,8 +190,14 @@ def get_queryset(self) -> QuerySet[Subscription]: def get_related_queryset(self) -> QuerySet[SubscribedItem]: subscription = cast(Subscription, self.get_object()) - return SubscribedItem.objects.filter(subscription_id=subscription.pk).prefetch_related( - "report" + return ( + SubscribedItem.objects.filter(subscription_id=subscription.pk) + .select_related("subscription") + .prefetch_related( + "report", + "subscription__filter_questions", + "subscription__extraction_fields", + ) ) def get_filter_queryset(self) -> QuerySet[SubscribedItem]: From 087e8b1b46eb7108d55b0618e22318d5033536ae Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 6 Nov 2025 10:51:16 +0100 Subject: [PATCH 03/93] Adding downloads view --- .../extractions/extraction_job_detail.html | 17 ++++-- .../extractions/extraction_result_list.html | 17 ++++-- radis/extractions/urls.py | 6 +++ radis/extractions/utils/__init__.py | 5 ++ radis/extractions/utils/csv_export.py | 47 +++++++++++++++++ radis/extractions/views.py | 52 +++++++++++++++++++ 6 files changed, 134 insertions(+), 10 deletions(-) create mode 100644 radis/extractions/utils/csv_export.py diff --git a/radis/extractions/templates/extractions/extraction_job_detail.html b/radis/extractions/templates/extractions/extraction_job_detail.html index 942a2fc8..4d874c80 100644 --- a/radis/extractions/templates/extractions/extraction_job_detail.html +++ b/radis/extractions/templates/extractions/extraction_job_detail.html @@ -121,11 +121,18 @@
Output Fields
{% if not job.is_preparing %} - - {% bootstrap_icon "eye" %} - View Results - + {% crispy filter.form %} diff --git a/radis/extractions/templates/extractions/extraction_result_list.html b/radis/extractions/templates/extractions/extraction_result_list.html index a0abdb4e..6ca9b510 100644 --- a/radis/extractions/templates/extractions/extraction_result_list.html +++ b/radis/extractions/templates/extractions/extraction_result_list.html @@ -7,11 +7,18 @@ {% block heading %} - - {% bootstrap_icon "arrow-return-left" %} - View Job - + {% endblock heading %} diff --git a/radis/extractions/urls.py b/radis/extractions/urls.py index df459e25..d7c44ba2 100644 --- a/radis/extractions/urls.py +++ b/radis/extractions/urls.py @@ -13,6 +13,7 @@ ExtractionJobVerifyView, ExtractionJobWizardView, ExtractionResultListView, + ExtractionResultDownloadView, ExtractionTaskDeleteView, ExtractionTaskDetailView, ExtractionTaskResetView, @@ -99,6 +100,11 @@ ExtractionResultListView.as_view(), name="extraction_result_list", ), + path( + "jobs//results/download/", + ExtractionResultDownloadView.as_view(), + name="extraction_result_download", + ), path( "instances//", ExtractionInstanceDetailView.as_view(), diff --git a/radis/extractions/utils/__init__.py b/radis/extractions/utils/__init__.py index e69de29b..c32e9b87 100644 --- a/radis/extractions/utils/__init__.py +++ b/radis/extractions/utils/__init__.py @@ -0,0 +1,5 @@ +"""Utility helpers for extraction workflows.""" + +from .csv_export import iter_extraction_result_rows + +__all__ = ["iter_extraction_result_rows"] diff --git a/radis/extractions/utils/csv_export.py b/radis/extractions/utils/csv_export.py new file mode 100644 index 00000000..9b28fc20 --- /dev/null +++ b/radis/extractions/utils/csv_export.py @@ -0,0 +1,47 @@ +"""Helpers for exporting extraction results in CSV format.""" + +from __future__ import annotations + +from collections.abc import Iterable, Sequence +from typing import Any + +from django.db.models import QuerySet + +from radis.extractions.models import ExtractionInstance, ExtractionJob + + +def iter_extraction_result_rows(job: ExtractionJob) -> Iterable[Sequence[str]]: + """Yield rows for the extraction results CSV. + + Args: + job: The extraction job whose results should be exported. + + Yields: + Sequences of stringified cell values suitable for csv.writer. + """ + + field_names: list[str] = list( + job.output_fields.order_by("pk").values_list("name", flat=True) + ) + + header = ["instance_id", "report_id", "is_processed"] + header.extend(field_names) + yield header + + instances: QuerySet[ExtractionInstance] = ExtractionInstance.objects.filter( + task__job=job + ).order_by("pk") + + for instance in instances.iterator(): + row: list[str] = [ + str(instance.pk), + str(instance.report_id) if instance.report_id else "", + "yes" if instance.is_processed else "no", + ] + + output: dict[str, Any] = instance.output or {} + for field_name in field_names: + value = output.get(field_name) + row.append("" if value is None else str(value)) + + yield row diff --git a/radis/extractions/views.py b/radis/extractions/views.py index 78f4e3c2..a037931a 100644 --- a/radis/extractions/views.py +++ b/radis/extractions/views.py @@ -1,3 +1,5 @@ +import csv +from collections.abc import Generator from typing import Any, Type, cast from adit_radis_shared.common.mixins import ( @@ -14,8 +16,10 @@ from django.db import transaction from django.db.models import QuerySet from django.forms import BaseInlineFormSet +from django.http import StreamingHttpResponse from django.shortcuts import redirect from django.urls import reverse_lazy +from django.utils.text import slugify from django.views.generic import DetailView from django_tables2 import SingleTableMixin, tables from formtools.wizard.views import SessionWizardView @@ -50,6 +54,7 @@ ExtractionResultsTable, ExtractionTaskTable, ) +from .utils.csv_export import iter_extraction_result_rows EXTRACTIONS_SEARCH_PROVIDER = "extractions_search_provider" @@ -272,3 +277,50 @@ def get_table(self, **kwargs): def get_table_data(self): job = cast(ExtractionJob, self.get_object()) return ExtractionInstance.objects.filter(task__job=job) + + +class _Echo: + """Lightweight write-only buffer for csv.writer.""" + + def write(self, value: str) -> str: + return value + + +class ExtractionResultDownloadView(ExtractionsLockedMixin, LoginRequiredMixin, DetailView): + """Stream extraction results as a CSV download.""" + + model = ExtractionJob + request: AuthenticatedHttpRequest + + def get_queryset(self) -> QuerySet[ExtractionJob]: + """Return the accessible extraction jobs for the current user.""" + assert self.model + model = cast(Type[ExtractionJob], self.model) + if self.request.user.is_staff: + return model.objects.all() + return model.objects.filter(owner=self.request.user) + + def get(self, request: AuthenticatedHttpRequest, *args, **kwargs) -> StreamingHttpResponse: + """Stream the CSV file response.""" + job = cast(ExtractionJob, self.get_object()) + filename = self._build_filename(job) + + response = StreamingHttpResponse( + self._stream_rows(job), + content_type="text/csv", + ) + response["Content-Disposition"] = f'attachment; filename="{filename}"' + return response + + def _stream_rows(self, job: ExtractionJob) -> Generator[str, None, None]: + """Yield serialized CSV rows for the response.""" + pseudo_buffer = _Echo() + writer = csv.writer(pseudo_buffer) + yield "\ufeff" + for row in iter_extraction_result_rows(job): + yield writer.writerow(row) + + def _build_filename(self, job: ExtractionJob) -> str: + """Generate a descriptive CSV filename for the extraction job.""" + slug = slugify(job.title) or "results" + return f"extraction_job_{job.pk}_{slug}.csv" From a4a4cb2b208bacbad0dcb0707b0865a13ca4f147 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 7 Nov 2025 10:58:25 +0100 Subject: [PATCH 04/93] Dont download report text --- radis/extractions/utils/csv_export.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/radis/extractions/utils/csv_export.py b/radis/extractions/utils/csv_export.py index 9b28fc20..3d83fb43 100644 --- a/radis/extractions/utils/csv_export.py +++ b/radis/extractions/utils/csv_export.py @@ -5,8 +5,6 @@ from collections.abc import Iterable, Sequence from typing import Any -from django.db.models import QuerySet - from radis.extractions.models import ExtractionInstance, ExtractionJob @@ -28,20 +26,22 @@ def iter_extraction_result_rows(job: ExtractionJob) -> Iterable[Sequence[str]]: header.extend(field_names) yield header - instances: QuerySet[ExtractionInstance] = ExtractionInstance.objects.filter( - task__job=job - ).order_by("pk") + instances = ( + ExtractionInstance.objects.filter(task__job=job) + .order_by("pk") + .values_list("pk", "report_id", "is_processed", "output") + ) - for instance in instances.iterator(): + for instance_id, report_id, is_processed, output in instances.iterator(): row: list[str] = [ - str(instance.pk), - str(instance.report_id) if instance.report_id else "", - "yes" if instance.is_processed else "no", + str(instance_id), + str(report_id) if report_id else "", + "yes" if is_processed else "no", ] - output: dict[str, Any] = instance.output or {} + output_dict: dict[str, Any] = output or {} for field_name in field_names: - value = output.get(field_name) + value = output_dict.get(field_name) row.append("" if value is None else str(value)) yield row From ce4091c9a863ac9a71fb012ccee985fa11e69cf5 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 7 Nov 2025 11:08:05 +0100 Subject: [PATCH 05/93] adding tests --- radis/extractions/tests/test_views.py | 57 ++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/radis/extractions/tests/test_views.py b/radis/extractions/tests/test_views.py index bb297864..14530854 100644 --- a/radis/extractions/tests/test_views.py +++ b/radis/extractions/tests/test_views.py @@ -1,13 +1,14 @@ import pytest from adit_radis_shared.accounts.factories import GroupFactory, UserFactory from django.contrib.auth.models import Permission -from django.test import Client +from django.test import Client, override_settings from radis.core.models import AnalysisTask from radis.extractions.factories import ( ExtractionInstanceFactory, ExtractionJobFactory, ExtractionTaskFactory, + OutputFieldFactory, ) from radis.extractions.models import ExtractionJob from radis.reports.factories import LanguageFactory, ReportFactory @@ -29,6 +30,10 @@ def create_test_extraction_task(job=None): return ExtractionTaskFactory.create(job=job) +def _hide_toolbar(_request): + return False + + @pytest.mark.django_db def test_extraction_job_list_view(client: Client): user = UserFactory.create(is_active=True) @@ -202,6 +207,56 @@ def test_extraction_result_list_view(client: Client): assert response.status_code == 200 +@override_settings(DEBUG_TOOLBAR_CONFIG={"SHOW_TOOLBAR_CALLBACK": _hide_toolbar}) +@pytest.mark.django_db +def test_extraction_result_download_view(client: Client): + user = UserFactory.create(is_active=True) + job = create_test_extraction_job(owner=user) + + OutputFieldFactory.create(job=job, name="field_one") + OutputFieldFactory.create(job=job, name="field_two") + + task = create_test_extraction_task(job=job) + language = LanguageFactory.create(code="en") + report = ReportFactory.create(language=language) + instance = ExtractionInstanceFactory.create( + task=task, + report=report, + is_processed=True, + output={"field_one": "value", "field_two": 42}, + ) + + client.force_login(user) + response = client.get(f"/extractions/jobs/{job.pk}/results/download/") + assert response.status_code == 200 + assert response["Content-Type"].startswith("text/csv") + assert f"extraction_job_{job.pk}" in response["Content-Disposition"] + + chunks: list[bytes] = [] + for chunk in response.streaming_content: + if isinstance(chunk, bytes): + chunks.append(chunk) + else: + chunks.append(chunk.encode("utf-8")) + csv_bytes = b"".join(chunks) + csv_text = csv_bytes.decode("utf-8-sig") + + lines = [line.strip() for line in csv_text.strip().splitlines()] + assert lines[0] == "instance_id,report_id,is_processed,field_one,field_two" + assert lines[1] == f"{instance.pk},{instance.report_id},yes,value,42" + + +@override_settings(DEBUG_TOOLBAR_CONFIG={"SHOW_TOOLBAR_CALLBACK": _hide_toolbar}) +@pytest.mark.django_db +def test_extraction_result_download_view_unauthorized(client: Client): + owner = UserFactory.create(is_active=True) + other_user = UserFactory.create(is_active=True) + job = create_test_extraction_job(owner=owner) + client.force_login(other_user) + response = client.get(f"/extractions/jobs/{job.pk}/results/download/") + assert response.status_code == 404 + + @pytest.mark.django_db def test_extraction_task_detail_view(client: Client): user = UserFactory.create(is_active=True) From 395cad6540f9bc392f6e92ed3a8a885b97eac7f7 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 7 Nov 2025 14:26:36 +0100 Subject: [PATCH 06/93] linter error --- radis/extractions/urls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/radis/extractions/urls.py b/radis/extractions/urls.py index d7c44ba2..71184dc1 100644 --- a/radis/extractions/urls.py +++ b/radis/extractions/urls.py @@ -12,8 +12,8 @@ ExtractionJobRetryView, ExtractionJobVerifyView, ExtractionJobWizardView, - ExtractionResultListView, ExtractionResultDownloadView, + ExtractionResultListView, ExtractionTaskDeleteView, ExtractionTaskDetailView, ExtractionTaskResetView, From 5a85dd5f9eaa79782b945dc95fe42aececf16cbd Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 7 Nov 2025 14:35:10 +0100 Subject: [PATCH 07/93] add test cases where there are no instances or extraction result has no output fields --- radis/extractions/tests/test_views.py | 61 +++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/radis/extractions/tests/test_views.py b/radis/extractions/tests/test_views.py index 14530854..f1e25bc6 100644 --- a/radis/extractions/tests/test_views.py +++ b/radis/extractions/tests/test_views.py @@ -34,6 +34,17 @@ def _hide_toolbar(_request): return False +def _collect_csv(response) -> str: + chunks: list[bytes] = [] + for chunk in response.streaming_content: + if isinstance(chunk, bytes): + chunks.append(chunk) + else: + chunks.append(chunk.encode("utf-8")) + csv_bytes = b"".join(chunks) + return csv_bytes.decode("utf-8-sig") + + @pytest.mark.django_db def test_extraction_job_list_view(client: Client): user = UserFactory.create(is_active=True) @@ -232,15 +243,7 @@ def test_extraction_result_download_view(client: Client): assert response["Content-Type"].startswith("text/csv") assert f"extraction_job_{job.pk}" in response["Content-Disposition"] - chunks: list[bytes] = [] - for chunk in response.streaming_content: - if isinstance(chunk, bytes): - chunks.append(chunk) - else: - chunks.append(chunk.encode("utf-8")) - csv_bytes = b"".join(chunks) - csv_text = csv_bytes.decode("utf-8-sig") - + csv_text = _collect_csv(response) lines = [line.strip() for line in csv_text.strip().splitlines()] assert lines[0] == "instance_id,report_id,is_processed,field_one,field_two" assert lines[1] == f"{instance.pk},{instance.report_id},yes,value,42" @@ -257,6 +260,46 @@ def test_extraction_result_download_view_unauthorized(client: Client): assert response.status_code == 404 +@override_settings(DEBUG_TOOLBAR_CONFIG={"SHOW_TOOLBAR_CALLBACK": _hide_toolbar}) +@pytest.mark.django_db +def test_extraction_result_download_view_no_instances(client: Client): + user = UserFactory.create(is_active=True) + job = create_test_extraction_job(owner=user) + OutputFieldFactory.create(job=job, name="field_one") + + client.force_login(user) + response = client.get(f"/extractions/jobs/{job.pk}/results/download/") + assert response.status_code == 200 + + csv_text = _collect_csv(response) + assert csv_text.strip() == "instance_id,report_id,is_processed,field_one" + + +@override_settings(DEBUG_TOOLBAR_CONFIG={"SHOW_TOOLBAR_CALLBACK": _hide_toolbar}) +@pytest.mark.django_db +def test_extraction_result_download_view_no_output_fields(client: Client): + user = UserFactory.create(is_active=True) + job = create_test_extraction_job(owner=user) + task = create_test_extraction_task(job=job) + language = LanguageFactory.create(code="en") + report = ReportFactory.create(language=language) + instance = ExtractionInstanceFactory.create( + task=task, + report=report, + is_processed=False, + output={}, + ) + + client.force_login(user) + response = client.get(f"/extractions/jobs/{job.pk}/results/download/") + assert response.status_code == 200 + + csv_text = _collect_csv(response) + lines = csv_text.strip().splitlines() + assert lines[0] == "instance_id,report_id,is_processed" + assert lines[1] == f"{instance.pk},{instance.report_id},no" + + @pytest.mark.django_db def test_extraction_task_detail_view(client: Client): user = UserFactory.create(is_active=True) From 96c500a39843c93c2226e21f1ca2f725435b3127 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 7 Nov 2025 15:02:02 +0100 Subject: [PATCH 08/93] standardize boolean values to yes/no --- radis/extractions/tests/test_views.py | 9 +++++---- radis/extractions/utils/csv_export.py | 11 ++++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/radis/extractions/tests/test_views.py b/radis/extractions/tests/test_views.py index f1e25bc6..af099215 100644 --- a/radis/extractions/tests/test_views.py +++ b/radis/extractions/tests/test_views.py @@ -226,6 +226,7 @@ def test_extraction_result_download_view(client: Client): OutputFieldFactory.create(job=job, name="field_one") OutputFieldFactory.create(job=job, name="field_two") + OutputFieldFactory.create(job=job, name="field_bool") task = create_test_extraction_task(job=job) language = LanguageFactory.create(code="en") @@ -234,7 +235,7 @@ def test_extraction_result_download_view(client: Client): task=task, report=report, is_processed=True, - output={"field_one": "value", "field_two": 42}, + output={"field_one": "value", "field_two": 42, "field_bool": False}, ) client.force_login(user) @@ -245,8 +246,8 @@ def test_extraction_result_download_view(client: Client): csv_text = _collect_csv(response) lines = [line.strip() for line in csv_text.strip().splitlines()] - assert lines[0] == "instance_id,report_id,is_processed,field_one,field_two" - assert lines[1] == f"{instance.pk},{instance.report_id},yes,value,42" + assert lines[0] == "instance_id,report_id,is_processed,field_one,field_two,field_bool" + assert lines[1] == f"{instance.pk},{instance.report.pk},yes,value,42,no" @override_settings(DEBUG_TOOLBAR_CONFIG={"SHOW_TOOLBAR_CALLBACK": _hide_toolbar}) @@ -297,7 +298,7 @@ def test_extraction_result_download_view_no_output_fields(client: Client): csv_text = _collect_csv(response) lines = csv_text.strip().splitlines() assert lines[0] == "instance_id,report_id,is_processed" - assert lines[1] == f"{instance.pk},{instance.report_id},no" + assert lines[1] == f"{instance.pk},{instance.report.pk},no" @pytest.mark.django_db diff --git a/radis/extractions/utils/csv_export.py b/radis/extractions/utils/csv_export.py index 3d83fb43..6759c050 100644 --- a/radis/extractions/utils/csv_export.py +++ b/radis/extractions/utils/csv_export.py @@ -8,6 +8,15 @@ from radis.extractions.models import ExtractionInstance, ExtractionJob +def _format_cell(value: Any) -> str: + """Format a single output value for CSV export.""" + if value is None: + return "" + if isinstance(value, bool): + return "yes" if value else "no" + return str(value) + + def iter_extraction_result_rows(job: ExtractionJob) -> Iterable[Sequence[str]]: """Yield rows for the extraction results CSV. @@ -42,6 +51,6 @@ def iter_extraction_result_rows(job: ExtractionJob) -> Iterable[Sequence[str]]: output_dict: dict[str, Any] = output or {} for field_name in field_names: value = output_dict.get(field_name) - row.append("" if value is None else str(value)) + row.append(_format_cell(value)) yield row From 03e184157ff3036c0f3696d4ad7d1c77239813f9 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 7 Nov 2025 16:11:58 +0100 Subject: [PATCH 09/93] button colrs --- .../templates/extractions/extraction_job_detail.html | 2 +- .../templates/extractions/extraction_result_list.html | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/radis/extractions/templates/extractions/extraction_job_detail.html b/radis/extractions/templates/extractions/extraction_job_detail.html index 4d874c80..e580c214 100644 --- a/radis/extractions/templates/extractions/extraction_job_detail.html +++ b/radis/extractions/templates/extractions/extraction_job_detail.html @@ -128,7 +128,7 @@
Output Fields
View Results + class="btn btn-sm btn-success"> {% bootstrap_icon "download" %} Download CSV diff --git a/radis/extractions/templates/extractions/extraction_result_list.html b/radis/extractions/templates/extractions/extraction_result_list.html index 6ca9b510..b44fd9c5 100644 --- a/radis/extractions/templates/extractions/extraction_result_list.html +++ b/radis/extractions/templates/extractions/extraction_result_list.html @@ -9,12 +9,12 @@
+ class="btn btn-sm btn-success"> {% bootstrap_icon "download" %} Download CSV + class="btn btn-sm btn-primary"> {% bootstrap_icon "arrow-return-left" %} View Job From 9b2b368f6ea27aa820ee18b5805f9f62c433f568 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 10 Nov 2025 10:10:45 +0000 Subject: [PATCH 10/93] change models --- ...03_alter_extractionjob_options_and_more.py | 63 ++++++++++++++++++- radis/extractions/models.py | 4 +- ...9_alter_subscribeditem_options_and_more.py | 47 +++++++++++++- 3 files changed, 107 insertions(+), 7 deletions(-) diff --git a/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py b/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py index 660c2081..c04ee4f9 100644 --- a/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py +++ b/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py @@ -1,12 +1,14 @@ -# Generated by Django 5.2.7 on 2025-11-04 14:20 +# Generated by Django 5.2.7 on 2025-11-10 09:59 -from django.db import migrations +import django.db.models.deletion +from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ("extractions", "0002_procrastinate_on_delete"), + ("subscriptions", "0009_alter_subscribeditem_options_and_more"), ] operations = [ @@ -14,8 +16,65 @@ class Migration(migrations.Migration): name="extractionjob", options={"ordering": ["-created_at", "title"]}, ), + migrations.RemoveConstraint( + model_name="outputfield", + name="unique_output_field_name_per_job", + ), migrations.RemoveIndex( model_name="extractionjob", name="extractions_owner_i_49b05e_idx", ), + migrations.RemoveField( + model_name="outputfield", + name="optional", + ), + migrations.AddField( + model_name="outputfield", + name="subscription", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="extraction_fields", + to="subscriptions.subscription", + ), + ), + migrations.AlterField( + model_name="outputfield", + name="job", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="output_fields", + to="extractions.extractionjob", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.UniqueConstraint( + condition=models.Q(("job__isnull", False)), + fields=("name", "job_id"), + name="unique_output_field_name_per_job", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.UniqueConstraint( + condition=models.Q(("subscription__isnull", False)), + fields=("name", "subscription_id"), + name="unique_output_field_name_per_subscription", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.CheckConstraint( + condition=models.Q( + models.Q(("job__isnull", False), ("subscription__isnull", True)), + models.Q(("job__isnull", True), ("subscription__isnull", False)), + _connector="OR", + ), + name="output_field_exactly_one_parent", + ), + ), ] diff --git a/radis/extractions/models.py b/radis/extractions/models.py index b80bb3a5..c93aeca5 100644 --- a/radis/extractions/models.py +++ b/radis/extractions/models.py @@ -89,9 +89,7 @@ class OutputField(models.Model): job = models.ForeignKey[ ExtractionJob ](ExtractionJob, null=True, blank=True, on_delete=models.CASCADE, related_name="output_fields") - subscription = models.ForeignKey[ - "subscriptions.Subscription" - ]( + subscription = models.ForeignKey( "subscriptions.Subscription", null=True, blank=True, diff --git a/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py b/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py index de7e0aec..4268507b 100644 --- a/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py +++ b/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py @@ -1,6 +1,7 @@ -# Generated by Django 5.2.7 on 2025-11-04 14:20 +# Generated by Django 5.2.7 on 2025-11-10 09:59 -from django.db import migrations +import django.db.models.deletion +from django.db import migrations, models class Migration(migrations.Migration): @@ -21,4 +22,46 @@ class Migration(migrations.Migration): name="subscription", options={"ordering": ["-created_at"]}, ), + migrations.RenameField( + model_name="subscribeditem", + old_name="answers", + new_name="extraction_results", + ), + migrations.AddField( + model_name="subscribeditem", + name="filter_results", + field=models.JSONField(blank=True, null=True), + ), + migrations.CreateModel( + name="FilterQuestion", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("question", models.CharField(max_length=300)), + ( + "expected_answer", + models.CharField( + choices=[("Y", "Yes"), ("N", "No")], default="Y", max_length=1 + ), + ), + ( + "subscription", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="filter_questions", + to="subscriptions.subscription", + ), + ), + ], + ), + migrations.DeleteModel( + name="Question", + ), ] From 5d0cc4e820f13175343a95675431f149f6571c80 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 10 Nov 2025 10:11:21 +0000 Subject: [PATCH 11/93] remove previous migration files --- .../0003_subscription_output_fields.py | 72 ------------------- .../0009_separate_filter_and_extraction.py | 49 ------------- 2 files changed, 121 deletions(-) delete mode 100644 radis/extractions/migrations/0003_subscription_output_fields.py delete mode 100644 radis/subscriptions/migrations/0009_separate_filter_and_extraction.py diff --git a/radis/extractions/migrations/0003_subscription_output_fields.py b/radis/extractions/migrations/0003_subscription_output_fields.py deleted file mode 100644 index 7c6acf18..00000000 --- a/radis/extractions/migrations/0003_subscription_output_fields.py +++ /dev/null @@ -1,72 +0,0 @@ -# Generated by Django 5.2.7 on 2025-11-03 22:58 - -import django.db.models.deletion -from django.db import migrations, models -from django.db.models import Q - - -class Migration(migrations.Migration): - - dependencies = [ - ("extractions", "0002_procrastinate_on_delete"), - ("subscriptions", "0008_rename_filter_fields_result_subscribeditem_answers_and_more"), - ] - - operations = [ - migrations.AlterField( - model_name="outputfield", - name="job", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="output_fields", - to="extractions.extractionjob", - ), - ), - migrations.RemoveConstraint( - model_name="outputfield", - name="unique_output_field_name_per_job", - ), - migrations.RemoveField( - model_name="outputfield", - name="optional", - ), - migrations.AddField( - model_name="outputfield", - name="subscription", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="extraction_fields", - to="subscriptions.subscription", - ), - ), - migrations.AddConstraint( - model_name="outputfield", - constraint=models.UniqueConstraint( - condition=Q(job__isnull=False), - fields=("name", "job_id"), - name="unique_output_field_name_per_job", - ), - ), - migrations.AddConstraint( - model_name="outputfield", - constraint=models.UniqueConstraint( - condition=Q(subscription__isnull=False), - fields=("name", "subscription_id"), - name="unique_output_field_name_per_subscription", - ), - ), - migrations.AddConstraint( - model_name="outputfield", - constraint=models.CheckConstraint( - check=( - Q(job__isnull=False, subscription__isnull=True) - | Q(job__isnull=True, subscription__isnull=False) - ), - name="output_field_exactly_one_parent", - ), - ), - ] diff --git a/radis/subscriptions/migrations/0009_separate_filter_and_extraction.py b/radis/subscriptions/migrations/0009_separate_filter_and_extraction.py deleted file mode 100644 index 7c923225..00000000 --- a/radis/subscriptions/migrations/0009_separate_filter_and_extraction.py +++ /dev/null @@ -1,49 +0,0 @@ -# Generated by Django 5.2.7 on 2025-11-03 21:25 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ( - "subscriptions", - "0008_rename_filter_fields_result_subscribeditem_answers_and_more", - ), - ] - - operations = [ - migrations.RenameModel( - old_name="Question", - new_name="FilterQuestion", - ), - migrations.AddField( - model_name="filterquestion", - name="expected_answer", - field=models.CharField( - choices=[("Y", "Yes"), ("N", "No")], - default="Y", - max_length=1, - ), - ), - migrations.AlterField( - model_name="filterquestion", - name="subscription", - field=models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="filter_questions", - to="subscriptions.subscription", - ), - ), - migrations.RenameField( - model_name="subscribeditem", - old_name="answers", - new_name="filter_results", - ), - migrations.AddField( - model_name="subscribeditem", - name="extraction_results", - field=models.JSONField(blank=True, null=True), - ), - ] From 78b6c3bb0508479164389297463c4439b4cd4044 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 10 Nov 2025 10:58:41 +0000 Subject: [PATCH 12/93] fix lint errors --- radis/extractions/models.py | 8 +- radis/subscriptions/factories.py | 6 +- radis/subscriptions/forms.py | 21 ++--- radis/subscriptions/processors.py | 2 +- .../_subscribed_item_preview.html | 12 +-- .../subscriptions/subscription_detail.html | 4 +- .../tests/test_processor_utils.py | 83 ------------------- radis/subscriptions/tests/test_views.py | 4 +- radis/subscriptions/utils/processor_utils.py | 15 +++- radis/subscriptions/views.py | 4 +- 10 files changed, 34 insertions(+), 125 deletions(-) delete mode 100644 radis/subscriptions/tests/test_processor_utils.py diff --git a/radis/extractions/models.py b/radis/extractions/models.py index c93aeca5..4b25fc6e 100644 --- a/radis/extractions/models.py +++ b/radis/extractions/models.py @@ -86,9 +86,9 @@ class OutputField(models.Model): max_length=1, choices=OutputType.choices, default=OutputType.TEXT ) get_output_type_display: Callable[[], str] - job = models.ForeignKey[ - ExtractionJob - ](ExtractionJob, null=True, blank=True, on_delete=models.CASCADE, related_name="output_fields") + job = models.ForeignKey[ExtractionJob]( + ExtractionJob, null=True, blank=True, on_delete=models.CASCADE, related_name="output_fields" + ) subscription = models.ForeignKey( "subscriptions.Subscription", null=True, @@ -115,7 +115,7 @@ class Meta: | Q(job__isnull=True, subscription__isnull=False) ), name="output_field_exactly_one_parent", - ) + ), ] def __str__(self) -> str: diff --git a/radis/subscriptions/factories.py b/radis/subscriptions/factories.py index da103952..73e61050 100644 --- a/radis/subscriptions/factories.py +++ b/radis/subscriptions/factories.py @@ -4,7 +4,7 @@ from radis.reports.factories import LanguageFactory, ReportFactory -from .models import Question, SubscribedItem, Subscription, SubscriptionJob, SubscriptionTask +from .models import FilterQuestion, SubscribedItem, Subscription, SubscriptionJob, SubscriptionTask class SubscriptionFactory(BaseDjangoModelFactory[Subscription]): @@ -25,9 +25,9 @@ class Meta: send_finished_mail = factory.Faker("boolean") -class QuestionFactory(BaseDjangoModelFactory[Question]): +class FilterQuestionFactory(BaseDjangoModelFactory[FilterQuestion]): class Meta: - model = Question + model = FilterQuestion subscription = factory.SubFactory(SubscriptionFactory) question = factory.Faker("sentence", nb_words=6, variable_nb_words=True) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index cb7af395..26cc531f 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -6,11 +6,10 @@ from radis.core.constants import LANGUAGE_LABELS from radis.core.layouts import Formset, RangeSlider +from radis.extractions.models import OutputField from radis.reports.models import Language, Modality from radis.search.forms import AGE_STEP, MAX_AGE, MIN_AGE -from radis.extractions.models import OutputField - from .models import FilterQuestion, Subscription from .site import subscription_retrieval_providers @@ -160,7 +159,7 @@ def __init__(self, *args, **kwargs): self.fields["question"].required = False self.fields["expected_answer"].required = False - self.fields["expected_answer"].choices = [ + self.fields["expected_answer"].choices = [ # type: ignore[attr-defined] ("", "Select expected answer"), *FilterQuestion.ExpectedAnswer.choices, ] @@ -180,17 +179,15 @@ def __init__(self, *args, **kwargs): def clean(self) -> dict[str, Any]: cleaned_data = super().clean() - question = cleaned_data.get("question") - expected_answer = cleaned_data.get("expected_answer") - - if not question and not expected_answer: - return cleaned_data + assert cleaned_data - if question and not expected_answer: - cleaned_data["expected_answer"] = FilterQuestion.ExpectedAnswer.YES + question = cleaned_data["question"] + expected_answer = cleaned_data["expected_answer"] - if not question: - raise forms.ValidationError("Question text is required when specifying a filter.") + if not question and not expected_answer: + raise forms.ValidationError( + "Question and Expected Answer text is required when specifying a filter." + ) return cleaned_data diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index 0f6b0a9e..2ee361f9 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -1,7 +1,7 @@ import logging -from typing import Any from concurrent.futures import Future, ThreadPoolExecutor from string import Template +from typing import Any from adit_radis_shared.common.types import User from django import db diff --git a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html index fbf65f99..e8317076 100644 --- a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html +++ b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html @@ -9,11 +9,7 @@
Filter results
    - {% for question, value in filter_results %} -
  • - {{ question.question }} → {{ value|yesno:"Yes,No" }} -
  • - {% endfor %} + {% for question, value in filter_results %}
  • {{ question.question }} → {{ value|yesno:"Yes,No" }}
  • {% endfor %}
{% endif %} @@ -23,11 +19,7 @@
Extraction results
    - {% for field, value in extraction_results %} -
  • - {{ field.name }} → {{ value|default_if_none:"—" }} -
  • - {% endfor %} + {% for field, value in extraction_results %}
  • {{ field.name }} → {{ value|default_if_none:"—" }}
  • {% endfor %}
{% endif %} diff --git a/radis/subscriptions/templates/subscriptions/subscription_detail.html b/radis/subscriptions/templates/subscriptions/subscription_detail.html index dc6cdb46..8ba95b04 100644 --- a/radis/subscriptions/templates/subscriptions/subscription_detail.html +++ b/radis/subscriptions/templates/subscriptions/subscription_detail.html @@ -141,9 +141,7 @@

Extraction Fields

{{ field.name }}
{{ field.description|default:"No description provided." }} -
- Type: {{ field.get_output_type_display }} -
+
Type: {{ field.get_output_type_display }}
{% endfor %} diff --git a/radis/subscriptions/tests/test_processor_utils.py b/radis/subscriptions/tests/test_processor_utils.py deleted file mode 100644 index f2be9b21..00000000 --- a/radis/subscriptions/tests/test_processor_utils.py +++ /dev/null @@ -1,83 +0,0 @@ -from dataclasses import dataclass - -from radis.extractions.models import OutputType -from radis.subscriptions.utils.processor_utils import ( - build_extraction_schema, - build_filter_schema, - generate_extraction_fields_prompt, - generate_filter_questions_prompt, -) - - -@dataclass -class DummyFilterQuestion: - pk: int - question: str - expected_answer_bool: bool - - -class DummyFilterQuestionSet: - def __init__(self, *questions: DummyFilterQuestion): - self._questions = list(questions) - - def all(self): - return list(self._questions) - - -@dataclass -class DummyExtractionField: - pk: int - name: str - description: str - output_type: str - - def get_output_type_display(self) -> str: - return { - OutputType.TEXT: "Text", - OutputType.NUMERIC: "Numeric", - OutputType.BOOLEAN: "Boolean", - }[self.output_type] - - -class DummyExtractionFieldSet: - def __init__(self, *fields: DummyExtractionField): - self._fields = list(fields) - - def all(self): - return list(self._fields) - - -def test_build_subscription_schema_and_prompts_without_database(): - questions = DummyFilterQuestionSet( - DummyFilterQuestion(pk=1, question="Contains pneumonia?", expected_answer_bool=True) - ) - fields = DummyExtractionFieldSet( - DummyExtractionField( - pk=10, - name="diagnosis", - description="Primary diagnosis mentioned in the report", - output_type=OutputType.TEXT, - ) - ) - - filter_bundle = build_filter_schema(questions) - extraction_bundle = build_extraction_schema(fields) - - assert len(filter_bundle.mapping) == 1 - assert len(extraction_bundle.mapping) == 1 - - filter_field_name, mapped_question = filter_bundle.mapping[0] - assert filter_field_name.startswith("filter_") - assert mapped_question.pk == 1 - - extraction_field_name, mapped_field = extraction_bundle.mapping[0] - assert extraction_field_name.startswith("extraction_") - assert mapped_field.pk == 10 - - filter_prompt = generate_filter_questions_prompt(filter_bundle.mapping) - assert "filter_" in filter_prompt - assert "Contains pneumonia?" in filter_prompt - - extraction_prompt = generate_extraction_fields_prompt(extraction_bundle.mapping) - assert "extraction_" in extraction_prompt - assert "diagnosis" in extraction_prompt diff --git a/radis/subscriptions/tests/test_views.py b/radis/subscriptions/tests/test_views.py index eff79f4b..837269a6 100644 --- a/radis/subscriptions/tests/test_views.py +++ b/radis/subscriptions/tests/test_views.py @@ -5,7 +5,7 @@ from radis.reports.factories import LanguageFactory from radis.reports.models import Modality from radis.subscriptions.factories import ( - QuestionFactory, + FilterQuestionFactory, SubscriptionFactory, ) from radis.subscriptions.models import Subscription @@ -190,7 +190,7 @@ def test_subscription_update_view_unauthorized(client: Client): def test_subscription_update_view_post_valid(client: Client): user = UserFactory.create(is_active=True) subscription = create_test_subscription(owner=user, name="Original Name") - question = QuestionFactory.create(subscription=subscription) + question = FilterQuestionFactory.create(subscription=subscription) client.force_login(user) diff --git a/radis/subscriptions/utils/processor_utils.py b/radis/subscriptions/utils/processor_utils.py index c728e1e8..299e7eb1 100644 --- a/radis/subscriptions/utils/processor_utils.py +++ b/radis/subscriptions/utils/processor_utils.py @@ -35,7 +35,11 @@ def build_filter_schema(questions: QuerySet[FilterQuestion]) -> FilterSchemaBund mapping.append((field_name, question)) model_name = "SubscriptionFilterResultsModel" - schema = create_model(model_name, **field_definitions) if field_definitions else create_model(model_name) + schema = ( + create_model(model_name, **field_definitions) + if field_definitions + else create_model(model_name) + ) return FilterSchemaBundle(schema, mapping) @@ -59,7 +63,11 @@ def build_extraction_schema(fields: QuerySet[OutputField]) -> ExtractionSchemaBu mapping.append((field_name, field)) model_name = "SubscriptionExtractionResultsModel" - schema = create_model(model_name, **field_definitions) if field_definitions else create_model(model_name) + schema = ( + create_model(model_name, **field_definitions) + if field_definitions + else create_model(model_name) + ) return ExtractionSchemaBundle(schema, mapping) @@ -81,7 +89,6 @@ def generate_extraction_fields_prompt(mapping: list[tuple[str, OutputField]]) -> for field_name, field in mapping: description = field.description or "No description provided." lines.append( - f"{field_name}: {field.name} — {description} " - f"[type: {field.get_output_type_display()}]" + f"{field_name}: {field.name} — {description} [type: {field.get_output_type_display()}]" ) return "\n".join(lines) diff --git a/radis/subscriptions/views.py b/radis/subscriptions/views.py index d54edd12..8d4fefd4 100644 --- a/radis/subscriptions/views.py +++ b/radis/subscriptions/views.py @@ -126,9 +126,7 @@ def get_queryset(self) -> QuerySet[Subscription]: def get_context_data(self, **kwargs: Any) -> dict[str, Any]: ctx = super().get_context_data(**kwargs) if self.request.POST: - ctx["filter_formset"] = FilterQuestionFormSet( - self.request.POST, instance=self.object - ) + ctx["filter_formset"] = FilterQuestionFormSet(self.request.POST, instance=self.object) ctx["extraction_formset"] = ExtractionFieldFormSet( self.request.POST, instance=self.object ) From 204ff4c9eb72fdd015928d55b9772dc95f76cb6b Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 10 Nov 2025 13:53:19 +0000 Subject: [PATCH 13/93] form fixes --- radis/extractions/models.py | 2 +- radis/subscriptions/forms.py | 38 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/radis/extractions/models.py b/radis/extractions/models.py index 4b25fc6e..622e4fe2 100644 --- a/radis/extractions/models.py +++ b/radis/extractions/models.py @@ -110,7 +110,7 @@ class Meta: name="unique_output_field_name_per_subscription", ), models.CheckConstraint( - check=( + condition=( Q(job__isnull=False, subscription__isnull=True) | Q(job__isnull=True, subscription__isnull=False) ), diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index 26cc531f..961d87a4 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -168,14 +168,11 @@ def __init__(self, *args, **kwargs): self.helper = FormHelper() self.helper.form_tag = False self.helper.disable_csrf = True - self.helper.layout = Layout( - Div( - Field("id", type="hidden"), - Field("DELETE", type="hidden"), - "question", - "expected_answer", - ), - ) + fields = [Field("id", type="hidden"), "question", "expected_answer"] + if "DELETE" in self.fields: + fields.insert(1, Field("DELETE", type="hidden")) + self.helper.layout = Layout(Div(*fields)) + def clean(self) -> dict[str, Any]: cleaned_data = super().clean() @@ -206,17 +203,20 @@ def __init__(self, *args, **kwargs): self.helper = FormHelper() self.helper.form_tag = False self.helper.disable_csrf = True - self.helper.layout = Layout( - Div( - Field("id", type="hidden"), - Field("DELETE", type="hidden"), - Row( - Column("name", css_class="col-6"), - Column("output_type", css_class="col-4"), - ), - "description", - ) - ) + + fields = [ + Field("id", type="hidden"), + Row( + Column("name", css_class="col-6"), + Column("output_type", css_class="col-4"), + ), + "description", + ] + + if "DELETE" in self.fields: + fields.insert(1, Field("DELETE", type="hidden")) + + self.helper.layout = Layout(Div(*fields)) FilterQuestionFormSet = forms.inlineformset_factory( From b5add3971f71c04b6133af960282006ecb044116 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 11 Nov 2025 13:36:47 +0000 Subject: [PATCH 14/93] Adding Selection output type to the existing types for extracting output --- radis/core/static/core/core.js | 82 +++++++++++++++++ radis/core/templates/cotton/formset.html | 12 +-- radis/extractions/forms.py | 92 ++++++++++++++++++- .../0004_outputfield_selection_options.py | 29 ++++++ radis/extractions/models.py | 30 ++++++ .../extractions/_selection_options_field.html | 48 ++++++++++ .../extractions/extraction_job_detail.html | 10 ++ .../extraction_job_wizard_summary.html | 10 ++ 8 files changed, 305 insertions(+), 8 deletions(-) create mode 100644 radis/extractions/migrations/0004_outputfield_selection_options.py create mode 100644 radis/extractions/templates/extractions/_selection_options_field.html diff --git a/radis/core/static/core/core.js b/radis/core/static/core/core.js index cfff69a0..1b6c4d8f 100644 --- a/radis/core/static/core/core.js +++ b/radis/core/static/core/core.js @@ -89,3 +89,85 @@ function FormSet(rootEl) { }, }; } + +/** + * Manages the dynamic selection options input for extraction output fields. + * + * @param {HTMLElement} rootEl + * @returns {Object} + */ +function SelectionOptions(rootEl) { + const hiddenInput = rootEl.querySelector("[data-selection-input]"); + const formContainer = + rootEl.closest(".formset-form") ?? rootEl.closest("form") ?? rootEl; + const outputTypeField = + formContainer.querySelector('select[name$="-output_type"]') ?? + formContainer.querySelector('select[name="output_type"]'); + + return { + options: [], + maxOptions: 7, + supportsSelection: false, + init() { + this.options = this.parseOptions(hiddenInput?.value); + this.updateSupports(); + if (outputTypeField) { + outputTypeField.addEventListener("change", () => { + const wasSelection = this.supportsSelection; + this.updateSupports(); + if (!this.supportsSelection) { + this.options = []; + } else if (!wasSelection && this.options.length === 0) { + this.options = this.parseOptions(hiddenInput?.value); + } + }); + } + }, + parseOptions(value) { + if (!value) { + return []; + } + try { + const parsed = JSON.parse(value); + if (Array.isArray(parsed)) { + return parsed + .map((opt) => (typeof opt === "string" ? opt : "")) + .filter((opt) => opt !== ""); + } + } catch (err) { + console.warn("Invalid selection options payload", err); + } + return []; + }, + updateSupports() { + this.supportsSelection = outputTypeField + ? outputTypeField.value === "S" + : false; + }, + syncOptions() { + if (!hiddenInput) { + return; + } + const sanitized = this.options + .map((opt) => (typeof opt === "string" ? opt.trim() : "")) + .filter((opt) => opt !== ""); + hiddenInput.value = JSON.stringify(sanitized); + }, + addOption() { + if (!this.supportsSelection || this.options.length >= this.maxOptions) { + return; + } + this.options.push(""); + this.$nextTick(() => { + const inputs = rootEl.querySelectorAll("[data-selection-option-input]"); + const lastInput = inputs[inputs.length - 1]; + if (lastInput instanceof HTMLInputElement) { + lastInput.focus(); + } + }); + }, + removeOption(index) { + this.options.splice(index, 1); + }, + }; +} diff --git a/radis/core/templates/cotton/formset.html b/radis/core/templates/cotton/formset.html index 20c6af7b..904473a1 100644 --- a/radis/core/templates/cotton/formset.html +++ b/radis/core/templates/cotton/formset.html @@ -3,12 +3,12 @@
{% if legend %}{{ legend }}{% endif %} {{ formset.management_form }} - -
- {% for form in formset %}{{ form|crispy }}{% endfor %} -
+ +
+ {% for form in formset %}{% crispy form %}{% endfor %} +
{% if add_form_label %}
+ +
+
+ +

+ No selections defined yet. +

+
+

+ Choose the “Selection” output type to define enumerated values. +

+ {% if form.selection_options.errors %} +
+ {% for error in form.selection_options.errors %} + {{ error }} + {% endfor %} +
+ {% endif %} +
diff --git a/radis/extractions/templates/extractions/extraction_job_detail.html b/radis/extractions/templates/extractions/extraction_job_detail.html index 942a2fc8..dde3e67f 100644 --- a/radis/extractions/templates/extractions/extraction_job_detail.html +++ b/radis/extractions/templates/extractions/extraction_job_detail.html @@ -114,6 +114,16 @@
Output Fields
{{ field.output_type|human_readable_output_type }}
+ {% if field.selection_options %} +
Selections
+
+
    + {% for option in field.selection_options %} +
  • {{ option }}
  • + {% endfor %} +
+
+ {% endif %} {% endfor %} diff --git a/radis/extractions/templates/extractions/extraction_job_wizard_summary.html b/radis/extractions/templates/extractions/extraction_job_wizard_summary.html index fe7bc429..84906dbe 100644 --- a/radis/extractions/templates/extractions/extraction_job_wizard_summary.html +++ b/radis/extractions/templates/extractions/extraction_job_wizard_summary.html @@ -91,6 +91,16 @@
{{ field.output_type|human_readable_output_type }}
+ {% if field.selection_options %} +
Selections
+
+
    + {% for option in field.selection_options %} +
  • {{ option }}
  • + {% endfor %} +
+
+ {% endif %} {% endif %} From fdf5cded6cbe41e84a095a90eaef6d9faf408857 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Tue, 11 Nov 2025 21:07:49 +0000 Subject: [PATCH 15/93] fixed subscription tests with new models --- radis/subscriptions/forms.py | 7 ++- radis/subscriptions/tests/test_views.py | 62 +++++++++++++++++-------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index 961d87a4..384600f6 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -173,7 +173,6 @@ def __init__(self, *args, **kwargs): fields.insert(1, Field("DELETE", type="hidden")) self.helper.layout = Layout(Div(*fields)) - def clean(self) -> dict[str, Any]: cleaned_data = super().clean() assert cleaned_data @@ -181,9 +180,9 @@ def clean(self) -> dict[str, Any]: question = cleaned_data["question"] expected_answer = cleaned_data["expected_answer"] - if not question and not expected_answer: + if bool(question) != bool(expected_answer): raise forms.ValidationError( - "Question and Expected Answer text is required when specifying a filter." + "Both question and expected answer are required when specifying a filter." ) return cleaned_data @@ -203,7 +202,7 @@ def __init__(self, *args, **kwargs): self.helper = FormHelper() self.helper.form_tag = False self.helper.disable_csrf = True - + fields = [ Field("id", type="hidden"), Row( diff --git a/radis/subscriptions/tests/test_views.py b/radis/subscriptions/tests/test_views.py index 837269a6..fb026cbe 100644 --- a/radis/subscriptions/tests/test_views.py +++ b/radis/subscriptions/tests/test_views.py @@ -2,12 +2,10 @@ from adit_radis_shared.accounts.factories import GroupFactory, UserFactory from django.test import Client +from radis.extractions.factories import OutputFieldFactory from radis.reports.factories import LanguageFactory from radis.reports.models import Modality -from radis.subscriptions.factories import ( - FilterQuestionFactory, - SubscriptionFactory, -) +from radis.subscriptions.factories import FilterQuestionFactory, SubscriptionFactory from radis.subscriptions.models import Subscription @@ -100,11 +98,19 @@ def test_subscription_create_view_post_valid(client: Client): "age_till": 60, "patient_id": "12345", "send_finished_mail": True, - "questions-TOTAL_FORMS": "1", - "questions-INITIAL_FORMS": "0", - "questions-MIN_NUM_FORMS": "0", - "questions-MAX_NUM_FORMS": "3", - "questions-0-question": "What is the diagnosis?", + "filter_questions-TOTAL_FORMS": "1", + "filter_questions-INITIAL_FORMS": "0", + "filter_questions-MIN_NUM_FORMS": "0", + "filter_questions-MAX_NUM_FORMS": "3", + "filter_questions-0-question": "Does the report contain pneumothorax?", + "filter_questions-0-expected_answer": "Y", + "extraction_fields-TOTAL_FORMS": "1", + "extraction_fields-INITIAL_FORMS": "0", + "extraction_fields-MIN_NUM_FORMS": "0", + "extraction_fields-MAX_NUM_FORMS": "10", + "extraction_fields-0-name": "Pneumothorax status", + "extraction_fields-0-description": "Extract pneumothorax related findings", + "extraction_fields-0-output_type": "T", } response = client.post("/subscriptions/create/", data) @@ -125,13 +131,21 @@ def test_subscription_create_view_post_duplicate_name(client: Client): client.force_login(user) + language = LanguageFactory.create(code="en") + data = { "name": "Duplicate Name", "provider": "test_provider", - "questions-TOTAL_FORMS": "0", - "questions-INITIAL_FORMS": "0", - "questions-MIN_NUM_FORMS": "0", - "questions-MAX_NUM_FORMS": "3", + "language": language.pk, + "query": "", + "filter_questions-TOTAL_FORMS": "0", + "filter_questions-INITIAL_FORMS": "0", + "filter_questions-MIN_NUM_FORMS": "0", + "filter_questions-MAX_NUM_FORMS": "3", + "extraction_fields-TOTAL_FORMS": "0", + "extraction_fields-INITIAL_FORMS": "0", + "extraction_fields-MIN_NUM_FORMS": "0", + "extraction_fields-MAX_NUM_FORMS": "10", } response = client.post("/subscriptions/create/", data) @@ -191,6 +205,7 @@ def test_subscription_update_view_post_valid(client: Client): user = UserFactory.create(is_active=True) subscription = create_test_subscription(owner=user, name="Original Name") question = FilterQuestionFactory.create(subscription=subscription) + output_field = OutputFieldFactory.create(subscription=subscription, job=None) client.force_login(user) @@ -201,12 +216,21 @@ def test_subscription_update_view_post_valid(client: Client): "study_description": "Updated study", "patient_sex": "F", "send_finished_mail": False, - "questions-TOTAL_FORMS": "1", - "questions-INITIAL_FORMS": "1", - "questions-MIN_NUM_FORMS": "0", - "questions-MAX_NUM_FORMS": "3", - "questions-0-id": question.pk, - "questions-0-question": "Updated question?", + "filter_questions-TOTAL_FORMS": "1", + "filter_questions-INITIAL_FORMS": "1", + "filter_questions-MIN_NUM_FORMS": "0", + "filter_questions-MAX_NUM_FORMS": "3", + "filter_questions-0-id": question.pk, + "filter_questions-0-question": "Updated question?", + "filter_questions-0-expected_answer": "N", + "extraction_fields-TOTAL_FORMS": "1", + "extraction_fields-INITIAL_FORMS": "1", + "extraction_fields-MIN_NUM_FORMS": "0", + "extraction_fields-MAX_NUM_FORMS": "10", + "extraction_fields-0-id": output_field.pk, + "extraction_fields-0-name": "Volume", + "extraction_fields-0-description": "Volume description", + "extraction_fields-0-output_type": "N", } response = client.post(f"/subscriptions/{subscription.pk}/update/", data) From 3b28ce4566310c29fa6dd69d75edab2cc7f004fe Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 14 Nov 2025 14:04:29 +0000 Subject: [PATCH 16/93] Add tests for Selection form added via Alpine --- radis/extractions/tests/test_forms.py | 92 ++++++++++++++++++++++ radis/extractions/utils/processor_utils.py | 21 +++-- 2 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 radis/extractions/tests/test_forms.py diff --git a/radis/extractions/tests/test_forms.py b/radis/extractions/tests/test_forms.py new file mode 100644 index 00000000..2089bb42 --- /dev/null +++ b/radis/extractions/tests/test_forms.py @@ -0,0 +1,92 @@ +import json + +import pytest +from django.core.exceptions import ValidationError + +from radis.extractions.factories import ExtractionJobFactory +from radis.extractions.forms import OutputFieldForm +from radis.extractions.models import OutputField, OutputType + + +@pytest.mark.django_db +def test_output_field_form_accepts_selection_options(): + job = ExtractionJobFactory.create() + form = OutputFieldForm( + data={ + "name": "tumor_grade", + "description": "Classified tumor grade.", + "output_type": OutputType.SELECTION, + "selection_options": json.dumps(["Grade 1", "Grade 2"]), + }, + instance=OutputField(job=job), + ) + + assert form.is_valid() + instance = form.save(commit=False) + + assert instance.selection_options == ["Grade 1", "Grade 2"] + + +@pytest.mark.django_db +def test_output_field_form_requires_options_for_selection(): + job = ExtractionJobFactory.create() + form = OutputFieldForm( + data={ + "name": "tumor_grade", + "description": "Classified tumor grade.", + "output_type": OutputType.SELECTION, + "selection_options": json.dumps([]), + }, + instance=OutputField(job=job), + ) + + assert not form.is_valid() + assert "selection_options" in form.errors + + +@pytest.mark.django_db +def test_output_field_form_rejects_options_for_non_selection(): + job = ExtractionJobFactory.create() + form = OutputFieldForm( + data={ + "name": "tumor_grade", + "description": "Classified tumor grade.", + "output_type": OutputType.TEXT, + "selection_options": json.dumps(["Grade 1"]), + }, + instance=OutputField(job=job), + ) + + assert not form.is_valid() + assert "selection_options" in form.errors + + +@pytest.mark.django_db +def test_output_field_clean_trims_selection_options(): + job = ExtractionJobFactory.create() + field = OutputField( + job=job, + name="tumor_grade", + description="Classified tumor grade.", + output_type=OutputType.SELECTION, + selection_options=[" Grade 1 ", "Grade 2 "], + ) + + field.full_clean() + + assert field.selection_options == ["Grade 1", "Grade 2"] + + +@pytest.mark.django_db +def test_output_field_clean_rejects_selection_options_for_other_types(): + job = ExtractionJobFactory.create() + field = OutputField( + job=job, + name="tumor_grade", + description="Classified tumor grade.", + output_type=OutputType.TEXT, + selection_options=["Grade 1"], + ) + + with pytest.raises(ValidationError): + field.full_clean() diff --git a/radis/extractions/utils/processor_utils.py b/radis/extractions/utils/processor_utils.py index e7f12bca..8c99cfeb 100644 --- a/radis/extractions/utils/processor_utils.py +++ b/radis/extractions/utils/processor_utils.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Literal from django.db.models import QuerySet from pydantic import BaseModel, create_model @@ -11,12 +11,18 @@ def generate_output_fields_schema(fields: QuerySet[OutputField]) -> type[BaseModel]: field_definitions: dict[str, Any] = {} for field in fields.all(): - if field.output_type == OutputType.TEXT: + field_type = OutputType(field.output_type) + if field_type == OutputType.TEXT: output_type = str - elif field.output_type == OutputType.NUMERIC: + elif field_type == OutputType.NUMERIC: output_type = Numeric - elif field.output_type == OutputType.BOOLEAN: + elif field_type == OutputType.BOOLEAN: output_type = bool + elif field_type == OutputType.SELECTION: + options = tuple(field.selection_options) + if not options: + raise ValueError("Selection output requires at least one option.") + output_type = Literal.__getitem__(options) else: raise ValueError(f"Unknown data type: {field.output_type}") @@ -28,6 +34,11 @@ def generate_output_fields_schema(fields: QuerySet[OutputField]) -> type[BaseMod def generate_output_fields_prompt(fields: QuerySet[OutputField]) -> str: prompt = "" for field in fields.all(): - prompt += f"{field.name}: {field.description}\n" + description = field.description + if OutputType(field.output_type) == OutputType.SELECTION and field.selection_options: + description = ( + f"{description} (Allowed selections: {', '.join(field.selection_options)})" + ) + prompt += f"{field.name}: {description}\n" return prompt From 6292d1daceb98fbe845e9a26e67a4f7833fe30af Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 17 Nov 2025 12:29:21 +0000 Subject: [PATCH 17/93] added unit test for subscriptions that use the LLM to filter/extract --- .../tests/unit/test_processors.py | 43 +++++++++++++++++++ radis/subscriptions/utils/testing_helpers.py | 43 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 radis/subscriptions/tests/unit/test_processors.py create mode 100644 radis/subscriptions/utils/testing_helpers.py diff --git a/radis/subscriptions/tests/unit/test_processors.py b/radis/subscriptions/tests/unit/test_processors.py new file mode 100644 index 00000000..926eccea --- /dev/null +++ b/radis/subscriptions/tests/unit/test_processors.py @@ -0,0 +1,43 @@ +from unittest.mock import MagicMock, patch + +import pytest +from pydantic import BaseModel + +from radis.chats.utils.testing_helpers import create_openai_client_mock +from radis.subscriptions.models import SubscribedItem +from radis.subscriptions.processors import SubscriptionTaskProcessor +from radis.subscriptions.utils.testing_helpers import create_subscription_task + + +class FilterOutput(BaseModel): + filter_0: bool + + +class ExtractionOutput(BaseModel): + extraction_0: str + + +@pytest.mark.django_db(transaction=True) +def test_subscription_task_processor_filters_and_extracts(): + task, filter_question, extraction_field, report = create_subscription_task() + + filter_output = FilterOutput(filter_0=True) + extraction_output = ExtractionOutput(extraction_0="Pneumothorax status confirmed") + + filter_response = MagicMock(choices=[MagicMock(message=MagicMock(parsed=filter_output))]) + extraction_response = MagicMock( + choices=[MagicMock(message=MagicMock(parsed=extraction_output))] + ) + + openai_mock = create_openai_client_mock(extraction_output) + openai_mock.beta.chat.completions.parse = MagicMock( + side_effect=[filter_response, extraction_response] + ) + with patch("openai.OpenAI", return_value=openai_mock): + SubscriptionTaskProcessor(task).start() + + subscribed_item = SubscribedItem.objects.get(subscription=task.job.subscription, report=report) + assert subscribed_item.filter_results == {str(filter_question.pk): True} + assert subscribed_item.extraction_results == { + str(extraction_field.pk): "Pneumothorax status confirmed" + } diff --git a/radis/subscriptions/utils/testing_helpers.py b/radis/subscriptions/utils/testing_helpers.py new file mode 100644 index 00000000..bc1f5a0d --- /dev/null +++ b/radis/subscriptions/utils/testing_helpers.py @@ -0,0 +1,43 @@ +from adit_radis_shared.accounts.factories import GroupFactory, UserFactory +from adit_radis_shared.common.utils.testing_helpers import add_user_to_group + +from radis.extractions.factories import OutputFieldFactory +from radis.extractions.models import OutputType +from radis.reports.factories import LanguageFactory, ReportFactory +from radis.subscriptions.factories import FilterQuestionFactory, SubscriptionFactory +from radis.subscriptions.models import FilterQuestion, SubscriptionJob, SubscriptionTask + + +def create_subscription_task(): + language = LanguageFactory.create(code="en") + + user = UserFactory(is_active=True) + group = GroupFactory() + add_user_to_group(user, group) + user.active_group = group + user.save() + + subscription = SubscriptionFactory.create(owner=user, group=group, language=language) + + filter_question = FilterQuestionFactory.create( + subscription=subscription, expected_answer=FilterQuestion.ExpectedAnswer.YES + ) + extraction_field = OutputFieldFactory.create( + subscription=subscription, + job=None, + output_type=OutputType.TEXT, + ) + + job = SubscriptionJob.objects.create( + subscription=subscription, + owner=user, + owner_id=user.id, + status=SubscriptionJob.Status.PENDING, + ) + task = SubscriptionTask.objects.create(job=job, status=SubscriptionTask.Status.PENDING) + + report = ReportFactory.create(language=language, body="Pneumothorax observed.") + report.groups.add(group) + task.reports.add(report) + + return task, filter_question, extraction_field, report From 197faca4cfe1cd188f1224e45e4b18522d7b6bb8 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 17 Nov 2025 14:31:42 +0000 Subject: [PATCH 18/93] match font size of legend to other elements --- radis/subscriptions/forms.py | 19 ------------------- .../static/subscriptions/subscriptions.css | 6 ++++++ 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index 384600f6..e7fd92a1 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -11,7 +11,6 @@ from radis.search.forms import AGE_STEP, MAX_AGE, MIN_AGE from .models import FilterQuestion, Subscription -from .site import subscription_retrieval_providers class SubscriptionForm(forms.ModelForm): @@ -19,7 +18,6 @@ class Meta: model = Subscription fields = [ "name", - "provider", "query", "language", "modalities", @@ -33,21 +31,12 @@ class Meta: labels = {"patient_id": "Patient ID"} help_texts = { "name": "Name of the Subscription", - "provider": "The search provider to use for the database query", "query": "A query to filter reports", } def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.fields["provider"].widget = forms.Select( - choices=sorted( - [ - (provider.name, provider.name) - for provider in subscription_retrieval_providers.values() - ] - ) - ) self.fields["language"].choices = [ # type: ignore (language.pk, LANGUAGE_LABELS[language.code]) for language in Language.objects.order_by("code") @@ -93,7 +82,6 @@ def build_layout(self): Row( Column( "name", - "provider", "query", "send_finished_mail", Formset( @@ -139,13 +127,6 @@ def clean(self) -> dict[str, Any] | None: if age_from is not None and age_till is not None and age_from >= age_till: raise forms.ValidationError("Age from must be less than age till") - provider = self.cleaned_data["provider"] - query = self.cleaned_data["query"] - if query != "" and not provider: - raise forms.ValidationError( - "Setup of RADIS is incomplete. No retrieval providers are registered." - ) - return super().clean() diff --git a/radis/subscriptions/static/subscriptions/subscriptions.css b/radis/subscriptions/static/subscriptions/subscriptions.css index e69de29b..016f8965 100644 --- a/radis/subscriptions/static/subscriptions/subscriptions.css +++ b/radis/subscriptions/static/subscriptions/subscriptions.css @@ -0,0 +1,6 @@ +fieldset > legend { + font-size: 1rem !important; + font-weight: 500 !important; + line-height: 1.5; + margin-bottom: 0.5rem; +} From 838cf79924541127625a07cf61f068d7575fc95e Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 17 Nov 2025 15:32:23 +0000 Subject: [PATCH 19/93] fix filter question form validation --- radis/subscriptions/forms.py | 24 ++++++++++++----- radis/subscriptions/tests/test_views.py | 36 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index e7fd92a1..d0ef835d 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -141,7 +141,7 @@ def __init__(self, *args, **kwargs): self.fields["question"].required = False self.fields["expected_answer"].required = False self.fields["expected_answer"].choices = [ # type: ignore[attr-defined] - ("", "Select expected answer"), + ("", "Select the expected answer"), *FilterQuestion.ExpectedAnswer.choices, ] self.fields["expected_answer"].label = "Accept when answer is" @@ -154,17 +154,27 @@ def __init__(self, *args, **kwargs): fields.insert(1, Field("DELETE", type="hidden")) self.helper.layout = Layout(Div(*fields)) + def has_changed(self) -> bool: + if not self.is_bound: + return super().has_changed() + + question = (self.data.get(self.add_prefix("question")) or "").strip() + expected_answer = self.data.get(self.add_prefix("expected_answer")) or "" + + if not question and not expected_answer: + return False + + return super().has_changed() + def clean(self) -> dict[str, Any]: cleaned_data = super().clean() assert cleaned_data - question = cleaned_data["question"] - expected_answer = cleaned_data["expected_answer"] + question = cleaned_data.get("question") + expected_answer = cleaned_data.get("expected_answer") - if bool(question) != bool(expected_answer): - raise forms.ValidationError( - "Both question and expected answer are required when specifying a filter." - ) + if bool(question) ^ bool(expected_answer): + raise forms.ValidationError("You must provide both a question and an expected answer.") return cleaned_data diff --git a/radis/subscriptions/tests/test_views.py b/radis/subscriptions/tests/test_views.py index fb026cbe..8a473545 100644 --- a/radis/subscriptions/tests/test_views.py +++ b/radis/subscriptions/tests/test_views.py @@ -119,6 +119,42 @@ def test_subscription_create_view_post_valid(client: Client): assert Subscription.objects.filter(name="Test Subscription").exists() +@pytest.mark.django_db +def test_subscription_create_view_ignores_empty_filter_question(client: Client): + user = UserFactory.create(is_active=True) + group = GroupFactory.create() + user.groups.add(group) + user.active_group = group + user.save() + + language = LanguageFactory.create(code="en") + + client.force_login(user) + + data = { + "name": "Subscription Without Filter", + "provider": "test_provider", + "language": language.pk, + "query": "", + "filter_questions-TOTAL_FORMS": "1", + "filter_questions-INITIAL_FORMS": "0", + "filter_questions-MIN_NUM_FORMS": "0", + "filter_questions-MAX_NUM_FORMS": "3", + "filter_questions-0-question": "", + "filter_questions-0-expected_answer": "", + "extraction_fields-TOTAL_FORMS": "0", + "extraction_fields-INITIAL_FORMS": "0", + "extraction_fields-MIN_NUM_FORMS": "0", + "extraction_fields-MAX_NUM_FORMS": "10", + } + + response = client.post("/subscriptions/create/", data) + assert response.status_code == 302 + + subscription = Subscription.objects.get(name="Subscription Without Filter") + assert subscription.filter_questions.count() == 0 + + @pytest.mark.django_db def test_subscription_create_view_post_duplicate_name(client: Client): user = UserFactory.create(is_active=True) From b602c5fe782d1d3d9e1bb421de7cd11f99cbf055 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 17 Nov 2025 23:42:51 +0000 Subject: [PATCH 20/93] fix dependency cycle --- ...03_alter_extractionjob_options_and_more.py | 63 +------------------ .../0004_remove_outputfield_unique.py | 33 ++++++++++ .../0005_outputfield_subscription_and_more.py | 53 ++++++++++++++++ radis/extractions/models.py | 2 +- ...9_alter_subscribeditem_options_and_more.py | 47 +------------- ..._subscribeditem_filter_results_and_more.py | 56 +++++++++++++++++ 6 files changed, 147 insertions(+), 107 deletions(-) create mode 100644 radis/extractions/migrations/0004_remove_outputfield_unique.py create mode 100644 radis/extractions/migrations/0005_outputfield_subscription_and_more.py create mode 100644 radis/subscriptions/migrations/0010_rename_answers_subscribeditem_filter_results_and_more.py diff --git a/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py b/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py index c04ee4f9..660c2081 100644 --- a/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py +++ b/radis/extractions/migrations/0003_alter_extractionjob_options_and_more.py @@ -1,14 +1,12 @@ -# Generated by Django 5.2.7 on 2025-11-10 09:59 +# Generated by Django 5.2.7 on 2025-11-04 14:20 -import django.db.models.deletion -from django.db import migrations, models +from django.db import migrations class Migration(migrations.Migration): dependencies = [ ("extractions", "0002_procrastinate_on_delete"), - ("subscriptions", "0009_alter_subscribeditem_options_and_more"), ] operations = [ @@ -16,65 +14,8 @@ class Migration(migrations.Migration): name="extractionjob", options={"ordering": ["-created_at", "title"]}, ), - migrations.RemoveConstraint( - model_name="outputfield", - name="unique_output_field_name_per_job", - ), migrations.RemoveIndex( model_name="extractionjob", name="extractions_owner_i_49b05e_idx", ), - migrations.RemoveField( - model_name="outputfield", - name="optional", - ), - migrations.AddField( - model_name="outputfield", - name="subscription", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="extraction_fields", - to="subscriptions.subscription", - ), - ), - migrations.AlterField( - model_name="outputfield", - name="job", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="output_fields", - to="extractions.extractionjob", - ), - ), - migrations.AddConstraint( - model_name="outputfield", - constraint=models.UniqueConstraint( - condition=models.Q(("job__isnull", False)), - fields=("name", "job_id"), - name="unique_output_field_name_per_job", - ), - ), - migrations.AddConstraint( - model_name="outputfield", - constraint=models.UniqueConstraint( - condition=models.Q(("subscription__isnull", False)), - fields=("name", "subscription_id"), - name="unique_output_field_name_per_subscription", - ), - ), - migrations.AddConstraint( - model_name="outputfield", - constraint=models.CheckConstraint( - condition=models.Q( - models.Q(("job__isnull", False), ("subscription__isnull", True)), - models.Q(("job__isnull", True), ("subscription__isnull", False)), - _connector="OR", - ), - name="output_field_exactly_one_parent", - ), - ), ] diff --git a/radis/extractions/migrations/0004_remove_outputfield_unique.py b/radis/extractions/migrations/0004_remove_outputfield_unique.py new file mode 100644 index 00000000..de73e8a5 --- /dev/null +++ b/radis/extractions/migrations/0004_remove_outputfield_unique.py @@ -0,0 +1,33 @@ +# Generated by Django 5.2.8 on 2025-11-17 23:19 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("extractions", "0003_alter_extractionjob_options_and_more"), + ] + + operations = [ + migrations.RemoveConstraint( + model_name="outputfield", + name="unique_output_field_name_per_job", + ), + migrations.RemoveField( + model_name="outputfield", + name="optional", + ), + migrations.AlterField( + model_name="outputfield", + name="job", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="output_fields", + to="extractions.extractionjob", + ), + ), + ] diff --git a/radis/extractions/migrations/0005_outputfield_subscription_and_more.py b/radis/extractions/migrations/0005_outputfield_subscription_and_more.py new file mode 100644 index 00000000..72f0c484 --- /dev/null +++ b/radis/extractions/migrations/0005_outputfield_subscription_and_more.py @@ -0,0 +1,53 @@ +# Generated by Django 5.2.8 on 2025-11-17 23:39 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("extractions", "0004_remove_outputfield_unique"), + ("subscriptions", "0010_rename_answers_subscribeditem_filter_results_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="outputfield", + name="subscription", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="output_fields", + to="subscriptions.subscription", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.UniqueConstraint( + condition=models.Q(("job__isnull", False)), + fields=("name", "job_id"), + name="unique_output_field_name_per_job", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.UniqueConstraint( + condition=models.Q(("subscription__isnull", False)), + fields=("name", "subscription_id"), + name="unique_output_field_name_per_subscription", + ), + ), + migrations.AddConstraint( + model_name="outputfield", + constraint=models.CheckConstraint( + condition=models.Q( + models.Q(("job__isnull", False), ("subscription__isnull", True)), + models.Q(("job__isnull", True), ("subscription__isnull", False)), + _connector="OR", + ), + name="output_field_exactly_one_parent", + ), + ), + ] diff --git a/radis/extractions/models.py b/radis/extractions/models.py index 622e4fe2..8e360b67 100644 --- a/radis/extractions/models.py +++ b/radis/extractions/models.py @@ -94,7 +94,7 @@ class OutputField(models.Model): null=True, blank=True, on_delete=models.CASCADE, - related_name="extraction_fields", + related_name="output_fields", ) class Meta: diff --git a/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py b/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py index 4268507b..de7e0aec 100644 --- a/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py +++ b/radis/subscriptions/migrations/0009_alter_subscribeditem_options_and_more.py @@ -1,7 +1,6 @@ -# Generated by Django 5.2.7 on 2025-11-10 09:59 +# Generated by Django 5.2.7 on 2025-11-04 14:20 -import django.db.models.deletion -from django.db import migrations, models +from django.db import migrations class Migration(migrations.Migration): @@ -22,46 +21,4 @@ class Migration(migrations.Migration): name="subscription", options={"ordering": ["-created_at"]}, ), - migrations.RenameField( - model_name="subscribeditem", - old_name="answers", - new_name="extraction_results", - ), - migrations.AddField( - model_name="subscribeditem", - name="filter_results", - field=models.JSONField(blank=True, null=True), - ), - migrations.CreateModel( - name="FilterQuestion", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("question", models.CharField(max_length=300)), - ( - "expected_answer", - models.CharField( - choices=[("Y", "Yes"), ("N", "No")], default="Y", max_length=1 - ), - ), - ( - "subscription", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="filter_questions", - to="subscriptions.subscription", - ), - ), - ], - ), - migrations.DeleteModel( - name="Question", - ), ] diff --git a/radis/subscriptions/migrations/0010_rename_answers_subscribeditem_filter_results_and_more.py b/radis/subscriptions/migrations/0010_rename_answers_subscribeditem_filter_results_and_more.py new file mode 100644 index 00000000..23a13958 --- /dev/null +++ b/radis/subscriptions/migrations/0010_rename_answers_subscribeditem_filter_results_and_more.py @@ -0,0 +1,56 @@ +# Generated by Django 5.2.8 on 2025-11-17 23:36 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("subscriptions", "0009_alter_subscribeditem_options_and_more"), + ] + + operations = [ + migrations.RenameField( + model_name="subscribeditem", + old_name="answers", + new_name="filter_results", + ), + migrations.AddField( + model_name="subscribeditem", + name="extraction_results", + field=models.JSONField(blank=True, null=True), + ), + migrations.CreateModel( + name="FilterQuestion", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("question", models.CharField(max_length=300)), + ( + "expected_answer", + models.CharField( + choices=[("Y", "Yes"), ("N", "No")], default="Y", max_length=1 + ), + ), + ( + "subscription", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="filter_questions", + to="subscriptions.subscription", + ), + ), + ], + ), + migrations.DeleteModel( + name="Question", + ), + ] From 7410f9d4df994c8249be8d2ed0a2a4f2cb31188e Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 17 Nov 2025 23:55:54 +0000 Subject: [PATCH 21/93] fix files that use the new renamed output_fields --- radis/subscriptions/models.py | 4 +- radis/subscriptions/processors.py | 6 +-- radis/subscriptions/tests/test_views.py | 46 ++++++++++---------- radis/subscriptions/utils/processor_utils.py | 2 +- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/radis/subscriptions/models.py b/radis/subscriptions/models.py index 46eda99f..f049f065 100644 --- a/radis/subscriptions/models.py +++ b/radis/subscriptions/models.py @@ -52,7 +52,7 @@ class Subscription(models.Model): last_refreshed = models.DateTimeField(auto_now_add=True) filter_questions: models.QuerySet["FilterQuestion"] - extraction_fields: models.QuerySet[OutputField] + output_fields: models.QuerySet[OutputField] items: models.QuerySet["SubscribedItem"] send_finished_mail = models.BooleanField(default=False) @@ -131,7 +131,7 @@ def iter_extraction_results(self) -> list[tuple[OutputField, object]]: return [] results: list[tuple[OutputField, object]] = [] - subscription_fields = {str(f.pk): f for f in self.subscription.extraction_fields.all()} + subscription_fields = {str(f.pk): f for f in self.subscription.output_fields.all()} for key, value in self.extraction_results.items(): field = subscription_fields.get(str(key)) if field is not None: diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index 2ee361f9..a94f3555 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -19,8 +19,8 @@ from .utils.processor_utils import ( build_extraction_schema, build_filter_schema, - generate_extraction_fields_prompt, generate_filter_questions_prompt, + generate_output_fields_prompt, ) logger = logging.getLogger(__name__) @@ -80,14 +80,14 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: logger.debug(f"Report {report.pk} was rejected by subscription {subscription.pk}") return - extraction_bundle = build_extraction_schema(subscription.extraction_fields) + extraction_bundle = build_extraction_schema(subscription.output_fields) extraction_results: dict[str, Any] = {} if extraction_bundle.mapping: extraction_prompt = Template(settings.SUBSCRIPTION_EXTRACTION_PROMPT).substitute( { "report": report.body, - "fields": generate_extraction_fields_prompt(extraction_bundle.mapping), + "fields": generate_output_fields_prompt(extraction_bundle.mapping), } ) extraction_response = self.client.extract_data( diff --git a/radis/subscriptions/tests/test_views.py b/radis/subscriptions/tests/test_views.py index 8a473545..f5ce9f88 100644 --- a/radis/subscriptions/tests/test_views.py +++ b/radis/subscriptions/tests/test_views.py @@ -104,13 +104,13 @@ def test_subscription_create_view_post_valid(client: Client): "filter_questions-MAX_NUM_FORMS": "3", "filter_questions-0-question": "Does the report contain pneumothorax?", "filter_questions-0-expected_answer": "Y", - "extraction_fields-TOTAL_FORMS": "1", - "extraction_fields-INITIAL_FORMS": "0", - "extraction_fields-MIN_NUM_FORMS": "0", - "extraction_fields-MAX_NUM_FORMS": "10", - "extraction_fields-0-name": "Pneumothorax status", - "extraction_fields-0-description": "Extract pneumothorax related findings", - "extraction_fields-0-output_type": "T", + "output_fields-TOTAL_FORMS": "1", + "output_fields-INITIAL_FORMS": "0", + "output_fields-MIN_NUM_FORMS": "0", + "output_fields-MAX_NUM_FORMS": "10", + "output_fields-0-name": "Pneumothorax status", + "output_fields-0-description": "Extract pneumothorax related findings", + "output_fields-0-output_type": "T", } response = client.post("/subscriptions/create/", data) @@ -142,10 +142,10 @@ def test_subscription_create_view_ignores_empty_filter_question(client: Client): "filter_questions-MAX_NUM_FORMS": "3", "filter_questions-0-question": "", "filter_questions-0-expected_answer": "", - "extraction_fields-TOTAL_FORMS": "0", - "extraction_fields-INITIAL_FORMS": "0", - "extraction_fields-MIN_NUM_FORMS": "0", - "extraction_fields-MAX_NUM_FORMS": "10", + "output_fields-TOTAL_FORMS": "0", + "output_fields-INITIAL_FORMS": "0", + "output_fields-MIN_NUM_FORMS": "0", + "output_fields-MAX_NUM_FORMS": "10", } response = client.post("/subscriptions/create/", data) @@ -178,10 +178,10 @@ def test_subscription_create_view_post_duplicate_name(client: Client): "filter_questions-INITIAL_FORMS": "0", "filter_questions-MIN_NUM_FORMS": "0", "filter_questions-MAX_NUM_FORMS": "3", - "extraction_fields-TOTAL_FORMS": "0", - "extraction_fields-INITIAL_FORMS": "0", - "extraction_fields-MIN_NUM_FORMS": "0", - "extraction_fields-MAX_NUM_FORMS": "10", + "output_fields-TOTAL_FORMS": "0", + "output_fields-INITIAL_FORMS": "0", + "output_fields-MIN_NUM_FORMS": "0", + "output_fields-MAX_NUM_FORMS": "10", } response = client.post("/subscriptions/create/", data) @@ -259,14 +259,14 @@ def test_subscription_update_view_post_valid(client: Client): "filter_questions-0-id": question.pk, "filter_questions-0-question": "Updated question?", "filter_questions-0-expected_answer": "N", - "extraction_fields-TOTAL_FORMS": "1", - "extraction_fields-INITIAL_FORMS": "1", - "extraction_fields-MIN_NUM_FORMS": "0", - "extraction_fields-MAX_NUM_FORMS": "10", - "extraction_fields-0-id": output_field.pk, - "extraction_fields-0-name": "Volume", - "extraction_fields-0-description": "Volume description", - "extraction_fields-0-output_type": "N", + "output_fields-TOTAL_FORMS": "1", + "output_fields-INITIAL_FORMS": "1", + "output_fields-MIN_NUM_FORMS": "0", + "output_fields-MAX_NUM_FORMS": "10", + "output_fields-0-id": output_field.pk, + "output_fields-0-name": "Volume", + "output_fields-0-description": "Volume description", + "output_fields-0-output_type": "N", } response = client.post(f"/subscriptions/{subscription.pk}/update/", data) diff --git a/radis/subscriptions/utils/processor_utils.py b/radis/subscriptions/utils/processor_utils.py index 299e7eb1..ac78b9f3 100644 --- a/radis/subscriptions/utils/processor_utils.py +++ b/radis/subscriptions/utils/processor_utils.py @@ -81,7 +81,7 @@ def generate_filter_questions_prompt(mapping: list[tuple[str, FilterQuestion]]) return "\n".join(lines) -def generate_extraction_fields_prompt(mapping: list[tuple[str, OutputField]]) -> str: +def generate_output_fields_prompt(mapping: list[tuple[str, OutputField]]) -> str: if not mapping: return "None" From df06a769bbc677fb39b6b345f4a7feb0ddbac343 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Tue, 18 Nov 2025 00:22:37 +0000 Subject: [PATCH 22/93] complete renaming of output field --- radis/subscriptions/forms.py | 8 ++--- .../tests/unit/test_processors.py | 4 +-- radis/subscriptions/utils/testing_helpers.py | 4 +-- radis/subscriptions/views.py | 36 +++++++++---------- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index d0ef835d..05274a62 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -90,7 +90,7 @@ def build_layout(self): add_form_label="Add Filter Question", ), Formset( - "extraction_formset", + "output_formset", legend="Extraction Fields", add_form_label="Add Extraction Field", ), @@ -179,7 +179,7 @@ def clean(self) -> dict[str, Any]: return cleaned_data -class ExtractionFieldForm(forms.ModelForm): +class OutputFieldForm(forms.ModelForm): class Meta: model = OutputField fields = ["name", "description", "output_type"] @@ -220,10 +220,10 @@ def __init__(self, *args, **kwargs): can_delete=False, ) -ExtractionFieldFormSet = forms.inlineformset_factory( +OutputFieldFormSet = forms.inlineformset_factory( Subscription, OutputField, - form=ExtractionFieldForm, + form=OutputFieldForm, fk_name="subscription", extra=1, min_num=0, diff --git a/radis/subscriptions/tests/unit/test_processors.py b/radis/subscriptions/tests/unit/test_processors.py index 926eccea..457c1891 100644 --- a/radis/subscriptions/tests/unit/test_processors.py +++ b/radis/subscriptions/tests/unit/test_processors.py @@ -19,7 +19,7 @@ class ExtractionOutput(BaseModel): @pytest.mark.django_db(transaction=True) def test_subscription_task_processor_filters_and_extracts(): - task, filter_question, extraction_field, report = create_subscription_task() + task, filter_question, output_field, report = create_subscription_task() filter_output = FilterOutput(filter_0=True) extraction_output = ExtractionOutput(extraction_0="Pneumothorax status confirmed") @@ -39,5 +39,5 @@ def test_subscription_task_processor_filters_and_extracts(): subscribed_item = SubscribedItem.objects.get(subscription=task.job.subscription, report=report) assert subscribed_item.filter_results == {str(filter_question.pk): True} assert subscribed_item.extraction_results == { - str(extraction_field.pk): "Pneumothorax status confirmed" + str(output_field.pk): "Pneumothorax status confirmed" } diff --git a/radis/subscriptions/utils/testing_helpers.py b/radis/subscriptions/utils/testing_helpers.py index bc1f5a0d..9ec03e91 100644 --- a/radis/subscriptions/utils/testing_helpers.py +++ b/radis/subscriptions/utils/testing_helpers.py @@ -22,7 +22,7 @@ def create_subscription_task(): filter_question = FilterQuestionFactory.create( subscription=subscription, expected_answer=FilterQuestion.ExpectedAnswer.YES ) - extraction_field = OutputFieldFactory.create( + output_field = OutputFieldFactory.create( subscription=subscription, job=None, output_type=OutputType.TEXT, @@ -40,4 +40,4 @@ def create_subscription_task(): report.groups.add(group) task.reports.add(report) - return task, filter_question, extraction_field, report + return task, filter_question, output_field, report diff --git a/radis/subscriptions/views.py b/radis/subscriptions/views.py index 8d4fefd4..a73cc49c 100644 --- a/radis/subscriptions/views.py +++ b/radis/subscriptions/views.py @@ -21,8 +21,8 @@ from radis.subscriptions.tables import SubscriptionTable from .forms import ( - ExtractionFieldFormSet, FilterQuestionFormSet, + OutputFieldFormSet, SubscriptionForm, ) from .models import SubscribedItem, Subscription @@ -58,7 +58,7 @@ def get_queryset(self): super() .get_queryset() .filter(owner=self.request.user) - .prefetch_related("filter_questions", "extraction_fields") + .prefetch_related("filter_questions", "output_fields") ) @@ -72,17 +72,17 @@ def get_context_data(self, **kwargs: Any) -> dict[str, Any]: ctx = super().get_context_data(**kwargs) if self.request.POST: ctx["filter_formset"] = FilterQuestionFormSet(self.request.POST) - ctx["extraction_formset"] = ExtractionFieldFormSet(self.request.POST) + ctx["output_formset"] = OutputFieldFormSet(self.request.POST) else: ctx["filter_formset"] = FilterQuestionFormSet() - ctx["extraction_formset"] = ExtractionFieldFormSet() + ctx["output_formset"] = OutputFieldFormSet() return ctx def form_valid(self, form) -> HttpResponse: ctx = self.get_context_data() filter_formset: BaseInlineFormSet = ctx["filter_formset"] - extraction_formset: BaseInlineFormSet = ctx["extraction_formset"] - if filter_formset.is_valid() and extraction_formset.is_valid(): + output_formset: BaseInlineFormSet = ctx["output_formset"] + if filter_formset.is_valid() and output_formset.is_valid(): user = self.request.user form.instance.owner = user active_group = user.active_group @@ -99,8 +99,8 @@ def form_valid(self, form) -> HttpResponse: filter_formset.instance = self.object filter_formset.save() - extraction_formset.instance = self.object - extraction_formset.save() + output_formset.instance = self.object + output_formset.save() return HttpResponseRedirect(self.get_success_url()) else: return self.form_invalid(form) @@ -120,28 +120,26 @@ def get_queryset(self) -> QuerySet[Subscription]: super() .get_queryset() .filter(owner=self.request.user) - .prefetch_related("filter_questions", "extraction_fields") + .prefetch_related("filter_questions", "output_fields") ) def get_context_data(self, **kwargs: Any) -> dict[str, Any]: ctx = super().get_context_data(**kwargs) if self.request.POST: ctx["filter_formset"] = FilterQuestionFormSet(self.request.POST, instance=self.object) - ctx["extraction_formset"] = ExtractionFieldFormSet( - self.request.POST, instance=self.object - ) + ctx["output_formset"] = OutputFieldFormSet(self.request.POST, instance=self.object) else: ctx["filter_formset"] = FilterQuestionFormSet(instance=self.object) - ctx["extraction_formset"] = ExtractionFieldFormSet(instance=self.object) + ctx["output_formset"] = OutputFieldFormSet(instance=self.object) ctx["filter_formset"].extra = 0 # no additional empty form when editing - ctx["extraction_formset"].extra = 0 + ctx["output_formset"].extra = 0 return ctx def form_valid(self, form) -> HttpResponse: ctx = self.get_context_data() filter_formset = ctx["filter_formset"] - extraction_formset = ctx["extraction_formset"] - if filter_formset.is_valid() and extraction_formset.is_valid(): + output_formset = ctx["output_formset"] + if filter_formset.is_valid() and output_formset.is_valid(): try: self.object = form.save() except IntegrityError as e: @@ -153,8 +151,8 @@ def form_valid(self, form) -> HttpResponse: filter_formset.instance = self.object filter_formset.save() - extraction_formset.instance = self.object - extraction_formset.save() + output_formset.instance = self.object + output_formset.save() return super().form_valid(form) else: @@ -196,7 +194,7 @@ def get_related_queryset(self) -> QuerySet[SubscribedItem]: .prefetch_related( "report", "subscription__filter_questions", - "subscription__extraction_fields", + "subscription__output_fields", ) ) From 80b8293990d14c7eaffba01910e3b84ba4551760 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Tue, 18 Nov 2025 00:31:32 +0000 Subject: [PATCH 23/93] missed one renaming, maybe use generic fk for --- .../templates/subscriptions/subscription_detail.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/radis/subscriptions/templates/subscriptions/subscription_detail.html b/radis/subscriptions/templates/subscriptions/subscription_detail.html index 8ba95b04..61170e02 100644 --- a/radis/subscriptions/templates/subscriptions/subscription_detail.html +++ b/radis/subscriptions/templates/subscriptions/subscription_detail.html @@ -134,7 +134,7 @@

Filter Questions

{% endif %} {% endwith %}

Extraction Fields

- {% with fields=subscription.extraction_fields.all %} + {% with fields=subscription.output_fields.all %} {% if fields %}
{% for field in fields %} From def36f1d85dbdfc0323f4bf505b05ef69e8ba356 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Tue, 18 Nov 2025 01:01:10 +0000 Subject: [PATCH 24/93] remove unnecessary iter --- radis/subscriptions/models.py | 24 ------------------- .../_subscribed_item_preview.html | 20 ---------------- 2 files changed, 44 deletions(-) diff --git a/radis/subscriptions/models.py b/radis/subscriptions/models.py index f049f065..470c1fa2 100644 --- a/radis/subscriptions/models.py +++ b/radis/subscriptions/models.py @@ -114,30 +114,6 @@ class Meta: def __str__(self): return f"SubscribedItem of {self.subscription} [{self.pk}]" - def iter_filter_results(self) -> list[tuple[FilterQuestion, bool]]: - if not self.filter_results: - return [] - - results: list[tuple[FilterQuestion, bool]] = [] - subscription_questions = {str(q.pk): q for q in self.subscription.filter_questions.all()} - for key, value in self.filter_results.items(): - question = subscription_questions.get(str(key)) - if question is not None: - results.append((question, bool(value))) - return results - - def iter_extraction_results(self) -> list[tuple[OutputField, object]]: - if not self.extraction_results: - return [] - - results: list[tuple[OutputField, object]] = [] - subscription_fields = {str(f.pk): f for f in self.subscription.output_fields.all()} - for key, value in self.extraction_results.items(): - field = subscription_fields.get(str(key)) - if field is not None: - results.append((field, value)) - return results - class SubscriptionJob(AnalysisJob): default_priority = settings.SUBSCRIPTION_DEFAULT_PRIORITY diff --git a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html index e8317076..432891f5 100644 --- a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html +++ b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html @@ -4,26 +4,6 @@
{% include "reports/_report_header.html" with report=subscribed_item.report %}
- {% with filter_results=subscribed_item.iter_filter_results %} - {% if filter_results %} -
-
Filter results
-
    - {% for question, value in filter_results %}
  • {{ question.question }} → {{ value|yesno:"Yes,No" }}
  • {% endfor %} -
-
- {% endif %} - {% endwith %} - {% with extraction_results=subscribed_item.iter_extraction_results %} - {% if extraction_results %} -
-
Extraction results
-
    - {% for field, value in extraction_results %}
  • {{ field.name }} → {{ value|default_if_none:"—" }}
  • {% endfor %} -
-
- {% endif %} - {% endwith %}
{{ subscribed_item.report.body }}
From da048fb6821f57948c3ce1801bff5f461b709d69 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Wed, 19 Nov 2025 13:25:50 +0000 Subject: [PATCH 25/93] Allow middleware to detect locale from users browser (via Accept-Language header) and use it downstream to deal with format parsing and translations --- radis/settings/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/radis/settings/base.py b/radis/settings/base.py index dea0bb53..feeba638 100644 --- a/radis/settings/base.py +++ b/radis/settings/base.py @@ -92,6 +92,7 @@ "django.middleware.security.SecurityMiddleware", "whitenoise.middleware.WhiteNoiseMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.locale.LocaleMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", From eb0829919b7df9ffc474a04dd708d3f1d6d5d72c Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 19:31:52 +0000 Subject: [PATCH 26/93] readd provider form field and use procrastinate_on_delete_sql --- radis/subscriptions/forms.py | 12 ++++++++++ .../0011_procrastinate_on_delete.py | 23 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 radis/subscriptions/migrations/0011_procrastinate_on_delete.py diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index 05274a62..157363fa 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -11,6 +11,7 @@ from radis.search.forms import AGE_STEP, MAX_AGE, MIN_AGE from .models import FilterQuestion, Subscription +from .site import subscription_retrieval_providers class SubscriptionForm(forms.ModelForm): @@ -18,6 +19,7 @@ class Meta: model = Subscription fields = [ "name", + "provider", "query", "language", "modalities", @@ -31,12 +33,21 @@ class Meta: labels = {"patient_id": "Patient ID"} help_texts = { "name": "Name of the Subscription", + "provider": "The search provider to use for the database query", "query": "A query to filter reports", } def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.fields["provider"].widget = forms.Select( + choices=sorted( + [ + (provider.name, provider.name) + for provider in subscription_retrieval_providers.values() + ] + ) + ) self.fields["language"].choices = [ # type: ignore (language.pk, LANGUAGE_LABELS[language.code]) for language in Language.objects.order_by("code") @@ -82,6 +93,7 @@ def build_layout(self): Row( Column( "name", + "provider", "query", "send_finished_mail", Formset( diff --git a/radis/subscriptions/migrations/0011_procrastinate_on_delete.py b/radis/subscriptions/migrations/0011_procrastinate_on_delete.py new file mode 100644 index 00000000..bd5cd156 --- /dev/null +++ b/radis/subscriptions/migrations/0011_procrastinate_on_delete.py @@ -0,0 +1,23 @@ +# Generated by Django 5.1.6 on 2025-02-26 08:51 + +from django.db import migrations + +from adit_radis_shared.common.utils.migration_utils import procrastinate_on_delete_sql + +class Migration(migrations.Migration): + + dependencies = [ + ("subscriptions", "0010_rename_answers_subscribeditem_filter_results_and_more"), + ("procrastinate", "0028_add_cancel_states"), + ] + + operations = [ + migrations.RunSQL( + sql=procrastinate_on_delete_sql("subscriptions", "subscriptionjob"), + reverse_sql=procrastinate_on_delete_sql("subscriptions", "subscriptionjob", reverse=True), + ), + migrations.RunSQL( + sql=procrastinate_on_delete_sql("subscriptions", "subscriptiontask"), + reverse_sql=procrastinate_on_delete_sql("subscriptions", "subscriptiontask", reverse=True), + ), + ] From 018d8aede29a602742cc461c1a220fe907639e07 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 20:04:42 +0000 Subject: [PATCH 27/93] small fixes --- radis/subscriptions/forms.py | 2 +- radis/subscriptions/processors.py | 7 ++++--- radis/subscriptions/utils/processor_utils.py | 4 ++-- radis/subscriptions/views.py | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index 157363fa..e6cfe226 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -200,7 +200,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fields["name"].required = True - self.fields["description"].required = False + self.fields["description"].required = True self.helper = FormHelper() self.helper.form_tag = False diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index a94f3555..4ed4558e 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -65,9 +65,10 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: filter_response = self.client.extract_data(filter_prompt, filter_bundle.schema) for field_name, question in filter_bundle.mapping: - answer = bool(getattr(filter_response, field_name)) - filter_results[str(question.pk)] = answer - if answer != question.expected_answer_bool: + answer = getattr(filter_response, field_name, None) + answer_bool = False if answer is None else bool(answer) + filter_results[str(question.pk)] = answer_bool + if answer_bool != question.expected_answer_bool: is_accepted = False else: logger.debug( diff --git a/radis/subscriptions/utils/processor_utils.py b/radis/subscriptions/utils/processor_utils.py index ac78b9f3..d0df2e91 100644 --- a/radis/subscriptions/utils/processor_utils.py +++ b/radis/subscriptions/utils/processor_utils.py @@ -29,7 +29,7 @@ def build_filter_schema(questions: QuerySet[FilterQuestion]) -> FilterSchemaBund field_definitions: dict[str, Any] = {} mapping: list[tuple[str, FilterQuestion]] = [] - for index, question in enumerate(questions.all()): + for index, question in enumerate(questions.order_by("pk")): field_name = f"filter_{index}" field_definitions[field_name] = (bool, ...) mapping.append((field_name, question)) @@ -47,7 +47,7 @@ def build_extraction_schema(fields: QuerySet[OutputField]) -> ExtractionSchemaBu field_definitions: dict[str, Any] = {} mapping: list[tuple[str, OutputField]] = [] - for index, field in enumerate(fields.all()): + for index, field in enumerate(fields.order_by("pk")): field_name = f"extraction_{index}" if field.output_type == OutputType.TEXT: output_type = str diff --git a/radis/subscriptions/views.py b/radis/subscriptions/views.py index a73cc49c..4a84b332 100644 --- a/radis/subscriptions/views.py +++ b/radis/subscriptions/views.py @@ -92,7 +92,7 @@ def form_valid(self, form) -> HttpResponse: self.object: Subscription = form.save() except IntegrityError as e: if "unique_subscription_name_per_user" in str(e): - form.add_error("name", "An subscription with this name already exists.") + form.add_error("name", "A subscription with this name already exists.") return self.form_invalid(form) raise e @@ -144,7 +144,7 @@ def form_valid(self, form) -> HttpResponse: self.object = form.save() except IntegrityError as e: if "unique_subscription_name_per_user" in str(e): - form.add_error("name", "An subscription with this name already exists.") + form.add_error("name", "A subscription with this name already exists.") return self.form_invalid(form) raise e From e7e6c7c4395b0ad516c748825696de8c2f7ad83e Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 20:14:42 +0000 Subject: [PATCH 28/93] stronger form validation --- radis/subscriptions/forms.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index e6cfe226..f91aa322 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -178,6 +178,12 @@ def has_changed(self) -> bool: return super().has_changed() + def clean_question(self): + question = self.cleaned_data["question"] + if len(question) > 300: # already enforced by model + raise forms.ValidationError("Question too long") + return question + def clean(self) -> dict[str, Any]: cleaned_data = super().clean() assert cleaned_data @@ -185,7 +191,7 @@ def clean(self) -> dict[str, Any]: question = cleaned_data.get("question") expected_answer = cleaned_data.get("expected_answer") - if bool(question) ^ bool(expected_answer): + if (question and not expected_answer) or (expected_answer and not question): raise forms.ValidationError("You must provide both a question and an expected answer.") return cleaned_data From a67fe075876cc4e3aa0c262419dab23e07ac4256 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 20:31:03 +0000 Subject: [PATCH 29/93] fix filter logic --- radis/subscriptions/forms.py | 6 ------ radis/subscriptions/processors.py | 11 +++++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index f91aa322..e5eaf7fb 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -178,12 +178,6 @@ def has_changed(self) -> bool: return super().has_changed() - def clean_question(self): - question = self.cleaned_data["question"] - if len(question) > 300: # already enforced by model - raise forms.ValidationError("Question too long") - return question - def clean(self) -> dict[str, Any]: cleaned_data = super().clean() assert cleaned_data diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index 4ed4558e..6f129aeb 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -66,10 +66,13 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: for field_name, question in filter_bundle.mapping: answer = getattr(filter_response, field_name, None) - answer_bool = False if answer is None else bool(answer) - filter_results[str(question.pk)] = answer_bool - if answer_bool != question.expected_answer_bool: + if answer is None: is_accepted = False + else: + answer_bool = bool(answer) + filter_results[str(question.pk)] = answer_bool + if answer_bool != question.expected_answer_bool: + is_accepted = False else: logger.debug( "Subscription %s has no filter questions; accepting report %s by default", @@ -96,7 +99,7 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: ) for field_name, field in extraction_bundle.mapping: - extraction_results[str(field.pk)] = getattr(extraction_response, field_name) + extraction_results[str(field.pk)] = getattr(extraction_response, field_name, None) SubscribedItem.objects.create( subscription=task.job.subscription, From ed9949515a01c009c7923037722dad4acfcf63b8 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 21:47:14 +0000 Subject: [PATCH 30/93] rework pydantic model generation --- radis/subscriptions/processors.py | 40 +++++---- .../tests/unit/test_processors.py | 29 +++--- radis/subscriptions/utils/processor_utils.py | 88 ++++--------------- 3 files changed, 59 insertions(+), 98 deletions(-) diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index 6f129aeb..a9a31804 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -9,6 +9,10 @@ from radis.chats.utils.chat_client import ChatClient from radis.core.processors import AnalysisTaskProcessor +from radis.extractions.utils.processor_utils import ( + generate_output_fields_prompt, + generate_output_fields_schema, +) from radis.reports.models import Report from .models import ( @@ -17,10 +21,10 @@ SubscriptionTask, ) from .utils.processor_utils import ( - build_extraction_schema, - build_filter_schema, generate_filter_questions_prompt, - generate_output_fields_prompt, + generate_filter_questions_schema, + get_filter_question_field_name, + get_output_field_name, ) logger = logging.getLogger(__name__) @@ -50,21 +54,24 @@ def process_task(self, task: SubscriptionTask) -> None: def process_report(self, report: Report, task: SubscriptionTask) -> None: subscription: Subscription = task.job.subscription - filter_bundle = build_filter_schema(subscription.filter_questions) filter_results: dict[str, bool] = {} is_accepted = True - if filter_bundle.mapping: + filter_questions = subscription.filter_questions.order_by("pk") + + if filter_questions.exists(): filter_prompt = Template(settings.SUBSCRIPTION_FILTER_PROMPT).substitute( { "report": report.body, - "questions": generate_filter_questions_prompt(filter_bundle.mapping), + "questions": generate_filter_questions_prompt(filter_questions), } ) - filter_response = self.client.extract_data(filter_prompt, filter_bundle.schema) + filter_schema = generate_filter_questions_schema(filter_questions) + filter_response = self.client.extract_data(filter_prompt, filter_schema) - for field_name, question in filter_bundle.mapping: + for question in filter_questions.all(): + field_name = get_filter_question_field_name(question) answer = getattr(filter_response, field_name, None) if answer is None: is_accepted = False @@ -84,22 +91,23 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: logger.debug(f"Report {report.pk} was rejected by subscription {subscription.pk}") return - extraction_bundle = build_extraction_schema(subscription.output_fields) extraction_results: dict[str, Any] = {} + output_fields = subscription.output_fields.order_by("pk") - if extraction_bundle.mapping: + if output_fields.exists(): extraction_prompt = Template(settings.SUBSCRIPTION_EXTRACTION_PROMPT).substitute( { "report": report.body, - "fields": generate_output_fields_prompt(extraction_bundle.mapping), + "fields": generate_output_fields_prompt(output_fields), } ) - extraction_response = self.client.extract_data( - extraction_prompt, extraction_bundle.schema - ) + extraction_schema = generate_output_fields_schema(output_fields) + extraction_response = self.client.extract_data(extraction_prompt, extraction_schema) - for field_name, field in extraction_bundle.mapping: - extraction_results[str(field.pk)] = getattr(extraction_response, field_name, None) + for field in output_fields.all(): + extraction_results[str(field.pk)] = getattr( + extraction_response, get_output_field_name(field), None + ) SubscribedItem.objects.create( subscription=task.job.subscription, diff --git a/radis/subscriptions/tests/unit/test_processors.py b/radis/subscriptions/tests/unit/test_processors.py index 457c1891..ca323c2e 100644 --- a/radis/subscriptions/tests/unit/test_processors.py +++ b/radis/subscriptions/tests/unit/test_processors.py @@ -1,28 +1,35 @@ from unittest.mock import MagicMock, patch import pytest -from pydantic import BaseModel +from pydantic import create_model from radis.chats.utils.testing_helpers import create_openai_client_mock from radis.subscriptions.models import SubscribedItem from radis.subscriptions.processors import SubscriptionTaskProcessor +from radis.subscriptions.utils.processor_utils import ( + get_filter_question_field_name, + get_output_field_name, +) from radis.subscriptions.utils.testing_helpers import create_subscription_task -class FilterOutput(BaseModel): - filter_0: bool - - -class ExtractionOutput(BaseModel): - extraction_0: str - - @pytest.mark.django_db(transaction=True) def test_subscription_task_processor_filters_and_extracts(): task, filter_question, output_field, report = create_subscription_task() - filter_output = FilterOutput(filter_0=True) - extraction_output = ExtractionOutput(extraction_0="Pneumothorax status confirmed") + filter_field_name = get_filter_question_field_name(filter_question) + extraction_field_name = get_output_field_name(output_field) + filter_field_definitions = {} + filter_field_definitions[filter_field_name] = (bool, ...) + + extraction_field_definitions = {} + extraction_field_definitions[extraction_field_name] = (str, ...) + + FilterOutput = create_model("FilterOutput", **filter_field_definitions) + ExtractionOutput = create_model("ExtractionOutput", **extraction_field_definitions) + + filter_output = FilterOutput(**{filter_field_name: True}) + extraction_output = ExtractionOutput(**{extraction_field_name: "Pneumothorax status confirmed"}) filter_response = MagicMock(choices=[MagicMock(message=MagicMock(parsed=filter_output))]) extraction_response = MagicMock( diff --git a/radis/subscriptions/utils/processor_utils.py b/radis/subscriptions/utils/processor_utils.py index d0df2e91..1c25ef2b 100644 --- a/radis/subscriptions/utils/processor_utils.py +++ b/radis/subscriptions/utils/processor_utils.py @@ -1,94 +1,40 @@ from __future__ import annotations -from dataclasses import dataclass from typing import Any from django.db.models import QuerySet from pydantic import BaseModel, create_model -from radis.extractions.models import OutputField, OutputType +from radis.extractions.models import OutputField from ..models import FilterQuestion -type Numeric = float | int +def _filter_question_field_name(question: FilterQuestion) -> str: + return f"question_{question.pk}" -@dataclass(slots=True) -class FilterSchemaBundle: - schema: type[BaseModel] - mapping: list[tuple[str, FilterQuestion]] +def get_filter_question_field_name(question: FilterQuestion) -> str: + return _filter_question_field_name(question) -@dataclass(slots=True) -class ExtractionSchemaBundle: - schema: type[BaseModel] - mapping: list[tuple[str, OutputField]] +def get_output_field_name(field: OutputField) -> str: + return field.name -def build_filter_schema(questions: QuerySet[FilterQuestion]) -> FilterSchemaBundle: + +def generate_filter_questions_schema(questions: QuerySet[FilterQuestion]) -> type[BaseModel]: field_definitions: dict[str, Any] = {} - mapping: list[tuple[str, FilterQuestion]] = [] - for index, question in enumerate(questions.order_by("pk")): - field_name = f"filter_{index}" + for question in questions.order_by("pk").all(): + field_name = _filter_question_field_name(question) field_definitions[field_name] = (bool, ...) - mapping.append((field_name, question)) model_name = "SubscriptionFilterResultsModel" - schema = ( - create_model(model_name, **field_definitions) - if field_definitions - else create_model(model_name) - ) - return FilterSchemaBundle(schema, mapping) + return create_model(model_name, **field_definitions) -def build_extraction_schema(fields: QuerySet[OutputField]) -> ExtractionSchemaBundle: - field_definitions: dict[str, Any] = {} - mapping: list[tuple[str, OutputField]] = [] - - for index, field in enumerate(fields.order_by("pk")): - field_name = f"extraction_{index}" - if field.output_type == OutputType.TEXT: - output_type = str - elif field.output_type == OutputType.NUMERIC: - output_type = Numeric - elif field.output_type == OutputType.BOOLEAN: - output_type = bool - else: - raise ValueError(f"Unknown output type: {field.output_type}") - - field_definitions[field_name] = (output_type, ...) - - mapping.append((field_name, field)) - - model_name = "SubscriptionExtractionResultsModel" - schema = ( - create_model(model_name, **field_definitions) - if field_definitions - else create_model(model_name) - ) - return ExtractionSchemaBundle(schema, mapping) - - -def generate_filter_questions_prompt(mapping: list[tuple[str, FilterQuestion]]) -> str: - if not mapping: - return "None" - - lines: list[str] = [] - for field_name, question in mapping: - lines.append(f"{field_name}: {question.question}") - return "\n".join(lines) - - -def generate_output_fields_prompt(mapping: list[tuple[str, OutputField]]) -> str: - if not mapping: - return "None" - - lines: list[str] = [] - for field_name, field in mapping: - description = field.description or "No description provided." - lines.append( - f"{field_name}: {field.name} — {description} [type: {field.get_output_type_display()}]" - ) - return "\n".join(lines) +def generate_filter_questions_prompt(questions: QuerySet[FilterQuestion]) -> str: + prompt = "" + for question in questions.order_by("pk").all(): + prompt += f"{_filter_question_field_name(question)}: {question.question}\n" + return prompt From 362bfea8b514501b0871e34081dbeffe202297fe Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 22:00:43 +0000 Subject: [PATCH 31/93] add more tests for subscription processor --- .../tests/unit/test_processors.py | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/radis/subscriptions/tests/unit/test_processors.py b/radis/subscriptions/tests/unit/test_processors.py index ca323c2e..bb7416bb 100644 --- a/radis/subscriptions/tests/unit/test_processors.py +++ b/radis/subscriptions/tests/unit/test_processors.py @@ -48,3 +48,78 @@ def test_subscription_task_processor_filters_and_extracts(): assert subscribed_item.extraction_results == { str(output_field.pk): "Pneumothorax status confirmed" } + + +@pytest.mark.django_db(transaction=True) +def test_subscription_task_processor_handles_llm_null_response(): + task, _, _, report = create_subscription_task() + + processor = SubscriptionTaskProcessor(task) + processor.client.extract_data = MagicMock(return_value=None) + + processor.start() + + assert not SubscribedItem.objects.filter( + subscription=task.job.subscription, report=report + ).exists() + processor.client.extract_data.assert_called_once() + + +@pytest.mark.django_db(transaction=True) +def test_subscription_task_processor_with_no_expected_answer(): + task, filter_question, _, report = create_subscription_task() + + filter_field_name = get_filter_question_field_name(filter_question) + filter_response = MagicMock() + setattr(filter_response, filter_field_name, None) + + processor = SubscriptionTaskProcessor(task) + processor.client.extract_data = MagicMock(return_value=filter_response) + + processor.start() + + assert not SubscribedItem.objects.filter( + subscription=task.job.subscription, report=report + ).exists() + processor.client.extract_data.assert_called_once() + + +@pytest.mark.django_db(transaction=True) +def test_subscription_task_processor_extraction_only(): + task, _, output_field, report = create_subscription_task() + task.job.subscription.filter_questions.all().delete() + + extraction_field_name = get_output_field_name(output_field) + extraction_field_definitions = {} + extraction_field_definitions[extraction_field_name] = (str, ...) + + ExtractionOutput = create_model("ExtractionOnlyOutput", **extraction_field_definitions) + extraction_output = ExtractionOutput(**{extraction_field_name: "Only extraction response"}) + + openai_mock = create_openai_client_mock(extraction_output) + with patch("openai.OpenAI", return_value=openai_mock): + SubscriptionTaskProcessor(task).start() + + subscribed_item = SubscribedItem.objects.get(subscription=task.job.subscription, report=report) + assert subscribed_item.filter_results is None + assert subscribed_item.extraction_results == {str(output_field.pk): "Only extraction response"} + + +@pytest.mark.django_db(transaction=True) +def test_subscription_task_processor_filter_only(): + task, filter_question, output_field, report = create_subscription_task() + output_field.delete() + + filter_field_name = get_filter_question_field_name(filter_question) + filter_field_definitions = {} + filter_field_definitions[filter_field_name] = (bool, ...) + FilterOutput = create_model("FilterOnlyOutput", **filter_field_definitions) + filter_output = FilterOutput(**{filter_field_name: True}) + + openai_mock = create_openai_client_mock(filter_output) + with patch("openai.OpenAI", return_value=openai_mock): + SubscriptionTaskProcessor(task).start() + + subscribed_item = SubscribedItem.objects.get(subscription=task.job.subscription, report=report) + assert subscribed_item.filter_results == {str(filter_question.pk): True} + assert subscribed_item.extraction_results is None From d839e58f39b1505018374bb05bac27831fbbb230 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 22:11:15 +0000 Subject: [PATCH 32/93] remove redundant db query --- radis/subscriptions/processors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/radis/subscriptions/processors.py b/radis/subscriptions/processors.py index a9a31804..a512b10c 100644 --- a/radis/subscriptions/processors.py +++ b/radis/subscriptions/processors.py @@ -70,7 +70,7 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: filter_schema = generate_filter_questions_schema(filter_questions) filter_response = self.client.extract_data(filter_prompt, filter_schema) - for question in filter_questions.all(): + for question in filter_questions: field_name = get_filter_question_field_name(question) answer = getattr(filter_response, field_name, None) if answer is None: @@ -104,7 +104,7 @@ def process_report(self, report: Report, task: SubscriptionTask) -> None: extraction_schema = generate_output_fields_schema(output_fields) extraction_response = self.client.extract_data(extraction_prompt, extraction_schema) - for field in output_fields.all(): + for field in output_fields: extraction_results[str(field.pk)] = getattr( extraction_response, get_output_field_name(field), None ) From 2fc97d78516d2af2181b4873d68fc6d10b91de54 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Wed, 19 Nov 2025 22:50:49 +0000 Subject: [PATCH 33/93] readd provider validation --- radis/subscriptions/forms.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/radis/subscriptions/forms.py b/radis/subscriptions/forms.py index e5eaf7fb..32c0cd59 100644 --- a/radis/subscriptions/forms.py +++ b/radis/subscriptions/forms.py @@ -139,6 +139,13 @@ def clean(self) -> dict[str, Any] | None: if age_from is not None and age_till is not None and age_from >= age_till: raise forms.ValidationError("Age from must be less than age till") + provider = self.cleaned_data["provider"] + query = self.cleaned_data["query"] + if query != "" and not provider: + raise forms.ValidationError( + "Setup of RADIS is incomplete. No retrieval providers are registered." + ) + return super().clean() From d726ea9e6c2e37e5f2b625ba0bf908204ab7b35c Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Thu, 20 Nov 2025 11:11:17 +0000 Subject: [PATCH 34/93] display extraction results in subscription inbox --- .../_subscribed_item_preview.html | 16 ++++++++++++++++ radis/subscriptions/templatetags/__init__.py | 1 + .../templatetags/subscriptions_extras.py | 19 +++++++++++++++++++ radis/subscriptions/views.py | 1 - 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 radis/subscriptions/templatetags/__init__.py create mode 100644 radis/subscriptions/templatetags/subscriptions_extras.py diff --git a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html index 432891f5..40dd041d 100644 --- a/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html +++ b/radis/subscriptions/templates/subscriptions/_subscribed_item_preview.html @@ -1,3 +1,4 @@ +{% load subscriptions_extras %}
@@ -6,6 +7,21 @@
{{ subscribed_item.report.body }}
+ {% if subscribed_item.extraction_results %} +
+
Extracted Fields
+
+ {% for field in subscribed_item.subscription.output_fields.all %} + {% with field_key=field.pk|stringformat:"s" %} +
{{ field.name }}
+
+ {{ subscribed_item.extraction_results|get_item:field_key|default_if_none:"—" }} +
+ {% endwith %} + {% endfor %} +
+
+ {% endif %}
' + ), + css_class="col-md-1 col-2 d-flex align-items-center justify-content-end array-toggle-field", + ), + css_class="g-3 align-items-center", ), "description", Div( @@ -257,6 +278,12 @@ def clean_selection_options(self) -> list[str]: return cleaned + def clean_is_array(self) -> bool: + raw_value = (self.cleaned_data.get("is_array") or "").strip().lower() + if raw_value in {"1", "true", "on"}: + return True + return False + def clean(self): cleaned_data = super().clean() if not cleaned_data: diff --git a/radis/extractions/migrations/0005_outputfield_is_array.py b/radis/extractions/migrations/0005_outputfield_is_array.py new file mode 100644 index 00000000..568a02c3 --- /dev/null +++ b/radis/extractions/migrations/0005_outputfield_is_array.py @@ -0,0 +1,15 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("extractions", "0004_outputfield_selection_options"), + ] + + operations = [ + migrations.AddField( + model_name="outputfield", + name="is_array", + field=models.BooleanField(default=False), + ), + ] diff --git a/radis/extractions/models.py b/radis/extractions/models.py index 2ae477b9..371f3b9e 100644 --- a/radis/extractions/models.py +++ b/radis/extractions/models.py @@ -88,6 +88,7 @@ class OutputField(models.Model): get_output_type_display: Callable[[], str] optional = models.BooleanField(default=False) selection_options = models.JSONField(default=list, blank=True) + is_array = models.BooleanField(default=False) job = models.ForeignKey[ExtractionJob]( ExtractionJob, on_delete=models.CASCADE, related_name="output_fields" ) diff --git a/radis/extractions/templates/extractions/_selection_options_field.html b/radis/extractions/templates/extractions/_selection_options_field.html index 91437e67..ed50f0e9 100644 --- a/radis/extractions/templates/extractions/_selection_options_field.html +++ b/radis/extractions/templates/extractions/_selection_options_field.html @@ -1,19 +1,22 @@ {% load bootstrap_icon from common_extras %}
-
- {{ form.selection_options }} - - + {{ form.selection_options }} + {{ form.is_array }} +
+
+ + +