diff --git a/radis/chats/utils/chat_client.py b/radis/chats/utils/chat_client.py
index b310247a..c9c1f9dd 100644
--- a/radis/chats/utils/chat_client.py
+++ b/radis/chats/utils/chat_client.py
@@ -16,53 +16,171 @@ def _get_base_url() -> str:
return base_url
-class AsyncChatClient:
+def _validate_completion_response(completion) -> str:
+ """
+ Validates that the LLM completion response contains valid content.
+
+ Args:
+ completion: The completion response from the LLM
+
+ Returns:
+ The message content as a string
+
+ Raises:
+ ValueError: If the response is empty or invalid
+ """
+ if not completion.choices:
+ logger.error("LLM returned empty choices list")
+ raise ValueError("LLM returned no response choices")
+
+ answer = completion.choices[0].message.content
+ if answer is None:
+ logger.error("LLM returned None for message content")
+ raise ValueError("LLM returned empty response content")
+
+ return answer
+
+
+def _validate_parsed_response(completion) -> BaseModel:
+ """
+ Validates that the LLM completion response contains valid parsed data.
+
+ Args:
+ completion: The completion response from the LLM
+
+ Returns:
+ The parsed BaseModel instance
+
+ Raises:
+ ValueError: If the response is empty or invalid
+ """
+ if not completion.choices:
+ logger.error("LLM returned empty choices list")
+ raise ValueError("LLM returned no response choices")
+
+ parsed = completion.choices[0].message.parsed
+ if parsed is None:
+ logger.error("LLM returned None for parsed message")
+ raise ValueError("LLM returned empty parsed response")
+
+ return parsed
+
+
+def _handle_api_error(error: openai.APIError, operation: str) -> None:
+ """
+ Logs an API error and raises a RuntimeError that wraps it with a consistent message.
+
+ Args:
+ error: The API error from OpenAI
+ operation: Description of the operation that failed (e.g., "chat", "data extraction")
+
+ Raises:
+ RuntimeError: Always raises with a user-friendly error message
+ """
+ logger.error(f"OpenAI API error during {operation}: {error}")
+ raise RuntimeError(f"Failed to communicate with LLM service: {error}") from error
+
+
+class _BaseChatClient:
+ """Base class containing shared chat client logic."""
+
def __init__(self):
- base_url = _get_base_url()
- api_key = settings.EXTERNAL_LLM_PROVIDER_API_KEY
- self._client = openai.AsyncOpenAI(base_url=base_url, api_key=api_key)
+ self._base_url = _get_base_url()
+ self._api_key = settings.EXTERNAL_LLM_PROVIDER_API_KEY
self._model_name = settings.LLM_MODEL_NAME
- async def chat(
+ def _build_chat_request(
self,
messages: Iterable[ChatCompletionMessageParam],
max_completion_tokens: int | None = None,
- ) -> str:
- logger.debug(f"Sending messages to LLM for chat:\n{messages}")
-
+ ) -> dict:
+ """Build the request dictionary for chat completion."""
request = {
"model": self._model_name,
"messages": messages,
}
if max_completion_tokens is not None:
request["max_completion_tokens"] = max_completion_tokens
+ return request
- completion = await self._client.chat.completions.create(**request)
- answer = completion.choices[0].message.content
- assert answer is not None
+ def _log_request(self, messages: Iterable[ChatCompletionMessageParam]) -> None:
+ """Log the outgoing request."""
+ logger.debug(f"Sending messages to LLM for chat:\n{messages}")
+
+ def _log_response(self, answer: str) -> None:
+ """Log the response from LLM."""
logger.debug("Received from LLM: %s", answer)
+
+
+class AsyncChatClient(_BaseChatClient):
+ def __init__(self):
+ super().__init__()
+ self._client = openai.AsyncOpenAI(base_url=self._base_url, api_key=self._api_key)
+
+ async def chat(
+ self,
+ messages: Iterable[ChatCompletionMessageParam],
+ max_completion_tokens: int | None = None,
+ ) -> str:
+ self._log_request(messages)
+ request = self._build_chat_request(messages, max_completion_tokens)
+
+ try:
+ completion = await self._client.chat.completions.create(**request)
+ except openai.APIError as e:
+ _handle_api_error(e, "chat")
+
+ answer = _validate_completion_response(completion)
+ self._log_response(answer)
return answer
-class ChatClient:
+class ChatClient(_BaseChatClient):
def __init__(self) -> None:
- base_url = _get_base_url()
- api_key = settings.EXTERNAL_LLM_PROVIDER_API_KEY
+ super().__init__()
+ self._client = openai.OpenAI(base_url=self._base_url, api_key=self._api_key)
+
+ def chat(
+ self,
+ messages: Iterable[ChatCompletionMessageParam],
+ max_completion_tokens: int | None = None,
+ ) -> str:
+ """
+ Send messages to LLM and return the response text.
- self._client = openai.OpenAI(base_url=base_url, api_key=api_key)
- self._llm_model_name = settings.LLM_MODEL_NAME
+ Args:
+ messages: List of message dictionaries with 'role' and 'content'
+ max_completion_tokens: Optional maximum tokens to generate
+
+ Returns:
+ The LLM's response as a string
+ """
+ self._log_request(messages)
+ request = self._build_chat_request(messages, max_completion_tokens)
+
+ try:
+ completion = self._client.chat.completions.create(**request)
+ except openai.APIError as e:
+ _handle_api_error(e, "chat")
+
+ answer = _validate_completion_response(completion)
+ self._log_response(answer)
+ return answer
def extract_data(self, prompt: str, schema: type[BaseModel]) -> BaseModel:
logger.debug("Sending prompt and schema to LLM to extract data.")
logger.debug("Prompt:\n%s", prompt)
logger.debug("Schema:\n%s", schema.model_json_schema())
- completion = self._client.beta.chat.completions.parse(
- model=self._llm_model_name,
- messages=[{"role": "system", "content": prompt}],
- response_format=schema,
- )
- event = completion.choices[0].message.parsed
- assert event
+ try:
+ completion = self._client.beta.chat.completions.parse(
+ model=self._model_name,
+ messages=[{"role": "system", "content": prompt}],
+ response_format=schema,
+ )
+ except openai.APIError as e:
+ _handle_api_error(e, "data extraction")
+
+ event = _validate_parsed_response(completion)
logger.debug("Received from LLM: %s", event)
return event
diff --git a/radis/core/constants.py b/radis/core/constants.py
index db1b048b..8e1d74bd 100644
--- a/radis/core/constants.py
+++ b/radis/core/constants.py
@@ -2,3 +2,7 @@
"de": "German",
"en": "English",
}
+
+MIN_AGE = 0
+MAX_AGE = 120
+AGE_STEP = 10
diff --git a/radis/core/form_fields.py b/radis/core/form_fields.py
new file mode 100644
index 00000000..1ce2a311
--- /dev/null
+++ b/radis/core/form_fields.py
@@ -0,0 +1,177 @@
+"""
+Reusable form field factories for RADIS forms.
+
+This module provides factory functions for commonly used form fields
+to reduce duplication across the codebase.
+"""
+
+from typing import Literal, overload
+
+from django import forms
+
+from radis.core.constants import AGE_STEP, LANGUAGE_LABELS, MAX_AGE, MIN_AGE
+from radis.reports.models import Language, Modality
+
+
+@overload
+def create_language_field(
+ required: bool = False,
+ empty_label: str | None = None,
+ use_pk: Literal[True] = True,
+) -> forms.ModelChoiceField: ...
+
+
+@overload
+def create_language_field(
+ required: bool = False,
+ empty_label: str | None = None,
+ use_pk: Literal[False] = False,
+) -> forms.ChoiceField: ...
+
+
+def create_language_field(
+ required: bool = False,
+ empty_label: str | None = None,
+ use_pk: bool = True,
+) -> forms.ModelChoiceField | forms.ChoiceField:
+ """
+ Create a language choice field with consistent configuration.
+
+ Args:
+ required: Whether the field is required
+ empty_label: Label for empty option (None = no empty option); only honored if use_pk=True
+ use_pk: If True, returns ModelChoiceField with Language objects;
+ if False, returns ChoiceField with code strings
+
+ Returns:
+ ModelChoiceField (if use_pk=True) or ChoiceField (if use_pk=False)
+
+ Example:
+ # For extraction forms (uses ModelChoiceField, returns Language objects)
+ self.fields["language"] = create_language_field()
+
+ # For subscription forms (uses ModelChoiceField, allows "All")
+ self.fields["language"] = create_language_field(empty_label="All")
+
+ # For search forms (uses ChoiceField with codes)
+ self.fields["language"] = create_language_field(use_pk=False)
+ """
+ languages = Language.objects.order_by("code")
+
+ if use_pk:
+ # Return ModelChoiceField - cleaned_data will contain Language objects
+ field = forms.ModelChoiceField(
+ queryset=languages,
+ required=required,
+ empty_label=empty_label,
+ )
+ field.label_from_instance = lambda obj: LANGUAGE_LABELS[obj.code]
+ return field
+ else:
+ # Return ChoiceField - cleaned_data will contain code strings
+ field = forms.ChoiceField(required=required)
+ field.choices = [(lang.code, LANGUAGE_LABELS[lang.code]) for lang in languages]
+ if empty_label is not None:
+ field.empty_label = empty_label # type: ignore
+ return field
+
+
+@overload
+def create_modality_field(
+ required: bool = False,
+ widget_size: int = 6,
+ use_pk: Literal[True] = True,
+) -> forms.ModelMultipleChoiceField: ...
+
+
+@overload
+def create_modality_field(
+ required: bool = False,
+ widget_size: int = 6,
+ use_pk: Literal[False] = False,
+) -> forms.MultipleChoiceField: ...
+
+
+def create_modality_field(
+ required: bool = False,
+ widget_size: int = 6,
+ use_pk: bool = True,
+) -> forms.ModelMultipleChoiceField | forms.MultipleChoiceField:
+ """
+ Create a modality multiple choice field with consistent configuration.
+
+ Args:
+ required: Whether the field is required
+ widget_size: Value of the select widget's "size" attribute (number of visible rows)
+ use_pk: If True, returns ModelMultipleChoiceField with Modality objects;
+ if False, returns MultipleChoiceField with code strings
+
+ Returns:
+ ModelMultipleChoiceField (if use_pk=True) or MultipleChoiceField (if use_pk=False)
+
+ Example:
+ # For extraction forms (uses ModelMultipleChoiceField, returns Modality objects)
+ self.fields["modalities"] = create_modality_field()
+
+ # For search forms (uses MultipleChoiceField with codes)
+ self.fields["modalities"] = create_modality_field(use_pk=False)
+ """
+ modalities = Modality.objects.filter(filterable=True).order_by("code")
+
+ if use_pk:
+ # Return ModelMultipleChoiceField - cleaned_data will contain Modality objects
+ field = forms.ModelMultipleChoiceField(
+ queryset=modalities,
+ required=required,
+ )
+ # Display just the code for each modality
+ field.label_from_instance = lambda obj: obj.code
+ field.widget.attrs["size"] = widget_size
+ return field
+ else:
+ # Return MultipleChoiceField - cleaned_data will contain code strings
+ field = forms.MultipleChoiceField(required=required)
+ field.choices = [(mod.code, mod.code) for mod in modalities]
+ field.widget.attrs["size"] = widget_size
+ return field
+
+
+def create_age_range_fields() -> tuple[forms.IntegerField, forms.IntegerField]:
+ """
+ Create age_from and age_till fields with consistent configuration.
+
+ Returns:
+ Tuple of (age_from_field, age_till_field)
+
+ Example:
+ age_from, age_till = create_age_range_fields()
+ self.fields["age_from"] = age_from
+ self.fields["age_till"] = age_till
+ """
+ age_from = forms.IntegerField(
+ required=False,
+ min_value=MIN_AGE,
+ max_value=MAX_AGE,
+ widget=forms.NumberInput(
+ attrs={
+ "type": "range",
+ "step": AGE_STEP,
+ "value": MIN_AGE,
+ }
+ ),
+ )
+
+ age_till = forms.IntegerField(
+ required=False,
+ min_value=MIN_AGE,
+ max_value=MAX_AGE,
+ widget=forms.NumberInput(
+ attrs={
+ "type": "range",
+ "step": AGE_STEP,
+ "value": MAX_AGE,
+ }
+ ),
+ )
+
+ return age_from, age_till
diff --git a/radis/core/static/core/core.js b/radis/core/static/core/core.js
index cfff69a0..31468d10 100644
--- a/radis/core/static/core/core.js
+++ b/radis/core/static/core/core.js
@@ -46,6 +46,23 @@ document.addEventListener("alpine:init", () => {
});
});
+document.addEventListener("DOMContentLoaded", () => {
+ const preventAttr = "[data-prevent-enter-submit]";
+ document.querySelectorAll(preventAttr).forEach((formEl) => {
+ formEl.addEventListener("keydown", (event) => {
+ if (event.key !== "Enter") {
+ return;
+ }
+ const target = event.target;
+ const isTextInput =
+ target instanceof HTMLInputElement &&
+ !["submit", "button"].includes(target.type);
+ if (isTextInput) {
+ event.preventDefault();
+ }
+ });
+ });
+});
/**
* An Alpine component that controls a Django FormSet
*
@@ -66,15 +83,14 @@ function FormSet(rootEl) {
formCount: parseInt(totalForms.value),
minForms: parseInt(minForms.value),
maxForms: parseInt(maxForms.value),
- init() {
- console.log(this.formCount);
- },
+ init() {},
addForm() {
- const newForm = template.content.cloneNode(true);
+ if (!template) {
+ return;
+ }
const idx = totalForms.value;
- container.append(newForm);
- const lastForm = container.querySelector(".formset-form:last-child");
- lastForm.innerHTML = lastForm.innerHTML.replace(/__prefix__/g, idx);
+ const html = template.innerHTML.replace(/__prefix__/g, idx);
+ container.insertAdjacentHTML("beforeend", html);
totalForms.value = (parseInt(idx) + 1).toString();
this.formCount = parseInt(totalForms.value);
},
diff --git a/radis/core/templates/cotton/formset.html b/radis/core/templates/cotton/formset.html
index 20c6af7b..95ea87bd 100644
--- a/radis/core/templates/cotton/formset.html
+++ b/radis/core/templates/cotton/formset.html
@@ -7,7 +7,9 @@
{% crispy formset.empty_form %}
diff --git a/radis/extractions/constants.py b/radis/extractions/constants.py
new file mode 100644
index 00000000..bacfd27c
--- /dev/null
+++ b/radis/extractions/constants.py
@@ -0,0 +1 @@
+MAX_SELECTION_OPTIONS = 7
diff --git a/radis/extractions/factories.py b/radis/extractions/factories.py
index 6994a8cd..1aa0c931 100644
--- a/radis/extractions/factories.py
+++ b/radis/extractions/factories.py
@@ -1,15 +1,16 @@
+from typing import cast
+
import factory
+from adit_radis_shared.accounts.factories import GroupFactory, UserFactory
+from adit_radis_shared.accounts.models import User
+from adit_radis_shared.common.utils.testing_helpers import add_user_to_group
+from django.contrib.auth.models import Group
+from factory.declarations import SKIP
from faker import Faker
from radis.reports.factories import ModalityFactory
-from .models import (
- ExtractionInstance,
- ExtractionJob,
- ExtractionTask,
- OutputField,
- OutputType,
-)
+from .models import ExtractionInstance, ExtractionJob, ExtractionTask, OutputField, OutputType
fake = Faker()
@@ -26,8 +27,9 @@ class ExtractionJobFactory(BaseDjangoModelFactory):
class Meta:
model = ExtractionJob
+ owner = factory.SubFactory(UserFactory)
title = factory.Faker("sentence", nb_words=3)
- group = factory.SubFactory("adit_radis_shared.accounts.factories.GroupFactory")
+ group = factory.SubFactory(GroupFactory)
query = factory.Faker("word")
language = factory.SubFactory("radis.reports.factories.LanguageFactory")
study_date_from = factory.Faker("date")
@@ -56,15 +58,34 @@ def modalities(self, create, extracted, **kwargs):
# django_get_or_create would not be respected then
self.modalities.add(ModalityFactory(code=modality)) # type: ignore
+ @factory.post_generation
+ def ensure_owner_in_group(obj, create, extracted, **kwargs):
+ owner = cast(User, obj.owner)
+ group = cast(Group, obj.group)
+
+ if not create:
+ return
+
+ add_user_to_group(owner, group)
+
class OutputFieldFactory(BaseDjangoModelFactory[OutputField]):
class Meta:
model = OutputField
- job = factory.SubFactory("radis.extractions.factories.ExtractionJobFactory")
+ # Use factory.Maybe to conditionally create job only when subscription is None
+ job = factory.Maybe(
+ factory.SelfAttribute("subscription"),
+ yes_declaration=SKIP, # If subscription exists, skip job creation
+ no_declaration=factory.SubFactory("radis.extractions.factories.ExtractionJobFactory"), # type: ignore[arg-type]
+ )
+ subscription = None
name = factory.Sequence(lambda n: f"output_field_{n}")
description = factory.Faker("sentence", nb_words=10)
output_type = factory.Faker("random_element", elements=[a[0] for a in OutputType.choices])
+ selection_options = factory.LazyAttribute(
+ lambda obj: ["Option 1", "Option 2"] if obj.output_type == OutputType.SELECTION else []
+ )
class ExtractionTaskFactory(BaseDjangoModelFactory[ExtractionTask]):
diff --git a/radis/extractions/forms.py b/radis/extractions/forms.py
index 7a1d559d..0d9c6162 100644
--- a/radis/extractions/forms.py
+++ b/radis/extractions/forms.py
@@ -1,21 +1,27 @@
+import json
from typing import Any, cast
from adit_radis_shared.accounts.models import User
from crispy_forms.helper import FormHelper
-from crispy_forms.layout import Column, Layout, Row, Submit
+from crispy_forms.layout import HTML, Column, Div, Field, Layout, Row, Submit
from django import forms
from django.conf import settings
from django.db.models import QuerySet
-from radis.core.constants import LANGUAGE_LABELS
+from radis.core.form_fields import (
+ create_age_range_fields,
+ create_language_field,
+ create_modality_field,
+)
from radis.core.layouts import RangeSlider
from radis.reports.models import Language, Modality
-from radis.search.forms import AGE_STEP, MAX_AGE, MIN_AGE
from radis.search.site import Search, SearchFilters
from radis.search.utils.query_parser import QueryParser
-from .models import ExtractionJob, OutputField
+from .constants import MAX_SELECTION_OPTIONS
+from .models import ExtractionJob, OutputField, OutputType
from .site import extraction_retrieval_provider
+from .utils.validation import validate_selection_options
class SearchForm(forms.ModelForm):
@@ -35,7 +41,14 @@ class Meta:
]
help_texts = {
"title": "Title of the extraction job",
- "query": "A query to find reports for further analysis",
+ "query": (
+ "Search query to filter reports. "
+ "This query was auto-generated from your extraction fields"
+ " - you can edit or refine it."
+ ),
+ }
+ widgets = {
+ "query": forms.TextInput(attrs={"placeholder": "Auto-generated query (editable)"}),
}
def __init__(self, *args, **kwargs):
@@ -43,41 +56,14 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.fields["language"].choices = [ # type: ignore
- (language.pk, LANGUAGE_LABELS[language.code])
- for language in Language.objects.order_by("code")
- ]
- self.fields["modalities"].choices = [ # type: ignore
- (modality.pk, modality.code)
- for modality in Modality.objects.filter(filterable=True).order_by("code")
- ]
- self.fields["modalities"].widget.attrs["size"] = 6
+ self.fields["query"].required = True
+ self.fields["language"] = create_language_field()
+ self.fields["modalities"] = create_modality_field()
self.fields["study_date_from"].widget = forms.DateInput(attrs={"type": "date"})
self.fields["study_date_till"].widget = forms.DateInput(attrs={"type": "date"})
- self.fields["age_from"] = forms.IntegerField(
- required=False,
- min_value=MIN_AGE,
- max_value=MAX_AGE,
- widget=forms.NumberInput(
- attrs={
- "type": "range",
- "step": AGE_STEP,
- "value": MIN_AGE,
- }
- ),
- )
- self.fields["age_till"] = forms.IntegerField(
- required=False,
- min_value=MIN_AGE,
- max_value=MAX_AGE,
- widget=forms.NumberInput(
- attrs={
- "type": "range",
- "step": AGE_STEP,
- "value": MAX_AGE,
- }
- ),
- )
+ age_from, age_till = create_age_range_fields()
+ self.fields["age_from"] = age_from
+ self.fields["age_till"] = age_till
self.helper = FormHelper()
self.helper.form_tag = False
@@ -89,8 +75,12 @@ def build_layout(self):
Row(
Column(
"title",
+ # Query generation section (async HTMX)
+ HTML('{% include "extractions/_query_generation_section.html" %}'),
"query",
- Submit("next", "Next Step (Output Fields)", css_class="btn-primary"),
+ # Preview div from template include
+ HTML('{% include "extractions/_search_preview_form_section.html" %}'),
+ Submit("next", "Next Step (Summary)", css_class="btn-primary"),
),
Column(
"language",
@@ -107,29 +97,37 @@ def build_layout(self):
)
def clean_query(self) -> str:
- query = self.cleaned_data["query"]
- query_node, _ = QueryParser().parse(query)
+ query = self.cleaned_data["query"].strip()
+ if not query:
+ raise forms.ValidationError(
+ "A search query is required. "
+ "Please enter a query or go back to regenerate from fields."
+ )
+ query_node, fixes = QueryParser().parse(query)
if query_node is None:
- raise forms.ValidationError("Invalid empty query")
+ raise forms.ValidationError("Invalid query syntax")
+ else:
+ self.cleaned_data["query_node"] = query_node
+ if len(fixes) > 0:
+ query = QueryParser.unparse(query_node)
return query
def clean(self) -> dict[str, Any] | None:
cleaned_data = super().clean()
assert cleaned_data
+ # If query validation failed, query_node won't exist - exit early
+ if "query_node" not in cleaned_data:
+ return cleaned_data
+
active_group = self.user.active_group
language = cast(Language, cleaned_data["language"])
modalities = cast(QuerySet[Modality], cleaned_data["modalities"])
- query_node, fixes = QueryParser().parse(cleaned_data["query"])
- assert query_node
-
- if len(fixes) > 0:
- cleaned_data["fixed_query"] = QueryParser.unparse(query_node)
-
+ # Calculate retrieval count with inline Search construction
search = Search(
- query=query_node,
+ query=cleaned_data["query_node"],
offset=0,
limit=0,
filters=SearchFilters(
@@ -147,14 +145,16 @@ def clean(self) -> dict[str, Any] | None:
if extraction_retrieval_provider is None:
raise forms.ValidationError("Extraction retrieval provider is not configured.")
+
retrieval_count = extraction_retrieval_provider.count(search)
cleaned_data["retrieval_count"] = retrieval_count
+ # Validate against limits
if retrieval_count > settings.EXTRACTION_MAXIMUM_REPORTS_COUNT:
raise forms.ValidationError(
f"Your search returned more results ({retrieval_count}) than the extraction "
f"pipeline can handle (max. {settings.EXTRACTION_MAXIMUM_REPORTS_COUNT}). "
- "Please refine your search."
+ "Please refine your search query."
)
if (
@@ -170,14 +170,135 @@ def clean(self) -> dict[str, Any] | None:
class OutputFieldForm(forms.ModelForm):
+ """Form for one output field. Selection options (JSON-encoded) and the array flag
+ ("true"/"false") travel in hidden inputs because the options are dynamic and the
+ array toggle is an Alpine component that needs to be re-rendered on every change."""
+
+ selection_options = forms.CharField(
+ required=False,
+ widget=forms.HiddenInput(),
+ )
+ is_array = forms.CharField(
+ required=False,
+ widget=forms.HiddenInput(),
+ )
+
class Meta:
model = OutputField
fields = [
"name",
"description",
"output_type",
+ "selection_options",
+ "is_array",
]
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.fields["name"].required = True
+ self.fields["description"].required = True
+ self.fields["description"].widget = forms.Textarea(attrs={"rows": 3})
+ self.fields["selection_options"].widget.attrs.update(
+ {
+ "data-selection-input": "true",
+ "data-max-selection-options": str(MAX_SELECTION_OPTIONS),
+ }
+ )
+ self.fields["is_array"].widget.attrs.update(
+ {
+ "data-array-input": "true",
+ }
+ )
+
+ initial_options = self.instance.selection_options if self.instance.pk else []
+ self.initial["selection_options"] = json.dumps(initial_options)
+ self.initial["is_array"] = "true" if self.instance.is_array else "false"
+
+ self.helper = FormHelper()
+ self.helper.form_tag = False
+ self.helper.disable_csrf = True
+
+ # Build the layout for selection options and array toggle button using crispy.
+ fields = [
+ Field("id", type="hidden"),
+ Row(
+ Column("name", css_class="col-md-7 col-12"),
+ Column("output_type", css_class="col-md-4 col-10"),
+ Column(
+ HTML(
+ (
+ '
[ ] '
+ )
+ ),
+ css_class=(
+ "col-md-1 col-2 d-flex align-items-center "
+ "justify-content-end array-toggle-field"
+ ),
+ ),
+ css_class="g-3 align-items-center",
+ ),
+ "description",
+ # Include the selection options widget partial template here.
+ Div(
+ HTML('{% include "extractions/_selection_options_field.html" %}'),
+ css_class="selection-options-wrapper",
+ ),
+ ]
+
+ if "DELETE" in self.fields:
+ fields.insert(1, Field("DELETE", type="hidden"))
+
+ self.helper.layout = Layout(Div(*fields))
+
+ def clean_selection_options(self) -> list[str]:
+ raw_value = self.cleaned_data.get("selection_options") or ""
+ raw_value = raw_value.strip()
+ if raw_value == "":
+ return []
+
+ try:
+ parsed = json.loads(raw_value)
+ except json.JSONDecodeError as exc:
+ raise forms.ValidationError("Invalid selection data.") from exc
+
+ return validate_selection_options(parsed)
+
+ def clean_is_array(self) -> bool:
+ raw_value = (self.cleaned_data.get("is_array") or "").strip().lower()
+ if raw_value in {"1", "true", "on"}:
+ return True
+ return False
+
+ def clean(self):
+ cleaned_data = super().clean()
+ if not cleaned_data:
+ return cleaned_data
+
+ output_type = cleaned_data.get("output_type")
+ selection_options: list[str] = cleaned_data.get("selection_options") or []
+
+ if output_type == OutputType.SELECTION:
+ if not selection_options:
+ self.add_error(
+ "selection_options",
+ "Add at least one selection to use the Selection type.",
+ )
+ else:
+ if selection_options:
+ self.add_error(
+ "selection_options",
+ "Selections are only allowed when Output Type is Selection.",
+ )
+ cleaned_data["selection_options"] = []
+
+ return cleaned_data
+
OutputFieldFormSet = forms.inlineformset_factory(
ExtractionJob,
diff --git a/radis/extractions/migrations/0004_outputfield_selection_options.py b/radis/extractions/migrations/0004_outputfield_selection_options.py
new file mode 100644
index 00000000..f01877a8
--- /dev/null
+++ b/radis/extractions/migrations/0004_outputfield_selection_options.py
@@ -0,0 +1,29 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("extractions", "0003_alter_extractionjob_options_and_more"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="outputfield",
+ name="selection_options",
+ field=models.JSONField(blank=True, default=list),
+ ),
+ migrations.AlterField(
+ model_name="outputfield",
+ name="output_type",
+ field=models.CharField(
+ choices=[
+ ("T", "Text"),
+ ("N", "Numeric"),
+ ("B", "Boolean"),
+ ("S", "Selection"),
+ ],
+ default="T",
+ max_length=1,
+ ),
+ ),
+ ]
diff --git a/radis/extractions/migrations/0005_outputfield_is_array.py b/radis/extractions/migrations/0005_outputfield_is_array.py
new file mode 100644
index 00000000..568a02c3
--- /dev/null
+++ b/radis/extractions/migrations/0005_outputfield_is_array.py
@@ -0,0 +1,15 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("extractions", "0004_outputfield_selection_options"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="outputfield",
+ name="is_array",
+ field=models.BooleanField(default=False),
+ ),
+ ]
diff --git a/radis/extractions/migrations/0004_remove_extractionjob_provider.py b/radis/extractions/migrations/0006_remove_extractionjob_provider.py
similarity index 80%
rename from radis/extractions/migrations/0004_remove_extractionjob_provider.py
rename to radis/extractions/migrations/0006_remove_extractionjob_provider.py
index 24640eaa..84c59f4c 100644
--- a/radis/extractions/migrations/0004_remove_extractionjob_provider.py
+++ b/radis/extractions/migrations/0006_remove_extractionjob_provider.py
@@ -6,7 +6,7 @@
class Migration(migrations.Migration):
dependencies = [
- ("extractions", "0003_alter_extractionjob_options_and_more"),
+ ("extractions", "0005_outputfield_is_array"),
]
operations = [
diff --git a/radis/extractions/migrations/0007_remove_outputfield_unique.py b/radis/extractions/migrations/0007_remove_outputfield_unique.py
new file mode 100644
index 00000000..bc12db2d
--- /dev/null
+++ b/radis/extractions/migrations/0007_remove_outputfield_unique.py
@@ -0,0 +1,33 @@
+# Generated by Django 5.2.8 on 2025-11-17 23:19
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("extractions", "0006_remove_extractionjob_provider"),
+ ]
+
+ operations = [
+ migrations.RemoveConstraint(
+ model_name="outputfield",
+ name="unique_output_field_name_per_job",
+ ),
+ migrations.RemoveField(
+ model_name="outputfield",
+ name="optional",
+ ),
+ migrations.AlterField(
+ model_name="outputfield",
+ name="job",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="output_fields",
+ to="extractions.extractionjob",
+ ),
+ ),
+ ]
diff --git a/radis/extractions/migrations/0008_outputfield_subscription_and_more.py b/radis/extractions/migrations/0008_outputfield_subscription_and_more.py
new file mode 100644
index 00000000..11027f91
--- /dev/null
+++ b/radis/extractions/migrations/0008_outputfield_subscription_and_more.py
@@ -0,0 +1,53 @@
+# Generated by Django 5.2.8 on 2025-11-17 23:39
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("extractions", "0007_remove_outputfield_unique"),
+ ("subscriptions", "0011_rename_answers_subscribeditem_filter_results_and_more"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="outputfield",
+ name="subscription",
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="output_fields",
+ to="subscriptions.subscription",
+ ),
+ ),
+ migrations.AddConstraint(
+ model_name="outputfield",
+ constraint=models.UniqueConstraint(
+ condition=models.Q(("job__isnull", False)),
+ fields=("name", "job_id"),
+ name="unique_output_field_name_per_job",
+ ),
+ ),
+ migrations.AddConstraint(
+ model_name="outputfield",
+ constraint=models.UniqueConstraint(
+ condition=models.Q(("subscription__isnull", False)),
+ fields=("name", "subscription_id"),
+ name="unique_output_field_name_per_subscription",
+ ),
+ ),
+ migrations.AddConstraint(
+ model_name="outputfield",
+ constraint=models.CheckConstraint(
+ condition=models.Q(
+ models.Q(("job__isnull", False), ("subscription__isnull", True)),
+ models.Q(("job__isnull", True), ("subscription__isnull", False)),
+ _connector="OR",
+ ),
+ name="output_field_exactly_one_parent",
+ ),
+ ),
+ ]
diff --git a/radis/extractions/models.py b/radis/extractions/models.py
index 97e6fa98..dc0f972e 100644
--- a/radis/extractions/models.py
+++ b/radis/extractions/models.py
@@ -4,6 +4,7 @@
from django.conf import settings
from django.contrib.auth.models import Group
from django.db import models
+from django.db.models import Q
from django.urls import reverse
from procrastinate.contrib.django import app
from procrastinate.contrib.django.models import ProcrastinateJob
@@ -75,6 +76,7 @@ class OutputType(models.TextChoices):
TEXT = "T", "Text"
NUMERIC = "N", "Numeric"
BOOLEAN = "B", "Boolean"
+ SELECTION = "S", "Selection"
class OutputField(models.Model):
@@ -84,22 +86,64 @@ class OutputField(models.Model):
max_length=1, choices=OutputType.choices, default=OutputType.TEXT
)
get_output_type_display: Callable[[], str]
- optional = models.BooleanField(default=False)
+ selection_options = models.JSONField(default=list, blank=True)
+ is_array = models.BooleanField(default=False)
job = models.ForeignKey[ExtractionJob](
- ExtractionJob, on_delete=models.CASCADE, related_name="output_fields"
+ ExtractionJob, null=True, blank=True, on_delete=models.CASCADE, related_name="output_fields"
+ )
+ subscription = models.ForeignKey(
+ "subscriptions.Subscription",
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="output_fields",
)
class Meta:
constraints = [
models.UniqueConstraint(
fields=["name", "job_id"],
+ condition=Q(job__isnull=False),
name="unique_output_field_name_per_job",
- )
+ ),
+ models.UniqueConstraint(
+ fields=["name", "subscription_id"],
+ condition=Q(subscription__isnull=False),
+ name="unique_output_field_name_per_subscription",
+ ),
+ models.CheckConstraint(
+ condition=(
+ Q(job__isnull=False, subscription__isnull=True)
+ | Q(job__isnull=True, subscription__isnull=False)
+ ),
+ name="output_field_exactly_one_parent",
+ ),
]
def __str__(self) -> str:
return f'Output Field "{self.name}" [{self.pk}]'
+    def clean(self) -> None:
+        """Validate that ``selection_options`` is consistent with ``output_type``.
+
+        For the Selection type at least one option is required, and the options are
+        replaced by the return value of ``validate_selection_options``. For every
+        other type the options must be empty and are reset to ``[]``.
+
+        Raises:
+            ValidationError: Keyed by ``selection_options`` when options are missing,
+                rejected by the validator, or supplied for a non-Selection type.
+        """
+        # Local imports kept from the original (presumably to avoid an import cycle - confirm).
+        from django.core.exceptions import ValidationError
+
+        from radis.extractions.utils.validation import validate_selection_options
+
+        super().clean()
+
+        if self.output_type == OutputType.SELECTION:
+            if not self.selection_options:
+                raise ValidationError({"selection_options": "Add at least one selection option."})
+            try:
+                self.selection_options = validate_selection_options(self.selection_options)
+            except ValidationError as e:
+                # ``e.message`` is only set when the error was built from a single
+                # message and is not interpolated with ``params``; ``e.messages`` is
+                # defined for single, list and dict errors alike.
+                raise ValidationError({"selection_options": e.messages}) from e
+        else:
+            if self.selection_options:
+                raise ValidationError(
+                    {"selection_options": "Selections are only allowed for the Selection type."}
+                )
+            # Normalise any other falsy value to an empty list.
+            self.selection_options = []
+
class ExtractionTask(AnalysisTask):
job = models.ForeignKey[ExtractionJob](
diff --git a/radis/extractions/processors.py b/radis/extractions/processors.py
index e4c7d886..ee2e7a31 100644
--- a/radis/extractions/processors.py
+++ b/radis/extractions/processors.py
@@ -46,11 +46,12 @@ def process_instance(self, instance: ExtractionInstance) -> None:
def process_output_fields(self, instance: ExtractionInstance) -> None:
job = instance.task.job
- Schema = generate_output_fields_schema(job.output_fields)
+ output_fields = list(job.output_fields.order_by("pk"))
+ Schema = generate_output_fields_schema(output_fields)
prompt = Template(settings.OUTPUT_FIELDS_SYSTEM_PROMPT).substitute(
{
"report": instance.text,
- "fields": generate_output_fields_prompt(job.output_fields),
+ "fields": generate_output_fields_prompt(output_fields),
}
)
result = self.client.extract_data(prompt.strip(), Schema)
diff --git a/radis/extractions/static/extractions/extractions.css b/radis/extractions/static/extractions/extractions.css
index 17c43b36..c2164fdc 100644
--- a/radis/extractions/static/extractions/extractions.css
+++ b/radis/extractions/static/extractions/extractions.css
@@ -1,3 +1,38 @@
#filters .asteriskField {
display: none;
}
+
+/*
+ * Selection Options Component
+ */
+
+.selection-options-controls,
+.selection-options-actions {
+ gap: 0.5rem;
+}
+
+.array-toggle-field {
+ display: flex;
+ align-items: center;
+ justify-content: flex-end;
+ padding-top: 14px;
+}
+
+.array-toggle-btn {
+ width: calc(2.5rem - 6px);
+ height: calc(2.5rem - 6px);
+ border-radius: 50%;
+ font-weight: 600;
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ padding: 0;
+ line-height: 1;
+ font-size: 1rem;
+}
+
+.array-toggle-btn.active {
+ background-color: var(--bs-primary);
+ border-color: var(--bs-primary);
+ color: #fff;
+}
diff --git a/radis/extractions/static/extractions/extractions.js b/radis/extractions/static/extractions/extractions.js
index e69de29b..0d400738 100644
--- a/radis/extractions/static/extractions/extractions.js
+++ b/radis/extractions/static/extractions/extractions.js
@@ -0,0 +1,141 @@
+/**
+ * Manages the dynamic selection options input for extraction output fields.
+ *
+ * Looks up the `[data-selection-input]` and `[data-array-input]` elements inside
+ * `rootEl`. The output type select (name `output_type` or ending in
+ * `-output_type`) and the `[data-array-toggle]` button are looked up inside the
+ * closest `.formset-form`, else the closest `form`, else `rootEl` itself.
+ *
+ * The returned object holds the state (`options`, `maxOptions`,
+ * `supportsSelection`, `isArray`, `lastSelectionOptions`) and the methods that
+ * read and mutate it. `addOption()` calls `this.$nextTick`, which is not defined
+ * in this function, so the object is presumably used as an Alpine.js component -
+ * confirm against the template.
+ *
+ * `maxOptions` is read from `data-max-selection-options` on the selection input,
+ * else on `rootEl`. It falls back to 0 when the attribute is missing or cannot be
+ * parsed by `Number.parseInt`; with 0, `addOption()` never adds an option.
+ *
+ * @param {HTMLElement} rootEl Element that contains the selection options inputs.
+ * @returns {Object} State and methods of the selection options widget.
+ */
+function SelectionOptions(rootEl) {
+ const hiddenInput = rootEl.querySelector("[data-selection-input]");
+ const arrayInput = rootEl.querySelector("[data-array-input]");
+ const formContainer =
+ rootEl.closest(".formset-form") ?? rootEl.closest("form") ?? rootEl;
+ const outputTypeField =
+ formContainer.querySelector('select[name$="-output_type"]') ??
+ formContainer.querySelector('select[name="output_type"]');
+ const arrayToggleButton =
+ formContainer.querySelector("[data-array-toggle]") ?? null;
+ const parseArrayValue = (value) => {
+ if (!value) { // missing or empty value counts as false
+ return false;
+ }
+ const normalized = value.trim().toLowerCase();
+ return normalized === "true" || normalized === "1" || normalized === "on";
+ };
+ const parseMaxOptions = () => {
+ const datasetValue =
+ hiddenInput?.dataset.maxSelectionOptions ??
+ rootEl.dataset.maxSelectionOptions ??
+ "";
+ const parsed = Number.parseInt(datasetValue, 10);
+ return Number.isNaN(parsed) ? 0 : parsed; // 0 when nothing parseable was found
+ };
+ const initialMaxOptions = parseMaxOptions();
+
+ return {
+ options: [],
+ maxOptions: initialMaxOptions,
+ supportsSelection: false,
+ isArray: parseArrayValue(arrayInput?.value),
+ lastSelectionOptions: [],
+ init() { // loads state from the inputs and registers the event listeners
+ this.options = this.parseOptions(hiddenInput?.value);
+ this.isArray = parseArrayValue(arrayInput?.value);
+ this.lastSelectionOptions = [...this.options];
+ this.updateSupports();
+ if (arrayToggleButton) {
+ arrayToggleButton.addEventListener("click", (event) => {
+ event.preventDefault();
+ this.toggleArray();
+ });
+ this.updateArrayButton();
+ }
+ if (outputTypeField) {
+ outputTypeField.addEventListener("change", () => {
+ const wasSelection = this.supportsSelection;
+ this.updateSupports();
+ if (!this.supportsSelection) { // type is not Selection: remember options, then clear
+ this.lastSelectionOptions = [...this.options];
+ this.options = [];
+ } else if (!wasSelection && this.options.length === 0) { // back to Selection, nothing entered
+ if (this.lastSelectionOptions.length > 0) {
+ this.options = [...this.lastSelectionOptions];
+ } else {
+ this.options = this.parseOptions(hiddenInput?.value);
+ }
+ }
+ });
+ }
+ },
+ parseOptions(value) { // JSON array of strings -> list; anything else -> []
+ if (!value) {
+ return [];
+ }
+ try {
+ const parsed = JSON.parse(value);
+ if (Array.isArray(parsed)) {
+ return parsed
+ .map((opt) => (typeof opt === "string" ? opt : ""))
+ .filter((opt) => opt !== ""); // drops non-string and empty entries
+ }
+ } catch (err) {
+ console.warn("Invalid selection options payload", err);
+ }
+ return [];
+ },
+ updateSupports() {
+ this.supportsSelection = outputTypeField
+ ? outputTypeField.value === "S" // "S" is the value of OutputType.SELECTION
+ : false;
+ },
+ syncOptions() { // writes the trimmed, non-empty options to the hidden input as JSON
+ if (!hiddenInput) {
+ return;
+ }
+ const sanitized = this.options
+ .map((opt) => (typeof opt === "string" ? opt.trim() : ""))
+ .filter((opt) => opt !== "");
+ hiddenInput.value = JSON.stringify(sanitized);
+ this.lastSelectionOptions = [...sanitized];
+ },
+ syncArray() { // writes isArray to the array input as "true" or "false"
+ if (!arrayInput) {
+ return;
+ }
+ arrayInput.value = this.isArray ? "true" : "false";
+ },
+ syncState() {
+ this.syncOptions();
+ this.syncArray();
+ this.updateArrayButton();
+ },
+ addOption() {
+ if (!this.supportsSelection || this.options.length >= this.maxOptions) {
+ return;
+ }
+ this.options.push("");
+ this.$nextTick(() => { // presumably Alpine's $nextTick (runs after the DOM update) - confirm
+ const inputs = rootEl.querySelectorAll("[data-selection-option-input]");
+ const lastInput = inputs[inputs.length - 1];
+ if (lastInput instanceof HTMLInputElement) {
+ lastInput.focus(); // focus the last option input
+ }
+ });
+ },
+ removeOption(index) {
+ this.options.splice(index, 1);
+ },
+ toggleArray() {
+ this.isArray = !this.isArray;
+ },
+ updateArrayButton() { // mirrors isArray onto the "active" class and aria-pressed
+ if (!arrayToggleButton) {
+ return;
+ }
+ arrayToggleButton.classList.toggle("active", this.isArray);
+ arrayToggleButton.setAttribute(
+ "aria-pressed",
+ this.isArray ? "true" : "false"
+ );
+ },
+ };
+}
diff --git a/radis/extractions/templates/extractions/_query_generation_result.html b/radis/extractions/templates/extractions/_query_generation_result.html
new file mode 100644
index 00000000..6205dca0
--- /dev/null
+++ b/radis/extractions/templates/extractions/_query_generation_result.html
@@ -0,0 +1,44 @@
+{% load bootstrap_icon from common_extras %}
+{% if error %}
+
+
{% bootstrap_icon "exclamation-triangle" %} Query Generation Failed
+
{{ error }}
+
+
+ You can manually enter a search query below.
+ The query will help find relevant medical reports for your extraction.
+
+
+
+{% elif generated_query %}
+
+
{% bootstrap_icon "check-circle" %} Query Generated Successfully
+
+ Based on your {{ output_fields_count }} extraction field{{ output_fields_count|pluralize }},
+ we generated the following search query:
+
+
{{ generated_query }}
+
+
+ You can edit this query below to refine your search.
+ The query will help find relevant medical reports for your extraction.
+
+
+
+ {# Update the query input field using out-of-band swap #}
+
+{% else %}
+
+
{% bootstrap_icon "info-circle" %} No Query Generated
+
+ Please manually enter a search query below.
+
+
+{% endif %}
diff --git a/radis/extractions/templates/extractions/_query_generation_section.html b/radis/extractions/templates/extractions/_query_generation_section.html
new file mode 100644
index 00000000..1a73b3b2
--- /dev/null
+++ b/radis/extractions/templates/extractions/_query_generation_section.html
@@ -0,0 +1,21 @@
+{% load bootstrap_icon from common_extras %}
+
+
+
+
+
+
+
{% bootstrap_icon "magic" %} Generating Search Query...
+
+ Using AI to create a search query from your extraction fields...
+
+
+
+
+
diff --git a/radis/extractions/templates/extractions/_search_preview.html b/radis/extractions/templates/extractions/_search_preview.html
new file mode 100644
index 00000000..d5b4354e
--- /dev/null
+++ b/radis/extractions/templates/extractions/_search_preview.html
@@ -0,0 +1,36 @@
+{% load bootstrap_icon from common_extras %}
+
+{% if error %}
+
+ {% bootstrap_icon "exclamation-triangle" %}
+ {{ error }}
+
+{% elif count is None %}
+
+ {% bootstrap_icon "info-circle" %}
+ Enter a query to see the number of reports that will be retrieved.
+
+{% else %}
+
+
+ {% bootstrap_icon "check-circle" %}
+ {{ count }} report{{ count|pluralize }} will be retrieved
+ {% if count > max_reports_limit %}
+
+
+ {% bootstrap_icon "exclamation-triangle" %}
+ This exceeds the maximum limit of {{ max_reports_limit }} reports. Please refine your query.
+
+ {% endif %}
+
+ {% if search_url %}
+
+ {% bootstrap_icon "box-arrow-up-right" %}
+ Preview Search Results
+
+ {% endif %}
+
+{% endif %}
diff --git a/radis/extractions/templates/extractions/_search_preview_form_section.html b/radis/extractions/templates/extractions/_search_preview_form_section.html
new file mode 100644
index 00000000..5b73b2a0
--- /dev/null
+++ b/radis/extractions/templates/extractions/_search_preview_form_section.html
@@ -0,0 +1,15 @@
+{% load bootstrap_icon from common_extras %}
+
+
+
+
+ Loading search preview...
+
+
diff --git a/radis/extractions/templates/extractions/_selection_options_field.html b/radis/extractions/templates/extractions/_selection_options_field.html
new file mode 100644
index 00000000..e1c81a9b
--- /dev/null
+++ b/radis/extractions/templates/extractions/_selection_options_field.html
@@ -0,0 +1,46 @@
+{% load bootstrap_icon from common_extras %}
+
+ {{ form.selection_options }}
+ {{ form.is_array }}
+
+
+
+ {% bootstrap_icon "plus-lg" %}
+ Enter a selection
+
+
+
+
+
+
+
+
+ {% bootstrap_icon "trash" %}
+
+
+
No selections defined yet.
+
+
+ Choose the “Selection” output type to define enumerated values.
+
+ {% if form.selection_options.errors %}
+
+ {% for error in form.selection_options.errors %}{{ error }}{% endfor %}
+
+ {% endif %}
+
diff --git a/radis/extractions/templates/extractions/extraction_job_detail.html b/radis/extractions/templates/extractions/extraction_job_detail.html
index 942a2fc8..3a083766 100644
--- a/radis/extractions/templates/extractions/extraction_job_detail.html
+++ b/radis/extractions/templates/extractions/extraction_job_detail.html
@@ -114,6 +114,20 @@
Output Fields
{{ field.output_type|human_readable_output_type }}
+ {% if field.is_array %}
+
Array Output
+
+ Yes — return multiple {{ field.output_type|human_readable_output_type|lower }} values
+
+ {% endif %}
+ {% if field.selection_options %}
+
Selections
+
+
+ {% for option in field.selection_options %}{{ option }} {% endfor %}
+
+
+ {% endif %}
{% endfor %}
@@ -121,11 +135,18 @@
Output Fields
{% if not job.is_preparing %}
-
- {% bootstrap_icon "eye" %}
- View Results
-
+
{% crispy filter.form %}
diff --git a/radis/extractions/templates/extractions/extraction_job_output_fields_form.html b/radis/extractions/templates/extractions/extraction_job_output_fields_form.html
index 4ab54deb..83fbca03 100644
--- a/radis/extractions/templates/extractions/extraction_job_output_fields_form.html
+++ b/radis/extractions/templates/extractions/extraction_job_output_fields_form.html
@@ -5,7 +5,7 @@
New Extraction Job
{% endblock title %}
{% block heading %}
-
+
Previous Step (Search)
- Next Step (Summary)
+ class="btn btn-secondary"
+ disabled>Previous Step
+ Next Step (Search Query)