Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
b85bdff
use 'BaseSettings' instead of 'BaseModel' as parent class for 'VlmRes…
jennifer-bowser Dec 17, 2025
2d8260c
updates comments + descriptions for vlm schema classes
jennifer-bowser Dec 17, 2025
45d2086
remove unnecessary fields 'setType' and 'exists' from instantiation o…
jennifer-bowser Dec 17, 2025
453eafc
simplify logic for using env vars for values + preventing overrides
jennifer-bowser Dec 18, 2025
bc3e3b3
add '.env.example' file with GREGoR-specific values required for 'Vl…
jennifer-bowser Dec 18, 2025
8a80567
use 'default_factory' instead of 'default' for attributes configured …
jennifer-bowser Dec 18, 2025
8d32e34
add required env vars to GitHub workflow
jennifer-bowser Dec 18, 2025
cf62b44
modify 'ResultSet' to prevent inadvertently overriding static fields
jennifer-bowser Dec 18, 2025
4d6c1fc
update comments for clarity
jennifer-bowser Dec 18, 2025
bdb6e53
Merge branch 'main' into issue-27-enable-config-of-node-specific-fiel…
jennifer-bowser Dec 19, 2025
d39ef0e
update arg name for 'ResultSet.id' > 'ResultSet.resultset_id'
jennifer-bowser Dec 19, 2025
a8ad18c
use Pydantic 'SettingsConfigDict' instead of custom func to set attri…
jennifer-bowser Dec 19, 2025
37fb5e5
moves logic to pull info from env vars out of 'VlmResponse' classes a…
jennifer-bowser Dec 22, 2025
0fa0571
fix tests
jennifer-bowser Dec 22, 2025
913bb7b
reformat description string to avoid wonky whitespace formatting
jennifer-bowser Dec 23, 2025
4ffaf89
refactor test to move fixture into the test where it's used
jennifer-bowser Dec 23, 2025
734b61f
Merge branch 'main' into issue-27-enable-config-of-node-specific-fiel…
jsstevenson Dec 23, 2025
f576b09
resolve merge conflicts
jennifer-bowser Dec 29, 2025
4e9d85d
resolve merge conflicts
jennifer-bowser Dec 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,11 @@ ANYVLM_STORAGE_URI=postgresql://anyvlm:anyvlm-pw@localhost:5435/anyvlm

## Testing - see "Contributing" -> "Testing" in the docs
ANYVLM_TEST_STORAGE_URI=postgresql://anyvlm_test:anyvlm-test-pw@localhost:5436/anyvlm_test

###########################
## VLM RESPONSE SETTINGS ##
###########################
HANDOVER_TYPE_ID="GREGoR-NCH"
HANDOVER_TYPE_LABEL="GREGoR AnyVLM Reference"
BEACON_HANDOVER_URL="https://variants.gregorconsortium.org/"
BEACON_NODE_ID="org.anyvlm.gregor"
6 changes: 5 additions & 1 deletion .github/workflows/python-package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,12 @@ jobs:
- name: Run tests
run: uv run pytest
env:
ANYVLM_ANYVAR_TEST_STORAGE_URI: postgresql://postgres:postgres@localhost:5432/postgres
ANYVLM_TEST_STORAGE_URI: postgresql://postgres:postgres@localhost:5432/postgres
ANYVLM_ANYVAR_TEST_STORAGE_URI: postgresql://postgres:postgres@localhost:5432/postgres
HANDOVER_TYPE_ID: GREGoR-NCH
HANDOVER_TYPE_LABEL: "GREGoR AnyVLM Reference"
BEACON_HANDOVER_URL: https://variants.gregorconsortium.org/
BEACON_NODE_ID: org.anyvlm.gregor
lint:
name: lint
runs-on: ubuntu-latest
Expand Down
53 changes: 52 additions & 1 deletion src/anyvlm/functions/build_vlm_response.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,35 @@
"""Craft a VlmResponse object from a list of CohortAlleleFrequencyStudyResults"""

import os

from anyvlm.schemas.vlm import (
BeaconHandover,
HandoverType,
ResponseField,
ResponseSummary,
VlmResponse,
)
from anyvlm.utils.types import AnyVlmCohortAlleleFrequencyResult


class MissingEnvironmentVariableError(Exception):
    """Raised when a required environment variable is unset or empty."""


def _get_environment_var(key: str) -> str:
"""Retrieves an environment variable, raising an error if it is not set.

:param key: The key for the environment variable
:returns: The value for the environment variable of the provided `key`
:raises: MissingEnvironmentVariableError if environment variable is not found.
"""
value: str | None = os.environ.get(key)
if not value:
message = f"Missing required environment variable: {key}"
raise MissingEnvironmentVariableError(message)
return value


def build_vlm_response_from_caf_data(
caf_data: list[AnyVlmCohortAlleleFrequencyResult],
) -> VlmResponse:
Expand All @@ -14,4 +38,31 @@ def build_vlm_response_from_caf_data(
:param caf_data: A list of `AnyVlmCohortAlleleFrequencyResult` objects that will be used to build the VlmResponse
:return: A `VlmResponse` object.
"""
raise NotImplementedError # TODO: Implement this during/after Issue #16
raise NotImplementedError # TODO: Remove this and finish implementing this function in Issue #35

# TODO - create `handover_type` and `beacon_handovers` dynamically,
# instead of pulling from environment variables. See Issue #37.
handover_type = HandoverType(
id=_get_environment_var("HANDOVER_TYPE_ID"),
label=_get_environment_var("HANDOVER_TYPE_LABEL"),
)

beacon_handovers: list[BeaconHandover] = [
BeaconHandover(
handoverType=handover_type, url=_get_environment_var("BEACON_HANDOVER_URL")
)
]

num_results = len(caf_data)
response_summary = ResponseSummary(
exists=num_results > 0, numTotalResults=num_results
)

# TODO - create this field in Issue #35
response_field = ResponseField()

return VlmResponse(
beaconHandovers=beacon_handovers,
responseSummary=response_summary,
response=response_field,
)
61 changes: 44 additions & 17 deletions src/anyvlm/schemas/vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,36 @@
from typing import ClassVar, Literal, Self

from pydantic import BaseModel, ConfigDict, Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

from anyvlm.utils.types import Zygosity

# ruff: noqa: N815 (allows camelCase vars instead of snake_case to align with expected VLM protocol response)
# ruff: noqa: N815, N803, D107 (allow camelCase instead of snake_case to align with expected VLM protocol response + don't require init docstrings)

RESULT_ENTITY_TYPE = "genomicVariant"


class HandoverType(BaseModel):
"""The type of handover the parent `BeaconHandover` represents."""

id: str = Field(
default="gregor", description="Node-specific identifier"
) # TODO: enable configuration of this field. See Issue #27.
id: str = Field(default="gregor", description="Node-specific identifier")
label: str = Field(
default="GREGoR AnVIL browser", description="Node-specific label"
) # TODO: enable configuration of this field. See Issue #27.
description="Node-specific identifier",
)


class BeaconHandover(BaseModel):
"""Describes how users can get more information about the results provided in the parent `VlmResponse`"""

handoverType: HandoverType = HandoverType()
handoverType: HandoverType = Field(
..., description="The type of handover this represents"
)
url: str = Field(
default="https://anvil.terra.bio/#workspaces?filter=GREGoR", # TODO: enable configuration of this field. See Issue #27.
description="A url which directs users to more detailed information about the results tabulated by the API (ideally human-readable)",
"",
description="""
A url which directs users to more detailed information about the results tabulated by the API. Must be human-readable.
Ideally links directly to the variant specified in the query, but can be a generic search page if necessary.
""",
)


Expand All @@ -42,13 +46,27 @@ class ReturnedSchema(BaseModel):
schema_: str = Field(
default="ga4gh-beacon-variant-v2.0.0",
# Alias is required because 'schema' is reserved by Pydantic's BaseModel class,
# But VLM expects a field named 'schema'
# But VLM protocol expects a field named 'schema'
alias="schema",
)

model_config = ConfigDict(populate_by_name=True)


class MetaSettings(BaseSettings):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason this is here and not in the config module?

Copy link
Contributor Author

@jennifer-bowser jennifer-bowser Dec 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I couldn't figure out how to use the config module to pull the beaconId from environment variables in a way that would throw an error if it's not set while also preventing this field from ever being overridden, while also making sure typecheckers know that you're not supposed to pass in a value for beaconId when instantiating the class. I also wanted to keep the metadata associated with the description field for this.

I've tried a couple different things, but this is the best I was able to figure out. I don't really love it though, I'd very much be open to suggestions if you have any!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm... I am not sure I understand the tradeoffs, but it probably doesn't matter. I think we are likely in "merge and deal with it" territory

"""Settings for 'Meta' class"""

beaconId: str = Field(..., alias="BEACON_NODE_ID")

model_config = SettingsConfigDict(
env_prefix="",
extra="ignore",
)


meta_settings = MetaSettings() # type: ignore


class Meta(BaseModel):
"""Relevant metadata about the results provided in the parent `VlmResponse`"""

Expand All @@ -57,15 +75,20 @@ class Meta(BaseModel):
description="The version of the VLM API that this response conforms to",
)
beaconId: str = Field(
default="org.gregor.beacon", # TODO: enable configuration of this field. See Issue #27.
description="""
The Id of a Beacon. Usually a reversed domain string, but any URI is acceptable. The purpose of this attribute is,
in the context of a Beacon network, to disambiguate responses coming from different Beacons. See the beacon documentation
[here](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/src/common/beaconCommonComponents.yaml#L26)
""",
default="",
description=(
"The Id of a Beacon. Usually a reversed domain string, but any URI is acceptable. "
"The purpose of this attribute is,in the context of a Beacon network, to disambiguate "
"responses coming from different Beacons. See the beacon documentation "
"[here](https://github.com/ga4gh-beacon/beacon-v2/blob/c6558bf2e6494df3905f7b2df66e903dfe509500/framework/src/common/beaconCommonComponents.yaml#L26)"
),
)
returnedSchemas: list[ReturnedSchema] = [ReturnedSchema()]

# custom __init__ to prevent overriding attributes that are static or set via environment variables
def __init__(self) -> None:
super().__init__(beaconId=meta_settings.beaconId)


class ResponseSummary(BaseModel):
"""A high-level summary of the results provided in the parent `VlmResponse"""
Expand Down Expand Up @@ -104,6 +127,10 @@ class ResultSet(BaseModel):
description=f"The type of entity relevant to these results. Must always be set to '{RESULT_ENTITY_TYPE}'",
)

# Custom __init__: callers may supply only the identifier and the count, so the
# model's static fields cannot be inadvertently overridden at construction time.
def __init__(self, resultset_id: str, resultsCount: int) -> None:
    """Create a ResultSet from its identifier and result count.

    :param resultset_id: Value stored in the model's `id` field
    :param resultsCount: Value stored in the model's `resultsCount` field
    """
    # NOTE(review): pydantic appears to route validation of dict input through a
    # model's custom __init__ — confirm that parsing a ResultSet from a payload
    # keyed by `id` (rather than `resultset_id`) still works where it is needed.
    super().__init__(id=resultset_id, resultsCount=resultsCount)


class ResponseField(BaseModel):
"""A list of ResultSets"""
Expand All @@ -116,7 +143,7 @@ class ResponseField(BaseModel):
class VlmResponse(BaseModel):
"""Define response structure for the variant_counts endpoint."""

beaconHandovers: list[BeaconHandover] = [BeaconHandover()]
beaconHandovers: list[BeaconHandover]
meta: Meta = Meta()
responseSummary: ResponseSummary
response: ResponseField
Expand Down
9 changes: 1 addition & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,7 @@
QualityMeasures,
)


@pytest.fixture(scope="session", autouse=True)
def load_env():
"""Load `.env` file.

Must set `autouse=True` to run before other fixtures or test cases.
"""
load_dotenv()
load_dotenv()


@pytest.fixture(scope="session")
Expand Down
96 changes: 58 additions & 38 deletions tests/unit/test_schemas.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Test schema validation functionality"""

import os
import re

import pytest

from anyvlm.schemas.vlm import (
RESULT_ENTITY_TYPE,
BeaconHandover,
HandoverType,
ResponseField,
ResponseSummary,
Expand All @@ -17,75 +18,94 @@

@pytest.fixture(scope="module")
def valid_handover_id() -> str:
return HandoverType().id
return os.environ.get("HANDOVER_TYPE_ID") # type: ignore


@pytest.fixture(scope="module")
def beacon_handovers(valid_handover_id: str) -> list[BeaconHandover]:
    """Build the single-element handover list shared by the tests in this module.

    The label and URL are read from the `HANDOVER_TYPE_LABEL` and
    `BEACON_HANDOVER_URL` environment variables. They are indexed directly so
    that a missing variable fails with a `KeyError` naming it, instead of
    passing `None` into the models.

    :param valid_handover_id: The handover type id supplied by the fixture of the same name
    :return: A list containing one `BeaconHandover`
    """
    handover_type = HandoverType(
        id=valid_handover_id,
        label=os.environ["HANDOVER_TYPE_LABEL"],
    )

    return [
        BeaconHandover(
            handoverType=handover_type,
            url=os.environ["BEACON_HANDOVER_URL"],
        )
    ]


@pytest.fixture(scope="module")
def response_summary() -> ResponseSummary:
    """Provide a summary for an empty result: `exists` is False and the total is zero."""
    return ResponseSummary(
        exists=False,
        numTotalResults=0,
    )


@pytest.fixture(scope="module")
def responses_with_invalid_resultset_ids(valid_handover_id) -> list[ResponseField]:
return [
def test_valid_resultset_id(
    valid_handover_id: str,
    beacon_handovers: list[BeaconHandover],
    response_summary: ResponseSummary,
) -> None:
    """A ResultSet id of the form "<handover id> <zygosity>" is accepted by VlmResponse."""
    expected_id = f"{valid_handover_id} {Zygosity.HOMOZYGOUS}"
    result_set = ResultSet(resultset_id=expected_id, resultsCount=0)

    # Building the response must NOT raise an error
    vlm_response = VlmResponse(
        beaconHandovers=beacon_handovers,
        responseSummary=response_summary,
        response=ResponseField(resultSets=[result_set]),
    )

    assert vlm_response.response.resultSets[0].id == expected_id


def test_invalid_resultset_ids(
response_summary: ResponseSummary,
beacon_handovers: list[BeaconHandover],
):
responses_with_invalid_resultset_ids: list[ResponseField] = [
ResponseField(
resultSets=[
ResultSet(
exists=True,
id=f"invalid_handover_id {Zygosity.HOMOZYGOUS}",
resultset_id=f"invalid_handover_id {Zygosity.HOMOZYGOUS}",
resultsCount=0,
setType=RESULT_ENTITY_TYPE,
)
]
),
ResponseField(
resultSets=[
ResultSet(
exists=True,
id=f"{valid_handover_id} invalid_zygosity",
resultset_id=f"{valid_handover_id} invalid_zygosity",
resultsCount=0,
setType=RESULT_ENTITY_TYPE,
)
]
),
ResponseField(
resultSets=[
ResultSet(
exists=True,
id=f"{Zygosity.HOMOZYGOUS}-{valid_handover_id}", # incorrect order/formatting
resultset_id=f"{Zygosity.HOMOZYGOUS}-{valid_handover_id}", # incorrect order/formatting
resultsCount=0,
setType=RESULT_ENTITY_TYPE,
)
]
),
]


def test_valid_resultset_id(response_summary, valid_handover_id):
response = ResponseField(
resultSets=[
ResultSet(
exists=True,
id=f"{valid_handover_id} {Zygosity.HOMOZYGOUS}",
resultsCount=0,
setType=RESULT_ENTITY_TYPE,
)
]
)

# Should NOT raise an error
vlm_response = VlmResponse(responseSummary=response_summary, response=response)

assert (
vlm_response.response.resultSets[0].id
== f"{valid_handover_id} {Zygosity.HOMOZYGOUS}"
)


def test_invalid_resultset_ids(response_summary, responses_with_invalid_resultset_ids):
for response in responses_with_invalid_resultset_ids:
with pytest.raises(
ValueError,
match=re.escape(VlmResponse.resultset_id_error_message_base),
):
VlmResponse(responseSummary=response_summary, response=response)
VlmResponse(
beaconHandovers=beacon_handovers,
responseSummary=response_summary,
response=response,
)