From b668a6010cd0db7f73334091dd8e4b9be42902d9 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 1 Nov 2023 16:01:26 +0100 Subject: [PATCH 1/7] feat: support for new-style imports (#70) --- tests/data/config/varfishrc.projects.toml | 37 ++++++++++++++ .../test_config/test_load_projects/result | 38 ++++++++++++++ tests/test_config.py | 18 +++++++ varfish_cli/config.py | 49 +++++++++++++++++++ 4 files changed, 142 insertions(+) create mode 100644 tests/data/config/varfishrc.projects.toml create mode 100644 tests/snapshots/test_config/test_load_projects/result create mode 100644 tests/test_config.py diff --git a/tests/data/config/varfishrc.projects.toml b/tests/data/config/varfishrc.projects.toml new file mode 100644 index 0000000..018aac4 --- /dev/null +++ b/tests/data/config/varfishrc.projects.toml @@ -0,0 +1,37 @@ +[global] +varfish_server_url = "https://varfish.example.com/" +varfish_api_token = "39c01db5-a808-4262-8b4d-7fd712389b59" + +[[projects]] +title = "S3 Example" +uuid = "00000000-0000-0000-0000-000000000001" +import_data_protocol = "s3" +import_data_host = "s3-server.example.net" +import_data_port = 443 +import_data_path = "bucket-name" +import_data_user = "s3-user" +import_data_password = "s3-key" + +[[projects]] +title = "HTTP Example" +uuid = "00000000-0000-0000-0000-000000000002" +import_data_protocol = "http" +import_data_host = "http-server.example.net" +import_data_path = "/http-prefix" +import_data_user = "http-user" +import_data_password = "http-password" + +[[projects]] +title = "HTTPS Example" +uuid = "00000000-0000-0000-0000-000000000003" +import_data_protocol = "https" +import_data_host = "https-server.example.net" +import_data_path = "/https-prefix" +import_data_user = "https-user" +import_data_password = "https-password" + +[[projects]] +title = "File Example" +uuid = "00000000-0000-0000-0000-000000000004" +import_data_protocol = "file" +import_data_path = "/path/prefix" diff --git 
a/tests/snapshots/test_config/test_load_projects/result b/tests/snapshots/test_config/test_load_projects/result new file mode 100644 index 0000000..1efa663 --- /dev/null +++ b/tests/snapshots/test_config/test_load_projects/result @@ -0,0 +1,38 @@ +[ + { + "title": "S3 Example", + "uuid": "00000000-0000-0000-0000-000000000001", + "import_data_protocol": "s3", + "import_data_host": "s3-server.example.net", + "import_data_path": "bucket-name", + "import_data_user": "s3-user", + "import_data_password": "s3-key" + }, + { + "title": "HTTP Example", + "uuid": "00000000-0000-0000-0000-000000000002", + "import_data_protocol": "http", + "import_data_host": "http-server.example.net", + "import_data_path": "/http-prefix", + "import_data_user": "http-user", + "import_data_password": "http-password" + }, + { + "title": "HTTPS Example", + "uuid": "00000000-0000-0000-0000-000000000003", + "import_data_protocol": "https", + "import_data_host": "https-server.example.net", + "import_data_path": "/https-prefix", + "import_data_user": "https-user", + "import_data_password": "https-password" + }, + { + "title": "File Example", + "uuid": "00000000-0000-0000-0000-000000000004", + "import_data_protocol": "file", + "import_data_host": null, + "import_data_path": "/path/prefix", + "import_data_user": null, + "import_data_password": null + } +] \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..096e998 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,18 @@ +import json + +import pydantic +from pytest_snapshot.plugin import Snapshot + +from varfish_cli import config + + +def test_load_config(): + result = config.load_config("tests/data/config/varfishrc.toml") + assert result == ("https://varfish.example.com/", "39c01db5-a808-4262-8b4d-7fd712389b59") + + +def test_load_projects(snapshot: Snapshot): + result = config.load_projects("tests/data/config/varfishrc.projects.toml") + assert len(result) == 4 + result_str = 
json.dumps([obj.model_dump(mode="json") for obj in result.values()], indent=2) + snapshot.assert_match(result_str, "result") diff --git a/varfish_cli/config.py b/varfish_cli/config.py index 363f987..5b60a6b 100644 --- a/varfish_cli/config.py +++ b/varfish_cli/config.py @@ -1,6 +1,8 @@ """Common configuration code.""" +import enum import os +import uuid try: import tomllib @@ -63,3 +65,50 @@ def load_config(config_path: str) -> typing.Tuple[typing.Optional[str], typing.O logger.debug("global/varfish_api_token not set in %s", config_path) return toml_varfish_server_url, toml_varfish_api_token + + +@enum.unique +class ImportDataProtocol(enum.Enum): + """Protocol for importing data.""" + + S3 = "s3" + HTTP = "http" + HTTPS = "https" + FILE = "file" + + +class ProjectConfig(pydantic.BaseModel): + """Configuration for one project in ``~/.varfishrc.toml``.""" + + #: Human-readable name of the project. + title: typing.Optional[str] = None + #: UUID of project. + uuid: uuid.UUID + #: Protocol to use. + import_data_protocol: ImportDataProtocol + #: Host name to use. + import_data_host: typing.Optional[str] = None + #: Path prefix to use. + import_data_path: str + #: User name to use for connecting, if any. + import_data_user: typing.Optional[str] = None + #: Password to use for connecting, if any. 
+ import_data_password: typing.Optional[pydantic.SecretStr] = None + + @pydantic.field_serializer("import_data_password", when_used="json") + def dump_secret(self, v: typing.Optional[pydantic.SecretStr]): + if v: + return v.get_secret_value() + else: + return v + + +def load_projects(config_path: str) -> typing.Dict[uuid.UUID, ProjectConfig]: + """Load projects from configuration TOML file at ``config_path``""" + + with open(config_path, "rt") as inputf: + fcontents = inputf.read() + toml_dict = tomllib.loads(fcontents) + + projects = list(map(ProjectConfig.model_validate, toml_dict.get("projects", []))) + return {p.uuid: p for p in projects} From fbcd143f1e37a49ad74695f62b7db59b868ac7dc Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 1 Nov 2023 17:17:35 +0100 Subject: [PATCH 2/7] wip --- tests/cli/test_projects.py | 58 +++++++++++++++++++++++++++++++++++++ varfish_cli/api/models.py | 19 ++++++++++++ varfish_cli/api/project.py | 30 ++++++++++++++++++- varfish_cli/cli/projects.py | 46 +++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 1 deletion(-) diff --git a/tests/cli/test_projects.py b/tests/cli/test_projects.py index 226e856..f63f27b 100644 --- a/tests/cli/test_projects.py +++ b/tests/cli/test_projects.py @@ -110,3 +110,61 @@ def test_project_retrieve( assert result.exit_code == 0, result.output snapshot.assert_match(result.output, "result_output") + + +def test_project_load_config( + runner: CliRunner, + fake_fs_configured: FakeFs, + requests_mock: RequestsMocker, + fake_conn: typing.Tuple[str, str], + project_retrieve_result, + snapshot: Snapshot, + mocker: MockerFixture, +): + mocker.patch("varfish_cli.config.open", fake_fs_configured.open_, create=True) + mocker.patch("varfish_cli.config.os", fake_fs_configured.os) + + responses = { + "import_data_host": ("STRING", "http-host.example.com"), + "import_data_password": ("STRING", "http-password"), + "import_data_path": ("STRING", "http-prefix/"), + "import_data_port": ("INTEGER", 80), 
+ "import_data_protocol": ("STRING", "http"), + "import_data_user": ("STRING", "http-user"), + } + + project_uuid = str(uuid.uuid4()) + host, token = fake_conn + req_mocks = [] + for setting_name, (setting_type, setting_value) in responses.items(): + req_mocks.append( + requests_mock.get( + ( + f"{host}/project/api/retrieve/{project_uuid}?app_name=cases_import" + f"&setting_name={setting_name}" + ), + request_headers={"Authorization": f"Token {token}"}, + json={ + "project": project_uuid, + "user": None, + "name": setting_name, + "type": setting_type, + "value": setting_value, + "user_modifiable": True, + "app_name": "cases_import", + }, + ) + ) + result = runner.invoke(app, ["--verbose", "projects", "project-load-config", project_uuid]) + + rc_path = fake_fs_configured.os.path.expanduser("~/.varfishrc.toml") + with fake_fs_configured.open_(rc_path, "rt") as inputf: + fcontents = inputf.read() + + mocker.stopall() + + for req_mock in req_mocks: + assert req_mock.called_once, req_mock._netloc + + assert result.exit_code == 0, result.output + snapshot.assert_match(fcontents, "result_output") diff --git a/varfish_cli/api/models.py b/varfish_cli/api/models.py index af5203b..aa6fbed 100644 --- a/varfish_cli/api/models.py +++ b/varfish_cli/api/models.py @@ -341,3 +341,22 @@ class VarAnnoSetEntryV1(pydantic.BaseModel): alternative: str #: Data, the set's fields are the keys. payload: typing.Dict[str, str] + + +class SettingsEntry(pydantic.BaseModel): + """Configuration entry from server""" + + #: Project UUID. + project: typing.Optional[uuid.UUID] + #: User UUID, if any. + user: typing.Optional[uuid.UUID] + #: Name of the app. + app_name: str + #: Name of the setting. + name: str + #: Type of the setting. + type: typing.Literal["STRING", "INTEGER", "BOOLEAN"] + #: Value of the setting. + value: typing.Any + #: Whether the user can modify the setting. 
+ user_modifiable: bool diff --git a/varfish_cli/api/project.py b/varfish_cli/api/project.py index 04b664c..e38a425 100644 --- a/varfish_cli/api/project.py +++ b/varfish_cli/api/project.py @@ -8,7 +8,7 @@ import requests from varfish_cli.api.common import raise_for_status -from varfish_cli.api.models import Project +from varfish_cli.api.models import Project, SettingsEntry from varfish_cli.common import strip_trailing_slash ACCEPT_API_VARFISH = "" @@ -17,6 +17,8 @@ ENDPOINT_PROJECT_LIST = "/project/api/list" #: End point for retrieving projects. ENDPOINT_PROJECT_RETRIEVE = "/project/api/retrieve/{project_uuid}" +#: End point for retrieving projects settings. +ENDPOINT_PROJECT_RETRIEVE = "/project/api/settings/retrieve/{project_uuid}" def project_list( @@ -48,3 +50,29 @@ def project_retrieve( result = requests.get(endpoint, headers=headers, verify=verify_ssl) raise_for_status(result) return pydantic.TypeAdapter(Project).validate_python(result.json()) + + +def project_settings_retrieve( + server_url: str, + api_token: str, + project_uuid: typing.Union[str, uuid.UUID], + app_name: typing.Optional[str], + setting_name: typing.Optional[str], + verify_ssl: bool = True, + ) -> SettingsEntry: + server_url = strip_trailing_slash(server_url) + queries = [] + if app_name: + queries.append(f"app_name={app_name}") + if setting_name: + queries.append(f"setting_name={setting_name}") + query = "&".join(queries) + if query: + query = f"?{query}" + endpoint = f"{server_url}{ENDPOINT_PROJECT_RETRIEVE}{query}".format(project_uuid=project_uuid) + logger.debug("Sending GET request to end point %s", endpoint) + headers = {"Authorization": "Token %s" % api_token} + result = requests.get(endpoint, headers=headers, verify=verify_ssl) + raise_for_status(result) + print(result.json()) + return pydantic.TypeAdapter(SettingsEntry).validate_python(result.json()) diff --git a/varfish_cli/cli/projects.py b/varfish_cli/cli/projects.py index bce2c27..5951756 100644 --- a/varfish_cli/cli/projects.py 
+++ b/varfish_cli/cli/projects.py @@ -3,11 +3,13 @@ import typing import uuid +from logzero import logger import typer from varfish_cli import api, common from varfish_cli.cli.common import ListObjects, RetrieveObject from varfish_cli.common import OutputFormat +from varfish_cli.config import CommonOptions #: Default fields for projects. DEFAULT_FIELDS_PROJECT: typing.Dict[OutputFormat, typing.Optional[typing.Tuple[str, ...]]] = { @@ -76,3 +78,47 @@ def cli_project_retrieve( object_uuid=object_uuid, output_file=output_file, ) + + +@app.command("project-load-config") +def cli_project_load_config( + ctx: typer.Context, + project_uuid: typing.Annotated[ + uuid.UUID, typer.Argument(..., help="UUID of the object to retrieve") + ], + output_file: typing.Annotated[ + str, typer.Option("--output-file", help="Path to file to write to") + ] = "-", +): + """Load project configuration for import and store in ~/.varfishrc.toml""" + common_options: common.CommonOptions = ctx.obj + logger.info("Retrieving project configuration...") + + fields_types = { + "import_data_host": str, + "import_data_password": str, + "import_data_path": str, + "import_data_port": int, + "import_data_protocol": str, + "import_data_user": str, + } + + kwargs = {} + for field_name, field_type in fields_types.items(): + logger.debug(" - retrieving %s", field_name) + setting_entry = api.project_settings_retrieve( + server_url=common_options.varfish_server_url, + api_token=common_options.varfish_api_token.get_secret_value(), + project_uuid=project_uuid, + app_name="cases_import", + setting_name=field_name, + verify_ssl=common_options.verify_ssl, + ) + print(setting_entry) + if setting_entry.value: + kwargs[field_name] = field_type(setting_entry.value) + print(kwargs) + + logger.info("... 
all data retrieved, updating config...") + + print(kwargs) From 1ef0cf1dadade001622b080af154e9126f4b31c3 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Thu, 2 Nov 2023 10:51:41 +0100 Subject: [PATCH 3/7] wip --- tests/cli/test_projects.py | 2 +- varfish_cli/api/project.py | 5 ++--- varfish_cli/cli/projects.py | 26 ++++++++++++++++++++++++++ varfish_cli/config.py | 2 +- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/tests/cli/test_projects.py b/tests/cli/test_projects.py index f63f27b..4616d63 100644 --- a/tests/cli/test_projects.py +++ b/tests/cli/test_projects.py @@ -140,7 +140,7 @@ def test_project_load_config( req_mocks.append( requests_mock.get( ( - f"{host}/project/api/retrieve/{project_uuid}?app_name=cases_import" + f"{host}/project/api/settings/retrieve/{project_uuid}?app_name=cases_import" f"&setting_name={setting_name}" ), request_headers={"Authorization": f"Token {token}"}, diff --git a/varfish_cli/api/project.py b/varfish_cli/api/project.py index e38a425..1c73b3f 100644 --- a/varfish_cli/api/project.py +++ b/varfish_cli/api/project.py @@ -18,7 +18,7 @@ #: End point for retrieving projects. ENDPOINT_PROJECT_RETRIEVE = "/project/api/retrieve/{project_uuid}" #: End point for retrieving projects settings. 
-ENDPOINT_PROJECT_RETRIEVE = "/project/api/settings/retrieve/{project_uuid}" +ENDPOINT_PROJECT_SETTING_RETRIEVE = "/project/api/settings/retrieve/{project_uuid}" def project_list( @@ -69,10 +69,9 @@ def project_settings_retrieve( query = "&".join(queries) if query: query = f"?{query}" - endpoint = f"{server_url}{ENDPOINT_PROJECT_RETRIEVE}{query}".format(project_uuid=project_uuid) + endpoint = f"{server_url}{ENDPOINT_PROJECT_SETTING_RETRIEVE}{query}".format(project_uuid=project_uuid) logger.debug("Sending GET request to end point %s", endpoint) headers = {"Authorization": "Token %s" % api_token} result = requests.get(endpoint, headers=headers, verify=verify_ssl) raise_for_status(result) - print(result.json()) return pydantic.TypeAdapter(SettingsEntry).validate_python(result.json()) diff --git a/varfish_cli/cli/projects.py b/varfish_cli/cli/projects.py index 5951756..58d4723 100644 --- a/varfish_cli/cli/projects.py +++ b/varfish_cli/cli/projects.py @@ -1,5 +1,14 @@ """Implementation of varfish-cli subcommand "projects *".""" +import os + +try: + import tomllib + from tomllib import TOMLDecodeError +except ImportError: + import toml as tomllib + from toml import TomlDecodeError as TOMLDecodeError + import typing import uuid @@ -7,6 +16,7 @@ import typer from varfish_cli import api, common +from varfish_cli.cli import DEFAULT_PATH_VARFISHRC from varfish_cli.cli.common import ListObjects, RetrieveObject from varfish_cli.common import OutputFormat from varfish_cli.config import CommonOptions @@ -86,6 +96,10 @@ def cli_project_load_config( project_uuid: typing.Annotated[ uuid.UUID, typer.Argument(..., help="UUID of the object to retrieve") ], + config_path: typing.Annotated[ + str, + typer.Option("--config-path", help="Path to configuration file", envvar="VARFISH_RC_PATH"), + ] = DEFAULT_PATH_VARFISHRC, output_file: typing.Annotated[ str, typer.Option("--output-file", help="Path to file to write to") ] = "-", @@ -121,4 +135,16 @@ def cli_project_load_config( 
logger.info("... all data retrieved, updating config...") + if not os.path.exists(config_path): + with open(config_path, "rt") as tomlf: + try: + config_toml = tomllib.loads(tomlf.read()) + except TOMLDecodeError as e: + logger.error("could not parse configuration file %s: %s", config_path, e) + raise typer.Exit(1) + else: + config_toml = {} + + config_toml.setdefault("paths", []) + print(kwargs) diff --git a/varfish_cli/config.py b/varfish_cli/config.py index 5b60a6b..53e5c09 100644 --- a/varfish_cli/config.py +++ b/varfish_cli/config.py @@ -60,7 +60,7 @@ def load_config(config_path: str) -> typing.Tuple[typing.Optional[str], typing.O logger.debug("global/varfish_server_url not set in %s", config_path) toml_varfish_api_token = config_toml.get("global", {}).get("varfish_api_token") if toml_varfish_api_token: - logger.debug("using global/varfish_server_url from %s", config_path) + logger.debug("using global/varfish_api_token from %s", config_path) else: logger.debug("global/varfish_api_token not set in %s", config_path) From 5b81cca48d4e29717516e76fe435313e646f48a1 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 3 Nov 2023 10:09:00 +0100 Subject: [PATCH 4/7] getting project config from server works --- requirements/base.txt | 2 +- .../result_output | 11 +++++ tests/cli/test_projects.py | 28 +++++++++--- tests/test_config.py | 1 - varfish_cli/api/project.py | 6 ++- varfish_cli/cli/__init__.py | 4 +- varfish_cli/cli/common.py | 4 ++ varfish_cli/cli/projects.py | 43 +++++++++---------- varfish_cli/config.py | 17 +++----- 9 files changed, 69 insertions(+), 47 deletions(-) create mode 100644 tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output diff --git a/requirements/base.txt b/requirements/base.txt index 70d4651..b2a031a 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,7 +6,7 @@ python-dateutil >=2.8.1,<3.0 # pydantic: typed models and validation pydantic >=2,<3 -# toml parsing if python <3.11 +# toml 
parsing and writing toml >=0.10.2,<0.11 # typer: typed command line interfaces. diff --git a/tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output b/tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output new file mode 100644 index 0000000..754ccd0 --- /dev/null +++ b/tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output @@ -0,0 +1,11 @@ +[[projects]] +uuid = "16251f30-1168-41c9-8af6-07c8f40f6860" +import_data_protocol = "http" +import_data_host = "http-host.example.com" +import_data_path = "http-prefix/" +import_data_user = "http-user" +import_data_password = "http-password" + +[global] +varfish_server_url = "http://varfish.example.com:8080" +varfish_api_token = "faKeTOKeN" diff --git a/tests/cli/test_projects.py b/tests/cli/test_projects.py index 4616d63..372859c 100644 --- a/tests/cli/test_projects.py +++ b/tests/cli/test_projects.py @@ -1,6 +1,7 @@ """Test CLI for projects API.""" import json +import types import typing import uuid @@ -12,6 +13,8 @@ from tests.conftest import FakeFs from varfish_cli.cli import app +from varfish_cli.cli.projects import cli_project_load_config +from varfish_cli.config import CommonOptions @pytest.fixture @@ -112,17 +115,17 @@ def test_project_retrieve( snapshot.assert_match(result.output, "result_output") -def test_project_load_config( - runner: CliRunner, +def test_project_load_config_raw_func_call( fake_fs_configured: FakeFs, requests_mock: RequestsMocker, fake_conn: typing.Tuple[str, str], - project_retrieve_result, snapshot: Snapshot, mocker: MockerFixture, ): mocker.patch("varfish_cli.config.open", fake_fs_configured.open_, create=True) mocker.patch("varfish_cli.config.os", fake_fs_configured.os) + mocker.patch("varfish_cli.cli.projects.open", fake_fs_configured.open_, create=True) + mocker.patch("varfish_cli.cli.projects.os", fake_fs_configured.os, create=True) responses = { "import_data_host": ("STRING", 
"http-host.example.com"), @@ -133,7 +136,7 @@ def test_project_load_config( "import_data_user": ("STRING", "http-user"), } - project_uuid = str(uuid.uuid4()) + project_uuid = "16251f30-1168-41c9-8af6-07c8f40f6860" host, token = fake_conn req_mocks = [] for setting_name, (setting_type, setting_value) in responses.items(): @@ -155,7 +158,21 @@ def test_project_load_config( }, ) ) - result = runner.invoke(app, ["--verbose", "projects", "project-load-config", project_uuid]) + + ctx = types.SimpleNamespace( + obj=CommonOptions( + verbose=True, + verify_ssl=False, + config=None, + varfish_server_url=host, + varfish_api_token=token, + ) + ) + cli_project_load_config( + ctx, + project_uuid=project_uuid, + config_path=fake_fs_configured.os.path.expanduser("~/.varfishrc.toml"), + ) rc_path = fake_fs_configured.os.path.expanduser("~/.varfishrc.toml") with fake_fs_configured.open_(rc_path, "rt") as inputf: @@ -166,5 +183,4 @@ def test_project_load_config( for req_mock in req_mocks: assert req_mock.called_once, req_mock._netloc - assert result.exit_code == 0, result.output snapshot.assert_match(fcontents, "result_output") diff --git a/tests/test_config.py b/tests/test_config.py index 096e998..ac5450d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,5 @@ import json -import pydantic from pytest_snapshot.plugin import Snapshot from varfish_cli import config diff --git a/varfish_cli/api/project.py b/varfish_cli/api/project.py index 1c73b3f..16f04a0 100644 --- a/varfish_cli/api/project.py +++ b/varfish_cli/api/project.py @@ -59,7 +59,7 @@ def project_settings_retrieve( app_name: typing.Optional[str], setting_name: typing.Optional[str], verify_ssl: bool = True, - ) -> SettingsEntry: +) -> SettingsEntry: server_url = strip_trailing_slash(server_url) queries = [] if app_name: @@ -69,7 +69,9 @@ def project_settings_retrieve( query = "&".join(queries) if query: query = f"?{query}" - endpoint = 
f"{server_url}{ENDPOINT_PROJECT_SETTING_RETRIEVE}{query}".format(project_uuid=project_uuid) + endpoint = f"{server_url}{ENDPOINT_PROJECT_SETTING_RETRIEVE}{query}".format( + project_uuid=project_uuid + ) logger.debug("Sending GET request to end point %s", endpoint) headers = {"Authorization": "Token %s" % api_token} result = requests.get(endpoint, headers=headers, verify=verify_ssl) diff --git a/varfish_cli/cli/__init__.py b/varfish_cli/cli/__init__.py index 38a0434..4dd6687 100644 --- a/varfish_cli/cli/__init__.py +++ b/varfish_cli/cli/__init__.py @@ -8,12 +8,10 @@ from varfish_cli import __version__ from varfish_cli.cli import cases, importer, projects, varannos +from varfish_cli.cli.common import DEFAULT_PATH_VARFISHRC from varfish_cli.config import CommonOptions, load_config from varfish_cli.exceptions import InvalidConfiguration -#: Paths to search the global configuration in. -DEFAULT_PATH_VARFISHRC = "~/.varfishrc.toml" - def version_callback(value: bool): """Callback when called with 'version' or '--version'""" diff --git a/varfish_cli/cli/common.py b/varfish_cli/cli/common.py index f0a3903..a154e2c 100644 --- a/varfish_cli/cli/common.py +++ b/varfish_cli/cli/common.py @@ -13,6 +13,10 @@ from varfish_cli.common import OutputFormat from varfish_cli.exceptions import RestApiCallException +#: Paths to search the global configuration in. +DEFAULT_PATH_VARFISHRC = "~/.varfishrc.toml" + + #: Type to use for model in the helper classes below. 
ModelType = typing.TypeVar("ModelType", bound=pydantic.BaseModel) diff --git a/varfish_cli/cli/projects.py b/varfish_cli/cli/projects.py index 58d4723..ec86e38 100644 --- a/varfish_cli/cli/projects.py +++ b/varfish_cli/cli/projects.py @@ -1,25 +1,17 @@ """Implementation of varfish-cli subcommand "projects *".""" import os - -try: - import tomllib - from tomllib import TOMLDecodeError -except ImportError: - import toml as tomllib - from toml import TomlDecodeError as TOMLDecodeError - import typing import uuid from logzero import logger +import toml import typer from varfish_cli import api, common -from varfish_cli.cli import DEFAULT_PATH_VARFISHRC -from varfish_cli.cli.common import ListObjects, RetrieveObject +from varfish_cli.cli.common import DEFAULT_PATH_VARFISHRC, ListObjects, RetrieveObject from varfish_cli.common import OutputFormat -from varfish_cli.config import CommonOptions +from varfish_cli.config import ProjectConfig #: Default fields for projects. DEFAULT_FIELDS_PROJECT: typing.Dict[OutputFormat, typing.Optional[typing.Tuple[str, ...]]] = { @@ -100,9 +92,6 @@ def cli_project_load_config( str, typer.Option("--config-path", help="Path to configuration file", envvar="VARFISH_RC_PATH"), ] = DEFAULT_PATH_VARFISHRC, - output_file: typing.Annotated[ - str, typer.Option("--output-file", help="Path to file to write to") - ] = "-", ): """Load project configuration for import and store in ~/.varfishrc.toml""" common_options: common.CommonOptions = ctx.obj @@ -117,7 +106,7 @@ def cli_project_load_config( "import_data_user": str, } - kwargs = {} + kwargs = {"uuid": project_uuid} for field_name, field_type in fields_types.items(): logger.debug(" - retrieving %s", field_name) setting_entry = api.project_settings_retrieve( @@ -128,23 +117,33 @@ def cli_project_load_config( setting_name=field_name, verify_ssl=common_options.verify_ssl, ) - print(setting_entry) if setting_entry.value: kwargs[field_name] = field_type(setting_entry.value) - print(kwargs) + project_config 
= ProjectConfig(**kwargs).model_dump(mode="json") logger.info("... all data retrieved, updating config...") + logger.debug(" - project_config: %s", project_config) - if not os.path.exists(config_path): + if os.path.exists(config_path): with open(config_path, "rt") as tomlf: try: - config_toml = tomllib.loads(tomlf.read()) - except TOMLDecodeError as e: + config_toml = toml.loads(tomlf.read()) + except toml.TomlDecodeError as e: logger.error("could not parse configuration file %s: %s", config_path, e) raise typer.Exit(1) else: config_toml = {} - config_toml.setdefault("paths", []) + config_toml.setdefault("projects", []) + match_idx = None + for idx, project in enumerate(config_toml["projects"]): + if project["project"] == str(project_config["uuid"]): + match_idx = idx + break + else: + config_toml["projects"].append(project_config) + if match_idx is not None: + config_toml["projects"][match_idx] = project_config - print(kwargs) + with open(config_path, "wt") as outputf: + outputf.write(toml.dumps(config_toml)) diff --git a/varfish_cli/config.py b/varfish_cli/config.py index 53e5c09..ec197c9 100644 --- a/varfish_cli/config.py +++ b/varfish_cli/config.py @@ -2,19 +2,12 @@ import enum import os -import uuid - -try: - import tomllib - from tomllib import TOMLDecodeError -except ImportError: - import toml as tomllib - from toml import TomlDecodeError as TOMLDecodeError - import typing +import uuid from logzero import logger import pydantic +import toml import typer @@ -49,8 +42,8 @@ def load_config(config_path: str) -> typing.Tuple[typing.Optional[str], typing.O logger.debug("loading configuration from %s", config_path) with open(config_path, "rt") as tomlf: try: - config_toml = tomllib.loads(tomlf.read()) - except TOMLDecodeError as e: + config_toml = toml.loads(tomlf.read()) + except toml.TomlDecodeError as e: logger.error("could not parse configuration file %s: %s", config_path, e) raise typer.Exit(1) toml_varfish_server_url = config_toml.get("global", 
{}).get("varfish_server_url") @@ -108,7 +101,7 @@ def load_projects(config_path: str) -> typing.Dict[uuid.UUID, ProjectConfig]: with open(config_path, "rt") as inputf: fcontents = inputf.read() - toml_dict = tomllib.loads(fcontents) + toml_dict = toml.loads(fcontents) projects = list(map(ProjectConfig.model_validate, toml_dict.get("projects", []))) return {p.uuid: p for p in projects} From 4b753809ed6dc89f1c2c8889db1543a7eeed52d8 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 3 Nov 2023 12:44:37 +0100 Subject: [PATCH 5/7] wip --- docs/cases_import.rst | 84 +++++++++++++++++++ docs/index.rst | 1 + requirements/base.txt | 6 ++ tests/conftest.py | 31 ++++++- .../test_load_project_config/configuration | 9 ++ tests/test_common.py | 31 +++++++ tests/test_ftypes.py | 7 ++ varfish_cli/api/varannos.py | 1 - varfish_cli/cli/__init__.py | 5 +- varfish_cli/cli/cases_import.py | 43 ++++++++++ varfish_cli/cli/common.py | 4 - varfish_cli/cli/projects.py | 8 +- varfish_cli/common.py | 20 +++++ varfish_cli/ftypes/__init__.py | 66 +++++++++++++++ 14 files changed, 305 insertions(+), 11 deletions(-) create mode 100644 docs/cases_import.rst create mode 100644 tests/snapshots/test_common/test_load_project_config/configuration create mode 100644 tests/test_common.py create mode 100644 tests/test_ftypes.py create mode 100644 varfish_cli/cli/cases_import.py create mode 100644 varfish_cli/ftypes/__init__.py diff --git a/docs/cases_import.rst b/docs/cases_import.rst new file mode 100644 index 0000000..b0713ea --- /dev/null +++ b/docs/cases_import.rst @@ -0,0 +1,84 @@ +.. _main-cases_import: + +=========== +Case Import +=========== + +------------------------- +Phenopacket Bootstrapping +------------------------- + +.. note:: + Currently, only PED and VCF files are supported for bootstrapping phenopackets.`a` + +You must have loaded the project configuration via ``projects project-retrieve`` so the client knows the location/server and credentials of the raw data. 
+ +The ``cases-import bootstrap-phenopackets`` will then go over each file, incorporate it into the phenopackets file, and write out the phenopackets YAML. + +The other files are handled as follows. +All absolute paths are assumed to be on the local file system whereas relative paths are assumed to be relative to the project import data store. +Note that these absolute paths are also written to the phenopackets YAML file and this will not work in the import. + +``*.ped`` + PED/Pedigree file, used to derive sample information from. + You can specify at most one PED file and it will overwrite existing pedigree information. + +``*.bam``, ``*.bam.bai`` + The header of sequence alignment files will be read and the sample name is used to match it to the pedigree. + Note that the samples in the BAM file and the PED file must match. + BAM files must be indexed. + +``*.vcf.gz``, ``*.vcf.gz.tbi`` + The header of variant call files will be read as well as the first ten records. + This will be used to differentiate between sequence and structural variant files. + You can currently only give at most one sequence variant file but any number of structural variant files. + VCF files must be indexed. + +``$FILE.md5`` + Assumed to be the MD5 checksum file of ``$FILE`` and stored as checksum attribute for it. + +``*.csv``, ``*.txt``, ... + Information related to quality control from pipelines. + The command will try to detect the file types and register them into the phenopackets YAML file appropriately. + +The ``--target-region`` argument can be given multiple time and specify the target regions of the used sequencing kit. +Supported target regions must be configured on the server. +They are given as pseudo S3 URLs in the internal storage where the server administrator must configure them. + +The following target regions are available by default (for ``$RELEASE`` being one of ``GRCh37`` or ``GRCh38``) on a VarFish server installation. 
+ +whole genome + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/whole-genome.bed.gz`` + +Agilent SureSelect Human All Exon V4 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v4.bed.gz`` + +Agilent SureSelect Human All Exon V5 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v5.bed.gz`` + +Agilent SureSelect Human All Exon V6 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v6.bed.gz`` + +Agilent SureSelect Human All Exon V7 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v7.bed.gz`` + +Agilent SureSelect Human All Exon V8 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v8.bed.gz`` + +IDT xGen Exome Research Panel v1 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/idt-xgen-exome-research-panel-v1.bed.gz`` + +IDT xGen Exome Research Panel v2 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/idt-xgen-exome-research-panel-v2.bed.gz`` + +Twist Comprehensive Exome + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-comprehensive-exome.bed.gz`` + +Twist Core Exome + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-core-exome.bed.gz`` + +Twist Exome V2.0 + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-exome-v2_0.bed.gz`` + +Twist RefSeq Exome + ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-refseq-exome.bed.gz`` diff --git a/docs/index.rst b/docs/index.rst index 41d8ed3..74f277d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,6 +20,7 @@ This documentation will be expanded over time. :maxdepth: 1 installation + cases_import .. toctree:: :caption: Project Info diff --git a/requirements/base.txt b/requirements/base.txt index b2a031a..4ae500b 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -38,3 +38,9 @@ jsonschema >=4.4.0,<4.20 # Type checking typeguard >=2.13.3,<3.0 + +# Phenopackets parsing +phenopackets >=2.0,<3.0 + +# Access to VCF files. 
+vcfpy >=0.13.6,<0.14 diff --git a/tests/conftest.py b/tests/conftest.py index 6ad9b25..04a9716 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -46,7 +46,7 @@ def fake_conn() -> typing.Tuple[str, str]: @pytest.fixture def fake_fs_configured(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs: - """Fake file system with filled ``~/.varfishrc.toml``""" + """Fake file system with minimal ``~/.varfishrc.toml``""" host, token = fake_conn conf_file_path = fake_fs.os.path.expanduser("~/.varfishrc.toml") fake_fs.fs.create_file( @@ -61,3 +61,32 @@ def fake_fs_configured(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> Fa + "\n", ) return fake_fs + + +@pytest.fixture +def fake_fs_configured_projects(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs: + """Fake file system with ``~/.varfishrc.toml`` that also has project config""" + host, token = fake_conn + conf_file_path = fake_fs.os.path.expanduser("~/.varfishrc.toml") + fake_fs.fs.create_file( + conf_file_path, + contents="\n".join( + [ + "[global]", + f'varfish_server_url = "{host}"', + f'varfish_api_token = "{token}"', + "", + "[[projects]]", + 'title = "S3 Example"', + 'uuid = "00000000-0000-0000-0000-000000000001"', + 'import_data_protocol = "s3"', + 'import_data_host = "s3-server.example.net"', + "import_data_port = 443", + 'import_data_path = "bucket-name"', + 'import_data_user = "s3-user"', + 'import_data_password = "s3-key"', + ] + ) + + "\n", + ) + return fake_fs diff --git a/tests/snapshots/test_common/test_load_project_config/configuration b/tests/snapshots/test_common/test_load_project_config/configuration new file mode 100644 index 0000000..d18aba2 --- /dev/null +++ b/tests/snapshots/test_common/test_load_project_config/configuration @@ -0,0 +1,9 @@ +{ + "title": "S3 Example", + "uuid": "00000000-0000-0000-0000-000000000001", + "import_data_protocol": "s3", + "import_data_host": "s3-server.example.net", + "import_data_path": "bucket-name", + "import_data_user": "s3-user", 
+ "import_data_password": "s3-key" +} \ No newline at end of file diff --git a/tests/test_common.py b/tests/test_common.py new file mode 100644 index 0000000..15ee7e2 --- /dev/null +++ b/tests/test_common.py @@ -0,0 +1,31 @@ +import json +import typing +import uuid + +import pytest +from pytest_mock import MockerFixture +from pytest_snapshot.plugin import Snapshot +from requests_mock.mocker import Mocker as RequestsMocker +from typer.testing import CliRunner + +from tests.conftest import FakeFs +from varfish_cli import common + + +def test_load_project_config( + fake_fs_configured_projects: FakeFs, + mocker: MockerFixture, + snapshot: Snapshot, +): + mocker.patch("varfish_cli.common.open", fake_fs_configured_projects.open_, create=True) + mocker.patch("varfish_cli.common.os", fake_fs_configured_projects.os) + + config_null = common.load_project_config(uuid.UUID("00000000-0000-0000-0000-000000000000")) + config_some = common.load_project_config(uuid.UUID("00000000-0000-0000-0000-000000000001")) + + mocker.stopall() + + assert config_null is None + snapshot.assert_match( + json.dumps(config_some.model_dump(mode="json"), indent=2), "configuration" + ) diff --git a/tests/test_ftypes.py b/tests/test_ftypes.py new file mode 100644 index 0000000..6389fe8 --- /dev/null +++ b/tests/test_ftypes.py @@ -0,0 +1,7 @@ +from varfish_cli.ftypes import FileType, guess_by_path + + +def test_guess_by_path(): + assert guess_by_path("file.md5") == FileType.MD5 + assert guess_by_path("file.bam") == FileType.BAM + assert guess_by_path("file.xyz") == FileType.UNKNOWN diff --git a/varfish_cli/api/varannos.py b/varfish_cli/api/varannos.py index bd4cee4..aa2cf18 100644 --- a/varfish_cli/api/varannos.py +++ b/varfish_cli/api/varannos.py @@ -64,7 +64,6 @@ def varannoset_create( endpoint, headers=headers, data=payload.model_dump(mode="json"), verify=verify_ssl ) raise_for_status(result) - print(result.json()) return pydantic.TypeAdapter(VarAnnoSetV1).validate_python(result.json()) diff --git 
a/varfish_cli/cli/__init__.py b/varfish_cli/cli/__init__.py index 4dd6687..4195f99 100644 --- a/varfish_cli/cli/__init__.py +++ b/varfish_cli/cli/__init__.py @@ -7,8 +7,8 @@ import typer from varfish_cli import __version__ -from varfish_cli.cli import cases, importer, projects, varannos -from varfish_cli.cli.common import DEFAULT_PATH_VARFISHRC +from varfish_cli.cli import cases, cases_import, importer, projects, varannos +from varfish_cli.common import DEFAULT_PATH_VARFISHRC from varfish_cli.config import CommonOptions, load_config from varfish_cli.exceptions import InvalidConfiguration @@ -27,6 +27,7 @@ def version_callback(value: bool): app.add_typer(varannos.app, name="varannos", help="Subcommands for 'varannos' API") app.add_typer(projects.app, name="projects", help="Subcommands for 'project' API") app.add_typer(cases.app, name="cases", help="Subcommands for 'cases' API") +app.add_typer(cases_import.app, name="cases-import", help="Subcommands for 'cases-import' API") app.add_typer(importer.app, name="importer", help="Subcommands for 'importer' API") diff --git a/varfish_cli/cli/cases_import.py b/varfish_cli/cli/cases_import.py new file mode 100644 index 0000000..76b5d29 --- /dev/null +++ b/varfish_cli/cli/cases_import.py @@ -0,0 +1,43 @@ +"""Implementation of varfish-cli subcommand "cases-import *".""" + +import typing +import uuid + +from logzero import logger +import typer + +from varfish_cli import api, common +from varfish_cli.cli.common import ListObjects, RetrieveObject +from varfish_cli.common import DEFAULT_PATH_VARFISHRC, OutputFormat + +#: The ``Typer`` instance to use for the ``cases-import`` sub command. 
+app = typer.Typer(no_args_is_help=True) + + +@app.command("bootstrap-phenopackets") +def cli_bootstrap_phenopackets( + ctx: typer.Context, + project_uuid: typing.Annotated[uuid.UUID, typer.Argument(..., help="UUID of project")], + phenopacket_file: typing.Annotated[ + str, typer.Argument(..., help="Path of phenopacket file to bootstrap") + ], + other_files: typing.Annotated[ + typing.List[str], typer.Argument(..., help="Paths of files to incorporate") + ], + target_region: typing.Annotated[ + typing.List[str], typer.Option("--target-region", help="Target region specification") + ], + config_path: typing.Annotated[ + str, + typer.Option("--config-path", help="Path to configuration file", envvar="VARFISH_RC_PATH"), + ] = DEFAULT_PATH_VARFISHRC, +): + """Bootstrap a new or existing phenopackets YAML file""" + common_options: common.CommonOptions = ctx.obj + + #: load configuration for the selected project + logger.info("Loading configuration for project %s from %s", project_uuid, config_path) + project_config = common.load_project_config(project_uuid, config_path=config_path) + if not project_config: + logger.error("No configuration found for project %s", project_uuid) + raise typer.Exit(1) diff --git a/varfish_cli/cli/common.py b/varfish_cli/cli/common.py index a154e2c..f0a3903 100644 --- a/varfish_cli/cli/common.py +++ b/varfish_cli/cli/common.py @@ -13,10 +13,6 @@ from varfish_cli.common import OutputFormat from varfish_cli.exceptions import RestApiCallException -#: Paths to search the global configuration in. -DEFAULT_PATH_VARFISHRC = "~/.varfishrc.toml" - - #: Type to use for model in the helper classes below. 
ModelType = typing.TypeVar("ModelType", bound=pydantic.BaseModel) diff --git a/varfish_cli/cli/projects.py b/varfish_cli/cli/projects.py index ec86e38..9b989d3 100644 --- a/varfish_cli/cli/projects.py +++ b/varfish_cli/cli/projects.py @@ -9,8 +9,8 @@ import typer from varfish_cli import api, common -from varfish_cli.cli.common import DEFAULT_PATH_VARFISHRC, ListObjects, RetrieveObject -from varfish_cli.common import OutputFormat +from varfish_cli.cli.common import ListObjects, RetrieveObject +from varfish_cli.common import DEFAULT_PATH_VARFISHRC, OutputFormat from varfish_cli.config import ProjectConfig #: Default fields for projects. @@ -124,6 +124,7 @@ def cli_project_load_config( logger.info("... all data retrieved, updating config...") logger.debug(" - project_config: %s", project_config) + config_path = os.path.expanduser(config_path) if os.path.exists(config_path): with open(config_path, "rt") as tomlf: try: @@ -137,7 +138,7 @@ def cli_project_load_config( config_toml.setdefault("projects", []) match_idx = None for idx, project in enumerate(config_toml["projects"]): - if project["project"] == str(project_config["uuid"]): + if project["uuid"] == str(project_config["uuid"]): match_idx = idx break else: @@ -147,3 +148,4 @@ def cli_project_load_config( with open(config_path, "wt") as outputf: outputf.write(toml.dumps(config_toml)) + logger.info("All done. Have a nice day!") diff --git a/varfish_cli/common.py b/varfish_cli/common.py index af0bcc1..f72a48a 100644 --- a/varfish_cli/common.py +++ b/varfish_cli/common.py @@ -5,10 +5,17 @@ from enum import Enum, unique import io import json +import os import typing import uuid from tabulate import tabulate +import toml + +from varfish_cli.config import ProjectConfig + +#: Paths to search the global configuration in. 
+DEFAULT_PATH_VARFISHRC = "~/.varfishrc.toml" class CustomEncoder(json.JSONEncoder): @@ -89,3 +96,16 @@ def load_json(path_or_payload: str) -> typing.Any: return json.load(inputf) else: return json.loads(path_or_payload) + + +def load_project_config( + project_uuid: uuid.UUID, *, config_path: str = DEFAULT_PATH_VARFISHRC +) -> typing.Optional[ProjectConfig]: + """Load project configuration from file.""" + config_path = os.path.expanduser(config_path) + with open(config_path, "rt") as inputf: + config = toml.loads(inputf.read()) + for project in config["projects"]: + if project["uuid"] == str(project_uuid): + return ProjectConfig(**project) + return None diff --git a/varfish_cli/ftypes/__init__.py b/varfish_cli/ftypes/__init__.py new file mode 100644 index 0000000..ac008a3 --- /dev/null +++ b/varfish_cli/ftypes/__init__.py @@ -0,0 +1,66 @@ +"""Support for file types (for upload).""" + +import enum +import pathlib +import typing +import warnings + + +class UnsupportedFileTypeWarning(UserWarning): + """Warning for unsupported file types.""" + + +@enum.unique +class FileType(enum.Enum): + """Enumeration of supported file types.""" + + #: Unknown file type. + UNKNOWN = "UNKNOWN" + + #: MD5 checksum file. + MD5 = "MD5" + #: SHA1 checksum file. + SHA1 = "SHA1" + #: SHA256 checksum file. + SHA256 = "SHA256" + + #: BAM file. + BAM = "BAM" + #: BAM index file. + BAM_BAI = "BAM_BAI" + + #: VCF file. + VCF = "VCF" + #: VCF tabix index file. + VCF_TBI = "VCF_TBI" + + #: PLINK pedigree file. + PED = "PED" + + @property + def is_checksum(self): + """Return whether this is a checksum file.""" + return self in (self.MD5, self.SHA1, self.SHA256) + + +#: Map from file suffixes to file types. 
+SUFFIX_MAP = {
+    ".md5": FileType.MD5,
+    ".sha1": FileType.SHA1,
+    ".sha256": FileType.SHA256,
+    ".bam": FileType.BAM,
+    ".bam.bai": FileType.BAM_BAI,
+    ".vcf": FileType.VCF,
+    ".vcf.gz": FileType.VCF,
+    ".vcf.tbi": FileType.VCF_TBI,
+    ".ped": FileType.PED,
+}
+
+
+def guess_by_path(path: typing.Union[str, pathlib.Path]) -> FileType:
+    """Guess the file type by path."""
+    path_ = pathlib.Path(path)
+    for suffix, file_type in SUFFIX_MAP.items():
+        if path_.name.endswith(suffix):
+            return file_type
+    return FileType.UNKNOWN

From 2cb099b858368bef202c855ae76cb77732e6132a Mon Sep 17 00:00:00 2001
From: Manuel Holtgrewe
Date: Fri, 3 Nov 2023 13:26:09 +0100
Subject: [PATCH 6/7] wip

---
 requirements/base.txt                          |  3 +
 .../test_varannoset_create/result_output       |  1 -
 varfish_cli/cli/cases_import.py                | 55 ++++++++++++++++++-
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/requirements/base.txt b/requirements/base.txt
index 4ae500b..0976316 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -42,5 +42,8 @@ typeguard >=2.13.3,<3.0
 # Phenopackets parsing
 phenopackets >=2.0,<3.0
 
+# Parsing of YAML files
+pyyaml >=6,<7
+
 # Access to VCF files.
vcfpy >=0.13.6,<0.14 diff --git a/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output b/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output index c419f72..7138f18 100644 --- a/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output +++ b/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output @@ -1,4 +1,3 @@ -{'sodar_uuid': 'e211747f-2a50-4a65-b192-c96bc2e111fa', 'date_created': '2023-10-31T08:15:15+01:00', 'date_modified': '2023-10-31T08:15:15+01:00', 'project': '062b8838-453f-4cf3-817d-a5ec76546462', 'title': 'my title', 'description': 'None', 'release': 'GRCh37', 'fields': ['pathogenicity', 'notes']} { "sodar_uuid": "e211747f-2a50-4a65-b192-c96bc2e111fa", "date_created": "2023-10-31T08:15:15+01:00", diff --git a/varfish_cli/cli/cases_import.py b/varfish_cli/cli/cases_import.py index 76b5d29..0bfe622 100644 --- a/varfish_cli/cli/cases_import.py +++ b/varfish_cli/cli/cases_import.py @@ -1,10 +1,15 @@ """Implementation of varfish-cli subcommand "cases-import *".""" +import datetime +import os import typing import uuid +from google.protobuf.json_format import MessageToDict, ParseDict, ParseError from logzero import logger +from phenopackets import Family import typer +import yaml from varfish_cli import api, common from varfish_cli.cli.common import ListObjects, RetrieveObject @@ -33,11 +38,55 @@ def cli_bootstrap_phenopackets( ] = DEFAULT_PATH_VARFISHRC, ): """Bootstrap a new or existing phenopackets YAML file""" - common_options: common.CommonOptions = ctx.obj - - #: load configuration for the selected project + # load configuration for the selected project logger.info("Loading configuration for project %s from %s", project_uuid, config_path) project_config = common.load_project_config(project_uuid, config_path=config_path) if not project_config: logger.error("No configuration found for project %s", project_uuid) raise typer.Exit(1) + + # split files by file type + assert False + + # if we do not 
have a phenopacket file, ensure that we have a PED file + assert False + + # load phenopacket file or create new one + family: Family + create_output: bool + if os.path.exists(phenopacket_file): + create_output = False + with open(phenopacket_file, "rt") as inputf: + try: + yaml_dict = yaml.safe_load(inputf) + except yaml.parser.ParserError as e: + logger.error("Could not parse phenopacket YAML file: %s", e) + raise typer.Exit(1) + if "family" not in yaml_dict: # pragma: no cover + logger.error("No 'family' section found at top of phenopacket YAML file") + raise typer.Exit(1) + try: + family = ParseDict(js_dict=yaml_dict["family"], message=Family()) + except ParseError as e: # pragma: no cover + logger.error("Could not load phenopacket data: %s", e) + raise typer.Exit(1) + else: + create_output = True + family = Family() + + # sync members in PED and phenopackets file + assert False + + # write out phenopackets file + if create_output: + logger.info("Creating new phenopacket file %s", phenopacket_file) + else: + timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M%S") + path_bak = f"{phenopacket_file}.bak~{timestamp}" + logger.info("Move file %s => %s", phenopacket_file, path_bak) + os.rename(phenopacket_file, path_bak) + logger.info("Re-creating phenopacket file %s", phenopacket_file) + with open(phenopacket_file, "wt") as outputf: + yaml.dump({"family": MessageToDict(family)}, outputf) + + logger.info("All done. 
Have a nice day!") From e5af8d48e0760d23ef64a85894f441a6385c3349 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Thu, 11 Jan 2024 08:16:00 +0100 Subject: [PATCH 7/7] wip --- tests/test_ftypes.py | 2 +- varfish_cli/cli/cases_import.py | 52 +++++++++++++++++-- varfish_cli/{ftypes/__init__.py => ftypes.py} | 0 varfish_cli/parse_ped.py | 2 +- 4 files changed, 49 insertions(+), 7 deletions(-) rename varfish_cli/{ftypes/__init__.py => ftypes.py} (100%) diff --git a/tests/test_ftypes.py b/tests/test_ftypes.py index 6389fe8..62d8ee2 100644 --- a/tests/test_ftypes.py +++ b/tests/test_ftypes.py @@ -1,4 +1,4 @@ -from varfish_cli.ftypes import FileType, guess_by_path +from varfish_cli.ftypes.ftypes import FileType, guess_by_path def test_guess_by_path(): diff --git a/varfish_cli/cli/cases_import.py b/varfish_cli/cli/cases_import.py index 0bfe622..1cfedf8 100644 --- a/varfish_cli/cli/cases_import.py +++ b/varfish_cli/cli/cases_import.py @@ -5,20 +5,34 @@ import typing import uuid +#from google.protobuf import CopyFrom from google.protobuf.json_format import MessageToDict, ParseDict, ParseError from logzero import logger from phenopackets import Family import typer import yaml -from varfish_cli import api, common +from varfish_cli import api, common, ftypes from varfish_cli.cli.common import ListObjects, RetrieveObject from varfish_cli.common import DEFAULT_PATH_VARFISHRC, OutputFormat +from varfish_cli.parse_ped import Donor, parse_ped #: The ``Typer`` instance to use for the ``cases-import`` sub command. app = typer.Typer(no_args_is_help=True) +def sync_family_with_donors(family: Family, donors: typing.Dict[str, Donor]) -> Family: + """Sync the given family with the given donors. + + The first donor is assumed to be the index. 
+ """ + family.proband + + # make a copy of the family + result = Family() + result.CopyFrom(family) + + @app.command("bootstrap-phenopackets") def cli_bootstrap_phenopackets( ctx: typer.Context, @@ -46,10 +60,29 @@ def cli_bootstrap_phenopackets( raise typer.Exit(1) # split files by file type - assert False + files_by_type = {} + for other_file in other_files: + file_type = ftypes.guess_by_path(other_file) + if file_type not in files_by_type: + files_by_type[file_type] = [other_file] + else: + files_by_type[file_type].append(other_file) + for file_type, files in sorted(files_by_type.items()): + if file_type is ftypes.FileType.UNKNOWN: + logger.warn("could not determine file type for %d files", len(files)) + else: + logger.info("%d files of type %s", len(files), file_type.value) + for file_ in files: + logger.info(" - %s", file_) # if we do not have a phenopacket file, ensure that we have a PED file - assert False + num_peds = len(files_by_type.get(ftypes.FileType.PED, [])) + if not os.path.exists(phenopacket_file) and num_peds == 0: + logger.error("No PED file given and no phenopacket file given") + raise typer.Exit(1) + if num_peds > 1: + logger.error("More than one PED file given") + raise typer.Exit(1) # load phenopacket file or create new one family: Family @@ -74,8 +107,17 @@ def cli_bootstrap_phenopackets( create_output = True family = Family() - # sync members in PED and phenopackets file - assert False + # load pedigree and sync members in PED and phenopackets file + if num_peds == 1: + path_ped = files_by_type[ftypes.FileType.PED][0] + if not os.path.exists(path_ped): + logger.error("PED file %s does not exist", path_ped) + raise typer.Exit(1) + with open(path_ped, "rt") as inputf: + donors = parse_ped(inputf) + donors_by_ped = {donor.name: donor for donor in donors} + family = sync_family_with_donors(family, donors_by_ped) + # write out phenopackets file if create_output: diff --git a/varfish_cli/ftypes/__init__.py b/varfish_cli/ftypes.py similarity 
index 100% rename from varfish_cli/ftypes/__init__.py rename to varfish_cli/ftypes.py diff --git a/varfish_cli/parse_ped.py b/varfish_cli/parse_ped.py index c14e09e..f1608ac 100644 --- a/varfish_cli/parse_ped.py +++ b/varfish_cli/parse_ped.py @@ -25,7 +25,7 @@ class Donor(pydantic.BaseModel): disease: str -def parse_ped(ped_file) -> typing.List[Donor]: +def parse_ped(ped_file: typing.TextIO) -> typing.List[Donor]: """Parse a given PED file and return ``Donor`` objects.""" lines = [] for line in ped_file.readlines():