From 5ba259a24c838bad6ce0714a08602dfa4162ecb7 Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Fri, 2 May 2025 19:57:41 +0200 Subject: [PATCH 01/12] add: multiple field on plugin --- openvariant/annotation/annotation.py | 16 ++++++++++------ openvariant/variant/variant.py | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/openvariant/annotation/annotation.py b/openvariant/annotation/annotation.py index 3b9359e..48ab191 100644 --- a/openvariant/annotation/annotation.py +++ b/openvariant/annotation/annotation.py @@ -67,7 +67,7 @@ def _check_annotation_keys(annot: dict) -> None: raise ValueError(f"'{AnnotationKeys.TYPE.value}' value is wrong.") # Field key - if AnnotationKeys.FIELD.value not in annot or not isinstance(annot[AnnotationKeys.FIELD.value], str): + if AnnotationKeys.FIELD.value not in annot or (not isinstance(annot[AnnotationKeys.FIELD.value], list) and not isinstance(annot[AnnotationKeys.FIELD.value], str)): raise KeyError(f"'{AnnotationKeys.FIELD.value}' key not found or is not a str.") # Value key @@ -125,10 +125,13 @@ def _read_annotation_file(self) -> dict: logging.error(exc) stream.close() + def _clean_annotation_keys(self): + return [item for x in self.annotations.keys() for item in (list(x) if isinstance(x, tuple) else [x])] + def _check_columns(self) -> None: """Check if columns exists as annotation fields""" for col in self._columns: - if col not in self._annotations: + if col not in self._clean_annotation_keys(): raise KeyError(f"'{col}' column unable to find.") def __init__(self, annotation_path: str) -> None: @@ -165,15 +168,16 @@ def __init__(self, annotation_path: str) -> None: self._annotations: dict = {} for k in raw_annotation.get(AnnotationGeneralKeys.ANNOTATION.value, []): - class_name = k[AnnotationKeys.TYPE.value].upper() module_name = "openvariant.annotation.builder" ClassAnnotation = import_class_from_module(module_name, class_name) instance = ClassAnnotation() + if 
isinstance(k[AnnotationKeys.FIELD.value], list): + self._annotations[tuple(k[AnnotationKeys.FIELD.value])] = instance(k, self._path) + else: + self._annotations[k[AnnotationKeys.FIELD.value]] = instance(k, self._path) - self._annotations[k[AnnotationKeys.FIELD.value]] = instance(k, self._path) - - self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, list(self.annotations.keys())) + self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, self._clean_annotation_keys()) self._check_columns() @property diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py index e31e270..8afe428 100644 --- a/openvariant/variant/variant.py +++ b/openvariant/variant/variant.py @@ -108,6 +108,7 @@ def _extract_header(file_path: str, original_header: list, annotation: Annotatio instance = ClassAnnotation() header_schema.update({field: instance(ann, original_header, file_path, header_schema)}) + return header_schema, annotation.columns @@ -180,8 +181,9 @@ def __init__(self, path: str, annotation: Annotation, skip_files: bool = False) csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2)) self._path: str = path self._annotation: Annotation = annotation - self._header: List[str] = list(annotation.annotations.keys()) if len(annotation.columns) == 0 \ - else annotation.columns + + #annotation_keys = [item for x in annotation.annotations.keys() for item in (list(x) if isinstance(x, tuple) else [x])] + self._header: List[str] = annotation.columns self.skip_files = skip_files def _unify(self, base_path: str, annotation: Annotation, group_by: str = None, display_header: bool = True) \ @@ -216,6 +218,7 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display row, plugin_values, mapping_values = {}, {}, {} for head in annotation.annotations.keys(): type_ann, value, func = header[head] + if type_ann == AnnotationTypes.PLUGIN.name: plugin_values[head] = header[head] elif type_ann == AnnotationTypes.MAPPING.name: @@ 
-239,9 +242,15 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display for head, mapping in mapping_values.items(): _, builder_mapping, func = mapping line_dict[head] = _parse_mapping_field(builder_mapping, line_dict, func) + for head, plug in plugin_values.items(): _, ctxt_plugin, func_plugin = plug - line_dict[head] = _parse_plugin_field(line_dict, head, file_path, ctxt_plugin, func_plugin) + value_plugin = _parse_plugin_field(line_dict, head, file_path, ctxt_plugin, func_plugin) + if isinstance(head, tuple): + for idx, x in enumerate(head): + line_dict[x] = value_plugin[idx] + else: + line_dict[head] = value_plugin for k in annotation.columns: row[k] = line_dict[k].format(**line_dict) From 7058662f19e418e43487e4a2fb7e7abd5ab85b0e Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Fri, 16 May 2025 19:01:00 +0200 Subject: [PATCH 02/12] fix: little checks --- openvariant/annotation/annotation.py | 1 - openvariant/variant/variant.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/openvariant/annotation/annotation.py b/openvariant/annotation/annotation.py index 48ab191..e8e22b0 100644 --- a/openvariant/annotation/annotation.py +++ b/openvariant/annotation/annotation.py @@ -176,7 +176,6 @@ def __init__(self, annotation_path: str) -> None: self._annotations[tuple(k[AnnotationKeys.FIELD.value])] = instance(k, self._path) else: self._annotations[k[AnnotationKeys.FIELD.value]] = instance(k, self._path) - self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, self._clean_annotation_keys()) self._check_columns() diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py index 8afe428..ebba768 100644 --- a/openvariant/variant/variant.py +++ b/openvariant/variant/variant.py @@ -204,7 +204,6 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display try: self.mm, self.file = _open_file(file_path, "rb") - for lnum, line in _base_parser(self.mm, file_path, annotation.delimiter, 
self.skip_files): try: if header is None: @@ -218,7 +217,6 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display row, plugin_values, mapping_values = {}, {}, {} for head in annotation.annotations.keys(): type_ann, value, func = header[head] - if type_ann == AnnotationTypes.PLUGIN.name: plugin_values[head] = header[head] elif type_ann == AnnotationTypes.MAPPING.name: From 25232eb4cb631c1fb174984837ff85510188368d Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Sun, 18 May 2025 17:01:01 +0200 Subject: [PATCH 03/12] add: examples and change docs --- docs/examples/plugin_examples.rst | 192 +++++++++++++++++- .../HGVS_decoder/HGVS_decoder.py | 134 ++++++++++++ .../plugin_system/HGVS_decoder/__init__.py | 2 + .../plugin_system/plugin_system.ipynb | 93 +++++++-- examples/datasets/sample4/gnomAD.csv | 20 ++ examples/datasets/sample4/sample4.yaml | 22 ++ .../HGVS_decoder/HGVS_decoder.py | 134 ++++++++++++ .../plugin_system/HGVS_decoder/__init__.py | 2 + examples/plugin_system/README.md | 10 +- examples/plugin_system/plugin_system.ipynb | 93 +++++++-- 10 files changed, 666 insertions(+), 36 deletions(-) create mode 100644 docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py create mode 100644 docs/examples/plugin_system/HGVS_decoder/__init__.py create mode 100644 examples/datasets/sample4/gnomAD.csv create mode 100644 examples/datasets/sample4/sample4.yaml create mode 100644 examples/plugin_system/HGVS_decoder/HGVS_decoder.py create mode 100644 examples/plugin_system/HGVS_decoder/__init__.py diff --git a/docs/examples/plugin_examples.rst b/docs/examples/plugin_examples.rst index 9e6af80..8ad942f 100644 --- a/docs/examples/plugin_examples.rst +++ b/docs/examples/plugin_examples.rst @@ -3,12 +3,18 @@ Plugin examples =============================== + + **OpenVariant** offers a plugin system, where the user will be able to build their own plugins and make a customized data transformation. 
First of all, you will need to create a plugin; hence, check :ref:`Command-line interface` section and :ref:`Command-line interface examples` to understand how a plugin template can be generated. Also, it is important to know how plugins works and how they are composed in order to understand the following examples that we introduce. -We are going to introduce you two little plugins that we will use them in the example. The two plugins are described and built as: +Unique field plugin +---------------------- + +Plugins can modify individual fields, and in this example, we introduce two small plugins that are described and +implemented as follows: *Add date* plugin ######################## @@ -93,7 +99,188 @@ extract the length between the two fields. return context.row[context.field_name] -These two plugins are used in the following example: +Multiple fields plugin +------------------------- + +The plugin system also allows transforming multiple fields simultaneously; such a plugin can be constructed as follows: + +*HGVS decoder* plugin +####################### + +`The Human Genome Variation Society (HGVS) Nomenclature <https://hgvs-nomenclature.org/>`_ is the global standard +for describing DNA, RNA, and protein sequence variants. It is widely used in clinical reports, scientific publications, +and variant databases to communicate genetic changes. HGVS variants are expressed using a specific syntax that encodes +detailed information about the type and location of the change (e.g. `c.76A>T`, `r.76_78del`, `p.Gly76_Val78del`). + +In this plugin, we decode HGVS expressions by identifying and separating the variant type (*TYPE*), its position (*POSITION*), +and the specific change that occurs (*VARIANT*). + +The *annotation* file with multiple fields can be described as: + +.. 
code-block:: yaml + + columns: + - TYPE + - POSITION + - VARIANT + + annotation: + - type: plugin + plugin: multi_test + field: + - TYPE + - POSITION + - VARIANT + - type: internal + field: HGVS + fieldSource: + - 'HGVS Consequence' + - HGVSp + + +We built the plugin with attention to the order of the different fields it processes. + +.. code-block:: python + + from openvariant.plugins.context import Context + from openvariant.plugins.plugin import Plugin + + import re + + class HGVS_decoderContext(Context): + + def __init__(self, row: dict, field_name: str, file_path: str) -> None: + super().__init__(row, field_name, file_path) + + + amino_acids_map = { + "Ala": "Alanine", + "Arg": "Arginine", + "Asn": "Asparagine", + "Asp": "Aspartic Acid", + "Cys": "Cysteine", + "Gln": "Glutamine", + "Glu": "Glutamic Acid", + "Gly": "Glycine", + "His": "Histidine", + "Ile": "Isoleucine", + "Leu": "Leucine", + "Lys": "Lysine", + "Met": "Methionine", + "Phe": "Phenylalanine", + "Pro": "Proline", + "Ser": "Serine", + "Thr": "Threonine", + "Trp": "Tryptophan", + "Tyr": "Tyrosine", + "Val": "Valine", + "Ter": "Termination codon" + } + + variant_map = { + "delins": "deletion-insertion by ", + "del": "deletion", + "ins": "insertion of ", + "dup": "duplication", + "inv": "inversion", + "con": "conversion", + "ext": "extension of ", + "fs": "frameshift mutation of " + } + + position_regex = re.compile(r'(\(?\*?-?\??\_?\d+(?:\_?[+-]\d+\??)?\)?(_)?(?:\(?\*?-?\d+\_?(?:[+-]\d+)?\??\)?)?)') + protein_position_regex = re.compile(r'(?[ACTG]+|del|ins[ACTG]+|dup|inv|con|\[[0-9]+\]|delins[ACTG]+') + variant_rna_regex = re.compile(r'[agcu]+>[agcu]+|del|ins[agcu]+|dup|inv|con|\[[0-9]+\]|delins[agcu]+') + + amino_acids = r'(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)' + variant_protein_aa_regex = re.compile(rf'(? 
0: + variant = variant_map.get(matches_variant[0]) + matches_n = re.findall(nucleotides, matches[0]) + if len(matches_n) > 0: + variant += matches_n[0] + else: + variant = matches[0] + return variant + + def parse_hgvs_variant_protein(hgvs_str): + matches = re.findall(variant_protein_aa_regex, hgvs_str) + if len(matches) == 1: + variant = amino_acids_map.get(matches[0]) + else: + aa_1 = amino_acids_map.get(matches[0]) + aa_2 = amino_acids_map.get(matches[1]) + if aa_1 == aa_2: + variant = "Synonymous (silent) variant" + else: + variant = aa_1 + " mutated to " + aa_2 + matches = re.findall(variant_protein_mod_regex, hgvs_str) + if len(matches) > 0: + variant += " and " + matches_variant = re.findall(variant_type_regex, matches[0]) + variant += variant_map.get(matches_variant[0]) + matches_amino_acid = re.findall(amino_acids, matches[0]) + if len(matches_amino_acid) > 0: + variant += amino_acids_map.get(matches_amino_acid[0]) + return variant + + def interpret_hgvs(hgvs_str): + prefix_map = { + "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant), + "c.": ("cDNA", parse_hgvs_pos, parse_hgvs_variant), + "n.": ("ncDNA", parse_hgvs_pos, parse_hgvs_variant), + "m.": ("mtDNA", parse_hgvs_pos, parse_hgvs_variant), + "r.": ("RNA", parse_hgvs_pos, parse_hgvs_variant), + "p.": ("Protein", parse_hgvs_pos_protein, parse_hgvs_variant_protein), + } + + prefix = hgvs_str[:2] + + result = prefix_map.get(prefix, ("Unknown", [], [])) + seq = hgvs_str[2:] + + type_variant = result[0] + position = result[1](seq) + variant = result[2](seq) + + return type_variant, position, variant + + + + class HGVS_decoderPlugin(Plugin): + + def run(self, context: HGVS_decoderContext) -> dict: + + value = context.row["HGVS"] + type_variant, position, variant = interpret_hgvs(value) + + return type_variant, position, variant + + + +We can find all the examples on the repository: `OpenVariant examples `_ +and these plugins are used in the following examples: .. 
nbgallery:: :name: Plugin System examples @@ -101,4 +288,3 @@ These two plugins are used in the following example: plugin_system/plugin_system.ipynb -We can find all the examples on the repository: `OpenVariant examples `_. \ No newline at end of file diff --git a/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py new file mode 100644 index 0000000..881462c --- /dev/null +++ b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py @@ -0,0 +1,134 @@ +from openvariant.plugins.context import Context +from openvariant.plugins.plugin import Plugin + +import re + +class HGVS_decoderContext(Context): + + def __init__(self, row: dict, field_name: str, file_path: str) -> None: + super().__init__(row, field_name, file_path) + + +amino_acids_map = { + "Ala": "Alanine", + "Arg": "Arginine", + "Asn": "Asparagine", + "Asp": "Aspartic Acid", + "Cys": "Cysteine", + "Gln": "Glutamine", + "Glu": "Glutamic Acid", + "Gly": "Glycine", + "His": "Histidine", + "Ile": "Isoleucine", + "Leu": "Leucine", + "Lys": "Lysine", + "Met": "Methionine", + "Phe": "Phenylalanine", + "Pro": "Proline", + "Ser": "Serine", + "Thr": "Threonine", + "Trp": "Tryptophan", + "Tyr": "Tyrosine", + "Val": "Valine", + "Ter": "Termination codon" +} + +variant_map = { + "delins": "deletion-insertion by ", + "del": "deletion", + "ins": "insertion of ", + "dup": "duplication", + "inv": "inversion", + "con": "conversion", + "ext": "extension of ", + "fs": "frameshift mutation of " +} + +position_regex = re.compile(r'(\(?\*?-?\??\_?\d+(?:\_?[+-]\d+\??)?\)?(_)?(?:\(?\*?-?\d+\_?(?:[+-]\d+)?\??\)?)?)') +protein_position_regex = re.compile(r'(?[ACTG]+|del|ins[ACTG]+|dup|inv|con|\[[0-9]+\]|delins[ACTG]+') +variant_rna_regex = re.compile(r'[agcu]+>[agcu]+|del|ins[agcu]+|dup|inv|con|\[[0-9]+\]|delins[agcu]+') + +amino_acids = r'(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)' +variant_protein_aa_regex = re.compile(rf'(? 
0: + variant = variant_map.get(matches_variant[0]) + matches_n = re.findall(nucleotides, matches[0]) + if len(matches_n) > 0: + variant += matches_n[0] + else: + variant = matches[0] + return variant + +def parse_hgvs_variant_protein(hgvs_str): + matches = re.findall(variant_protein_aa_regex, hgvs_str) + if len(matches) == 1: + variant = amino_acids_map.get(matches[0]) + else: + aa_1 = amino_acids_map.get(matches[0]) + aa_2 = amino_acids_map.get(matches[1]) + if aa_1 == aa_2: + variant = "Synonymous (silent) variant" + else: + variant = aa_1 + " mutated to " + aa_2 + matches = re.findall(variant_protein_mod_regex, hgvs_str) + if len(matches) > 0: + variant += " and " + matches_variant = re.findall(variant_type_regex, matches[0]) + variant += variant_map.get(matches_variant[0]) + matches_amino_acid = re.findall(amino_acids, matches[0]) + if len(matches_amino_acid) > 0: + variant += amino_acids_map.get(matches_amino_acid[0]) + return variant + +def interpret_hgvs(hgvs_str): + prefix_map = { + "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant), + "c.": ("cDNA", parse_hgvs_pos, parse_hgvs_variant), + "n.": ("ncDNA", parse_hgvs_pos, parse_hgvs_variant), + "m.": ("mtDNA", parse_hgvs_pos, parse_hgvs_variant), + "r.": ("RNA", parse_hgvs_pos, parse_hgvs_variant), + "p.": ("Protein", parse_hgvs_pos_protein, parse_hgvs_variant_protein), + } + + prefix = hgvs_str[:2] + + result = prefix_map.get(prefix, ("Unknown", [], [])) + seq = hgvs_str[2:] + + type_variant = result[0] + position = result[1](seq) + variant = result[2](seq) + + return type_variant, position, variant + + + +class HGVS_decoderPlugin(Plugin): + + def run(self, context: HGVS_decoderContext) -> dict: + + value = context.row["HGVS"] + type_variant, position, variant = interpret_hgvs(value) + + return type_variant, position, variant diff --git a/docs/examples/plugin_system/HGVS_decoder/__init__.py b/docs/examples/plugin_system/HGVS_decoder/__init__.py new file mode 100644 index 0000000..a9b2ec9 --- /dev/null +++ 
b/docs/examples/plugin_system/HGVS_decoder/__init__.py @@ -0,0 +1,2 @@ +import .multi_test from Multi_testPlugin +import .multi_test from Multi_testContext diff --git a/docs/examples/plugin_system/plugin_system.ipynb b/docs/examples/plugin_system/plugin_system.ipynb index 8469a7c..5a741d6 100644 --- a/docs/examples/plugin_system/plugin_system.ipynb +++ b/docs/examples/plugin_system/plugin_system.ipynb @@ -2,19 +2,40 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# Plugin system example" - ], "metadata": { "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "# Plugin system example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unique field plugin\n", + "\n", + "Includes the Add Date plugin and the Get Length plugin." + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -41,34 +62,74 @@ "source": [ "%%bash\n", "openvar cat ../datasets/sample3 --header" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiple fields plugin\n", + "\n", + "Decoding HGVS across different variants" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TYPE\tPOSITION\tVARIANT\n", + "cDNA\t-33-42\tG>T\n", + "cDNA\t-33-42\tG>C\n", + "cDNA\t-33-42\tG>A\n", + "cDNA\t-33-39\tT>C\n", + "cDNA\t-33-37\tC>A\n", + "cDNA\t-33-36\tC>T\n", + "cDNA\t-33-34\tA>G\n", + "cDNA\t-33-33\tG>A\n", + "cDNA\t-33-30\tT>C\n", + "cDNA\t-33-28\tC>A\n", + "cDNA\t-33-27\tT>C\n", + "cDNA\t-33-25\tduplication\n", + "cDNA\t-33-24\tG>C\n", + "cDNA\t-33-24\tG>A\n", + "cDNA\t-33-23\tG>A\n", + "cDNA\t-33-21\tG>A\n", + "cDNA\t-33-20\tT>A\n", + 
"cDNA\t-33-19\tC>G\n", + "cDNA\t-33-19\tC>A\n" + ] } - } + ], + "source": [ + "%%bash\n", + "openvar cat ../datasets/sample4 --header" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.13.2" } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 4 +} diff --git a/examples/datasets/sample4/gnomAD.csv b/examples/datasets/sample4/gnomAD.csv new file mode 100644 index 0000000..73b6f66 --- /dev/null +++ b/examples/datasets/sample4/gnomAD.csv @@ -0,0 +1,20 @@ +gnomAD ID,Chromosome,Position,rsIDs,Reference,Alternate,Filters - exomes,Filters - genomes,Transcript,HGVS Consequence,Protein Consequence,Transcript Consequence,VEP Annotation,ClinVar Germline Classification,ClinVar Variation ID,Flags,Allele Count,Allele Number,Allele Frequency,Homozygote Count,Hemizygote Count,Filters - joint,GroupMax FAF group,GroupMax FAF frequency,cadd,revel_max,spliceai_ds_max,pangolin_largest_ds,phylop,sift_max,polyphen_max,Allele Count African/African American,Allele Number African/African American,Homozygote Count African/African American,Hemizygote Count African/African American,Allele Count Admixed American,Allele Number Admixed American,Homozygote Count Admixed American,Hemizygote Count Admixed American,Allele Count Ashkenazi Jewish,Allele Number Ashkenazi Jewish,Homozygote Count Ashkenazi Jewish,Hemizygote Count Ashkenazi Jewish,Allele Count East Asian,Allele Number East Asian,Homozygote Count East Asian,Hemizygote Count East Asian,Allele Count European (Finnish),Allele Number European (Finnish),Homozygote Count European (Finnish),Hemizygote Count 
European (Finnish),Allele Count Middle Eastern,Allele Number Middle Eastern,Homozygote Count Middle Eastern,Hemizygote Count Middle Eastern,Allele Count European (non-Finnish),Allele Number European (non-Finnish),Homozygote Count European (non-Finnish),Hemizygote Count European (non-Finnish),Allele Count Amish,Allele Number Amish,Homozygote Count Amish,Hemizygote Count Amish,Allele Count South Asian,Allele Number South Asian,Homozygote Count South Asian,Hemizygote Count South Asian,Allele Count Remaining,Allele Number Remaining,Homozygote Count Remaining,Hemizygote Count Remaining +1-3682291-G-T,1,3682291,,G,T,PASS,NA,ENST00000378295.9,c.-33-42G>T,,c.-33-42G>T,intron_variant,,,,2,1375376,1.45414781121671E-06,0,0,PASS,sas,0.00000531,0.482,,0,0,-0.056,,,0,67716,0,0,0,37548,0,0,0,21442,0,0,0,35262,0,0,0,54404,0,0,0,5224,0,0,0,1038424,0,0,0,912,0,0,2,62504,0,0,0,51940,0,0 +1-3682291-G-C,1,3682291,rs534349566,G,C,PASS,PASS,ENST00000378295.9,c.-33-42G>C,,c.-33-42G>C,intron_variant,,,,59,1375374,4.28974228100866E-05,1,0,PASS,amr,0.0012026,0.419,,0,0,-0.056,,,1,67716,0,0,57,37546,1,0,0,21442,0,0,0,35262,0,0,0,54404,0,0,0,5224,0,0,0,1038424,0,0,0,912,0,0,0,62504,0,0,1,51940,0,0 +1-3682291-G-A,1,3682291,rs534349566,G,A,PASS,PASS,ENST00000378295.9,c.-33-42G>A,,c.-33-42G>A,intron_variant,,,,23,1375258,1.6724134671458E-05,0,0,PASS,nfe,0.0000117,0.644,,0.01,0,-0.056,,,0,67594,0,0,0,37528,0,0,0,21442,0,0,1,35274,0,0,1,54404,0,0,0,5246,0,0,19,1038432,0,0,0,912,0,0,0,62508,0,0,2,51918,0,0 +1-3682294-T-C,1,3682294,rs916680140,T,C,PASS,NA,ENST00000378295.9,c.-33-39T>C,,c.-33-39T>C,intron_variant,,,,2,1383912,1.44517859517079E-06,0,0,PASS,nfe,0.00000032,0.067,,0,0,-0.017,,,0,67926,0,0,0,38244,0,0,0,21612,0,0,0,35122,0,0,0,54618,0,0,0,5294,0,0,2,1044616,0,0,0,910,0,0,0,63312,0,0,0,52258,0,0 
+1-3682296-C-A,1,3682296,,C,A,PASS,NA,ENST00000378295.9,c.-33-37C>A,,c.-33-37C>A,intron_variant,,,,1,1395094,7.16797577797625E-07,0,0,PASS,,,0.693,,0.03,0,-0.247,,,0,68312,0,0,0,38864,0,0,0,21976,0,0,0,35740,0,0,0,54858,0,0,0,5336,0,0,0,1051480,0,0,0,912,0,0,1,64836,0,0,0,52780,0,0 +1-3682297-C-T,1,3682297,,C,T,PASS,NA,ENST00000378295.9,c.-33-36C>T,,c.-33-36C>T,intron_variant,,,,1,1397234,7.1569973247144E-07,0,0,PASS,,,2.31,,0.02,0,-0.027,,,0,68350,0,0,1,38954,0,0,0,22022,0,0,0,35768,0,0,0,54912,0,0,0,5346,0,0,0,1053160,0,0,0,912,0,0,0,64990,0,0,0,52820,0,0 +1-3682299-A-G,1,3682299,,A,G,PASS,NA,ENST00000378295.9,c.-33-34A>G,,c.-33-34A>G,intron_variant,,,,3,1395420,2.14989035559187E-06,0,0,PASS,nfe,0.00000032,0.509,,0,-0.01,-3.89,,,0,68240,0,0,0,38698,0,0,0,21962,0,0,0,35692,0,0,0,54938,0,0,0,5348,0,0,2,1052118,0,0,0,910,0,0,1,64786,0,0,0,52728,0,0 +1-3682300-G-A,1,3682300,,G,A,PASS,NA,ENST00000378295.9,c.-33-33G>A,,c.-33-33G>A,intron_variant,,,,1,1400418,7.14072512635513E-07,0,0,PASS,,,0.078,,0.01,0,-0.611,,,0,68452,0,0,0,39038,0,0,0,22120,0,0,0,35804,0,0,0,55004,0,0,0,5354,0,0,1,1055368,0,0,0,910,0,0,0,65406,0,0,0,52962,0,0 +1-3682303-T-C,1,3682303,,T,C,PASS,NA,ENST00000378295.9,c.-33-30T>C,,c.-33-30T>C,intron_variant,,,,1,1411590,7.08421000432137E-07,0,0,PASS,,,0.334,,0,-0.01,-2.55,,,0,68850,0,0,0,40442,0,0,0,22552,0,0,0,35996,0,0,0,55292,0,0,0,5408,0,0,0,1062338,0,0,0,912,0,0,0,66380,0,0,1,53420,0,0 +1-3682305-C-A,1,3682305,,C,A,PASS,NA,ENST00000378295.9,c.-33-28C>A,,c.-33-28C>A,intron_variant,,,,1,1429328,6.99629476229389E-07,0,0,PASS,,,6.93,,0,0,2.11,,,0,69494,0,0,0,42188,0,0,0,23198,0,0,0,36648,0,0,0,55812,0,0,0,5478,0,0,0,1072304,0,0,0,912,0,0,0,68996,0,0,1,54298,0,0 
+1-3682306-T-C,1,3682306,rs374235190,T,C,PASS,PASS,ENST00000378295.9,c.-33-27T>C,,c.-33-27T>C,intron_variant,,,,36,1429814,2.5178100088543E-05,0,0,PASS,sas,0.00037441,4.67,,0.02,-0.07,0.436,,,0,69468,0,0,0,42094,0,0,0,23190,0,0,0,36616,0,0,0,55790,0,0,0,5480,0,0,0,1073056,0,0,0,912,0,0,35,68932,0,0,1,54276,0,0 +1-3682307-C-CA,1,3682307,,C,CA,PASS,NA,ENST00000378295.9,c.-33-25dup,,c.-33-25dup,intron_variant,,,,1,1431932,6.98357184559043E-07,0,0,PASS,,,6.6,,0,0,2.41,,,0,69544,0,0,0,42206,0,0,0,23256,0,0,0,36706,0,0,0,55852,0,0,0,5492,0,0,1,1074286,0,0,0,912,0,0,0,69304,0,0,0,54374,0,0 +1-3682309-G-C,1,3682309,,G,C,PASS,NA,ENST00000378295.9,c.-33-24G>C,,c.-33-24G>C,intron_variant,,,,1,1442640,6.93173626129873E-07,0,0,PASS,,,0.693,,0,0,-3.09,,,0,69918,0,0,0,43250,0,0,0,23634,0,0,0,37102,0,0,0,56160,0,0,0,5522,0,0,0,1080638,0,0,0,912,0,0,0,70644,0,0,1,54860,0,0 +1-3682309-G-A,1,3682309,,G,A,PASS,NA,ENST00000378295.9,c.-33-24G>A,,c.-33-24G>A,intron_variant,,,,6,1442640,4.15904175677924E-06,0,0,PASS,nfe,0.000002,0.825,,0,0,-3.09,,,0,69918,0,0,0,43250,0,0,0,23634,0,0,0,37102,0,0,0,56160,0,0,0,5522,0,0,6,1080638,0,0,0,912,0,0,0,70644,0,0,0,54860,0,0 +1-3682310-G-A,1,3682310,rs1449761755,G,A,PASS,NA,ENST00000378295.9,c.-33-23G>A,,c.-33-23G>A,intron_variant,,,,1,1445388,6.91855750843372E-07,0,0,PASS,,,0.247,,0,0,-0.758,,,0,70038,0,0,1,43458,0,0,0,23788,0,0,0,37182,0,0,0,56218,0,0,0,5536,0,0,0,1082228,0,0,0,912,0,0,0,71036,0,0,0,54992,0,0 +1-3682312-G-A,1,3682312,,G,A,PASS,NA,ENST00000378295.9,c.-33-21G>A,,c.-33-21G>A,intron_variant,,,,5,1447908,3.45325807993326E-06,0,0,PASS,nfe,0.00000086,4.77,,0.03,0.01,0.468,,,0,70096,0,0,0,43558,0,0,0,23852,0,0,0,37252,0,0,0,56312,0,0,0,5542,0,0,4,1083938,0,0,0,912,0,0,1,71346,0,0,0,55100,0,0 
+1-3682313-T-A,1,3682313,,T,A,PASS,NA,ENST00000378295.9,c.-33-20T>A,,c.-33-20T>A,intron_variant,,,,1,1446084,6.91522760780148E-07,0,0,PASS,,,3.91,,0,0,-0.524,,,1,69952,0,0,0,43328,0,0,0,23842,0,0,0,37070,0,0,0,56288,0,0,0,5542,0,0,0,1082966,0,0,0,910,0,0,0,71170,0,0,0,55016,0,0 +1-3682314-C-G,1,3682314,rs554511962,C,G,PASS,NA,ENST00000378295.9,c.-33-19C>G,,c.-33-19C>G,intron_variant,,,,6,1451188,4.13454356017277E-06,0,0,PASS,nfe,0.00000199,0.322,,0,0,0.42,,,0,70186,0,0,0,43810,0,0,0,24036,0,0,0,37346,0,0,0,56458,0,0,0,5552,0,0,6,1085908,0,0,0,912,0,0,0,71728,0,0,0,55252,0,0 +1-3682314-C-A,1,3682314,rs554511962,C,A,PASS,PASS,ENST00000378295.9,c.-33-19C>A,,c.-33-19C>A,intron_variant,,,,23,1451186,1.5849105490268E-05,0,0,PASS,eas,0.00037641,0.292,,0,0,0.42,,,0,70186,0,0,0,43810,0,0,0,24036,0,0,21,37346,0,0,0,56458,0,0,0,5552,0,0,0,1085906,0,0,0,912,0,0,1,71728,0,0,1,55252,0,0 diff --git a/examples/datasets/sample4/sample4.yaml b/examples/datasets/sample4/sample4.yaml new file mode 100644 index 0000000..5f3df3f --- /dev/null +++ b/examples/datasets/sample4/sample4.yaml @@ -0,0 +1,22 @@ +pattern: + - '*.csv' + +delimiter: C + +columns: +- TYPE +- POSITION +- VARIANT + +annotation: +- type: plugin + plugin: HGVS_decoder + field: + - TYPE + - POSITION + - VARIANT +- type: internal + field: HGVS + fieldSource: + - 'HGVS Consequence' + - HGVSp diff --git a/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py new file mode 100644 index 0000000..881462c --- /dev/null +++ b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py @@ -0,0 +1,134 @@ +from openvariant.plugins.context import Context +from openvariant.plugins.plugin import Plugin + +import re + +class HGVS_decoderContext(Context): + + def __init__(self, row: dict, field_name: str, file_path: str) -> None: + super().__init__(row, field_name, file_path) + + +amino_acids_map = { + "Ala": "Alanine", + "Arg": "Arginine", + "Asn": "Asparagine", + "Asp": "Aspartic Acid", + 
"Cys": "Cysteine", + "Gln": "Glutamine", + "Glu": "Glutamic Acid", + "Gly": "Glycine", + "His": "Histidine", + "Ile": "Isoleucine", + "Leu": "Leucine", + "Lys": "Lysine", + "Met": "Methionine", + "Phe": "Phenylalanine", + "Pro": "Proline", + "Ser": "Serine", + "Thr": "Threonine", + "Trp": "Tryptophan", + "Tyr": "Tyrosine", + "Val": "Valine", + "Ter": "Termination codon" +} + +variant_map = { + "delins": "deletion-insertion by ", + "del": "deletion", + "ins": "insertion of ", + "dup": "duplication", + "inv": "inversion", + "con": "conversion", + "ext": "extension of ", + "fs": "frameshift mutation of " +} + +position_regex = re.compile(r'(\(?\*?-?\??\_?\d+(?:\_?[+-]\d+\??)?\)?(_)?(?:\(?\*?-?\d+\_?(?:[+-]\d+)?\??\)?)?)') +protein_position_regex = re.compile(r'(?[ACTG]+|del|ins[ACTG]+|dup|inv|con|\[[0-9]+\]|delins[ACTG]+') +variant_rna_regex = re.compile(r'[agcu]+>[agcu]+|del|ins[agcu]+|dup|inv|con|\[[0-9]+\]|delins[agcu]+') + +amino_acids = r'(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)' +variant_protein_aa_regex = re.compile(rf'(? 
0: + variant = variant_map.get(matches_variant[0]) + matches_n = re.findall(nucleotides, matches[0]) + if len(matches_n) > 0: + variant += matches_n[0] + else: + variant = matches[0] + return variant + +def parse_hgvs_variant_protein(hgvs_str): + matches = re.findall(variant_protein_aa_regex, hgvs_str) + if len(matches) == 1: + variant = amino_acids_map.get(matches[0]) + else: + aa_1 = amino_acids_map.get(matches[0]) + aa_2 = amino_acids_map.get(matches[1]) + if aa_1 == aa_2: + variant = "Synonymous (silent) variant" + else: + variant = aa_1 + " mutated to " + aa_2 + matches = re.findall(variant_protein_mod_regex, hgvs_str) + if len(matches) > 0: + variant += " and " + matches_variant = re.findall(variant_type_regex, matches[0]) + variant += variant_map.get(matches_variant[0]) + matches_amino_acid = re.findall(amino_acids, matches[0]) + if len(matches_amino_acid) > 0: + variant += amino_acids_map.get(matches_amino_acid[0]) + return variant + +def interpret_hgvs(hgvs_str): + prefix_map = { + "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant), + "c.": ("cDNA", parse_hgvs_pos, parse_hgvs_variant), + "n.": ("ncDNA", parse_hgvs_pos, parse_hgvs_variant), + "m.": ("mtDNA", parse_hgvs_pos, parse_hgvs_variant), + "r.": ("RNA", parse_hgvs_pos, parse_hgvs_variant), + "p.": ("Protein", parse_hgvs_pos_protein, parse_hgvs_variant_protein), + } + + prefix = hgvs_str[:2] + + result = prefix_map.get(prefix, ("Unknown", [], [])) + seq = hgvs_str[2:] + + type_variant = result[0] + position = result[1](seq) + variant = result[2](seq) + + return type_variant, position, variant + + + +class HGVS_decoderPlugin(Plugin): + + def run(self, context: HGVS_decoderContext) -> dict: + + value = context.row["HGVS"] + type_variant, position, variant = interpret_hgvs(value) + + return type_variant, position, variant diff --git a/examples/plugin_system/HGVS_decoder/__init__.py b/examples/plugin_system/HGVS_decoder/__init__.py new file mode 100644 index 0000000..a9b2ec9 --- /dev/null +++ 
b/examples/plugin_system/HGVS_decoder/__init__.py @@ -0,0 +1,2 @@ +import .multi_test from Multi_testPlugin +import .multi_test from Multi_testContext diff --git a/examples/plugin_system/README.md b/examples/plugin_system/README.md index 0ea62c3..c918436 100644 --- a/examples/plugin_system/README.md +++ b/examples/plugin_system/README.md @@ -1,3 +1,11 @@ # Plugin system examples -- [Plugin system](plugin_system.ipynb) - A simple example that two plugins. +### Plugins + +- [Add date](./add_date) - Plugin to add the current date. +- [Get length](./get_length) - Plugin to obtain the difference between two values. +- [HGVS decoder](./HGVS_decoder) - Plugin to decode the type, position and change of different variants. + +### Output example + +- [Plugin system](plugin_system.ipynb) - Unique and multiple fields plugins example. diff --git a/examples/plugin_system/plugin_system.ipynb b/examples/plugin_system/plugin_system.ipynb index 8469a7c..5a741d6 100644 --- a/examples/plugin_system/plugin_system.ipynb +++ b/examples/plugin_system/plugin_system.ipynb @@ -2,19 +2,40 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# Plugin system example" - ], "metadata": { "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "# Plugin system example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unique field plugin\n", + "\n", + "Includes the Add Date plugin and the Get Length plugin." 
+ ] }, { "cell_type": "code", "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -41,34 +62,74 @@ "source": [ "%%bash\n", "openvar cat ../datasets/sample3 --header" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiple fields plugin\n", + "\n", + "Decoding HGVS across different variants" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TYPE\tPOSITION\tVARIANT\n", + "cDNA\t-33-42\tG>T\n", + "cDNA\t-33-42\tG>C\n", + "cDNA\t-33-42\tG>A\n", + "cDNA\t-33-39\tT>C\n", + "cDNA\t-33-37\tC>A\n", + "cDNA\t-33-36\tC>T\n", + "cDNA\t-33-34\tA>G\n", + "cDNA\t-33-33\tG>A\n", + "cDNA\t-33-30\tT>C\n", + "cDNA\t-33-28\tC>A\n", + "cDNA\t-33-27\tT>C\n", + "cDNA\t-33-25\tduplication\n", + "cDNA\t-33-24\tG>C\n", + "cDNA\t-33-24\tG>A\n", + "cDNA\t-33-23\tG>A\n", + "cDNA\t-33-21\tG>A\n", + "cDNA\t-33-20\tT>A\n", + "cDNA\t-33-19\tC>G\n", + "cDNA\t-33-19\tC>A\n" + ] } - } + ], + "source": [ + "%%bash\n", + "openvar cat ../datasets/sample4 --header" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.13.2" } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 4 +} From 8cb9d1a80222ecd7923934c80cc226ffa08948d2 Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Sun, 18 May 2025 17:05:32 +0200 Subject: [PATCH 
04/12] fix: minor fix plugin examples --- docs/examples/plugin_examples.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/plugin_examples.rst b/docs/examples/plugin_examples.rst index 8ad942f..c387cf1 100644 --- a/docs/examples/plugin_examples.rst +++ b/docs/examples/plugin_examples.rst @@ -126,7 +126,7 @@ The *annotation* file with multiple fields can be described as: annotation: - type: plugin - plugin: multi_test + plugin: HGVS_decoder field: - TYPE - POSITION From 7613c8102f6239b7344bd02738e3732383db144e Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Sun, 18 May 2025 22:49:21 +0200 Subject: [PATCH 05/12] fix: minor fix --- examples/datasets/sample4/sample4.yaml | 1 + openvariant/variant/variant.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/datasets/sample4/sample4.yaml b/examples/datasets/sample4/sample4.yaml index 5f3df3f..403ba28 100644 --- a/examples/datasets/sample4/sample4.yaml +++ b/examples/datasets/sample4/sample4.yaml @@ -4,6 +4,7 @@ pattern: delimiter: C columns: +- HGVS - TYPE - POSITION - VARIANT diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py index ebba768..9581170 100644 --- a/openvariant/variant/variant.py +++ b/openvariant/variant/variant.py @@ -198,7 +198,7 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display header, row, row_header = None, {}, [] matches = [check_extension(ext, file_path) for ext in annotation.patterns] - + print(annotation._annotations) if not any(matches): raise NameError("Annotation patterns don't match with input file.") From a8bbe70c034f644f7733409919d0ed62d98f907b Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Sun, 18 May 2025 22:51:19 +0200 Subject: [PATCH 06/12] fix: minor fix --- examples/datasets/sample4/sample4.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/datasets/sample4/sample4.yaml b/examples/datasets/sample4/sample4.yaml index 403ba28..5f3df3f 
100644 --- a/examples/datasets/sample4/sample4.yaml +++ b/examples/datasets/sample4/sample4.yaml @@ -4,7 +4,6 @@ pattern: delimiter: C columns: -- HGVS - TYPE - POSITION - VARIANT From daeda16d5e39798c9d2fc654fa04d04221feaa17 Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Sun, 18 May 2025 23:41:03 +0200 Subject: [PATCH 07/12] fix: test fix --- openvariant/variant/variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py index 9581170..ebba768 100644 --- a/openvariant/variant/variant.py +++ b/openvariant/variant/variant.py @@ -198,7 +198,7 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display header, row, row_header = None, {}, [] matches = [check_extension(ext, file_path) for ext in annotation.patterns] - print(annotation._annotations) + if not any(matches): raise NameError("Annotation patterns don't match with input file.") From 434ed5212a748df9adb542bb9144592369897636 Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Wed, 21 May 2025 15:40:45 +0200 Subject: [PATCH 08/12] fix: bug on HGVS decoder --- docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py | 5 ++++- examples/plugin_system/HGVS_decoder/HGVS_decoder.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py index 881462c..b23c87a 100644 --- a/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py +++ b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py @@ -101,6 +101,9 @@ def parse_hgvs_variant_protein(hgvs_str): variant += amino_acids_map.get(matches_amino_acid[0]) return variant +def parse_hgvs_unknow(hgvs_str): + return None + def interpret_hgvs(hgvs_str): prefix_map = { "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant), @@ -113,7 +116,7 @@ def interpret_hgvs(hgvs_str): prefix = hgvs_str[:2] - result = prefix_map.get(prefix, 
("Unknown", [], [])) + result = prefix_map.get(prefix, ("Unknown", parse_hgvs_unknow, parse_hgvs_unknow)) seq = hgvs_str[2:] type_variant = result[0] diff --git a/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py index 881462c..b23c87a 100644 --- a/examples/plugin_system/HGVS_decoder/HGVS_decoder.py +++ b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py @@ -101,6 +101,9 @@ def parse_hgvs_variant_protein(hgvs_str): variant += amino_acids_map.get(matches_amino_acid[0]) return variant +def parse_hgvs_unknow(hgvs_str): + return None + def interpret_hgvs(hgvs_str): prefix_map = { "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant), @@ -113,7 +116,7 @@ def interpret_hgvs(hgvs_str): prefix = hgvs_str[:2] - result = prefix_map.get(prefix, ("Unknown", [], [])) + result = prefix_map.get(prefix, ("Unknown", parse_hgvs_unknow, parse_hgvs_unknow)) seq = hgvs_str[2:] type_variant = result[0] From 75ef803e94a977966d181ea211d3f678a71c8b6b Mon Sep 17 00:00:00 2001 From: David Martinez Millan Date: Wed, 21 May 2025 16:56:06 +0200 Subject: [PATCH 09/12] add: docs on plugin system --- docs/user_guide/annotation_structure.rst | 20 ++++++++++++++++++-- docs/user_guide/plugin_system.rst | 16 +++++++++++++++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/docs/user_guide/annotation_structure.rst b/docs/user_guide/annotation_structure.rst index c99e3ed..c2bfb6a 100644 --- a/docs/user_guide/annotation_structure.rst +++ b/docs/user_guide/annotation_structure.rst @@ -228,12 +228,12 @@ Plugin ############# It will apply the plugin functionality to each row of the `input` file. The plugin can be internal, located into `plugin` -folder or can be customized and created by the user. See further details in :ref:`Plugin system` section. +folder or can be customized and created by the user. The parameters that `Plugin` needs are: * ``type``: type of annotation. 
(required) -* ``field``: name that will appear as a head column of this annotation. (required) +* ``field``: a single name or a list of fields that that will appear as a head column of this annotation. (required) * ``plugin``: name of plugin to apply (required) .. code-block:: yaml @@ -243,6 +243,22 @@ The parameters that `Plugin` needs are: field: 'ALT_TYPE' plugin: 'alteration_type' +The plugin system supports multiple fields, however, the order and number of fields must be consistent between the +annotation and the plugin implementation. + +.. code-block:: yaml + + # Example: + - type: 'plugin' + field: + - 'Chr' + - 'Start' + - 'End' + - 'Alt' + - 'Ref' + plugin: 'variant_decoder' + +See further details in :ref:`Plugin system` section. Exclude (optional) ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/user_guide/plugin_system.rst b/docs/user_guide/plugin_system.rst index 572caae..b81c8bb 100644 --- a/docs/user_guide/plugin_system.rst +++ b/docs/user_guide/plugin_system.rst @@ -20,7 +20,7 @@ visualize how the different classes are connected and composed with **OpenVarian | As we have mentioned before, the plugin has to be present on the `annotation` file in order to be used. Custom plugins will be placed in the -folder where the environment variable :bash:`OPENVAR_PLUGIN` points (:bash:`/home/user/.local/share/openvariant/` by default). +folder where the environment variable ``OPENVAR_PLUGIN`` points (``/home/user/.local/share/openvariant/`` by default). The `Builder` will manage to find them and apply the data transformation. Plugins will inherit `Context` and `Plugin` as base classes for each plugin. These classes are described as it follows: @@ -62,5 +62,19 @@ Plugins will inherit `Context` and `Plugin` as base classes for each plugin. The """ raise NotImplementedError +A plugin can return either a single field or multiple fields. Both cases are handled as follows: + +*Returning a single field:* + +.. 
code-block:: python + + return position + +*Returning multiple fields:* + +.. code-block:: python + + return chromosome, start, end, alt, ref + Check :ref:`Command-line interface` to know how to create a new plugin. Also, to check more examples on how plugins can be applied and written, see :ref:`Plugin examples`. \ No newline at end of file From 8311288c6a170fbb219ebad17688c166a62a741d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Thu, 22 May 2025 13:53:58 +0200 Subject: [PATCH 10/12] fix: update variant.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- openvariant/variant/variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py index ebba768..31c2a19 100644 --- a/openvariant/variant/variant.py +++ b/openvariant/variant/variant.py @@ -182,7 +182,7 @@ def __init__(self, path: str, annotation: Annotation, skip_files: bool = False) self._path: str = path self._annotation: Annotation = annotation - #annotation_keys = [item for x in annotation.annotations.keys() for item in (list(x) if isinstance(x, tuple) else [x])] + self._header: List[str] = annotation.columns self.skip_files = skip_files From 80c8229f2d50d42046e6000dd200d79ec2fa01e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Thu, 22 May 2025 13:54:32 +0200 Subject: [PATCH 11/12] fix: update plugin_system README Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- examples/plugin_system/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plugin_system/README.md b/examples/plugin_system/README.md index c918436..fdc90f3 100644 --- a/examples/plugin_system/README.md +++ b/examples/plugin_system/README.md @@ -3,7 +3,7 @@ ### Plugins - [Add date](./add_date) - Plugin to add the current 
date. -- [Get lenght](./get_length) - Plugin to obtain the different between two values. +- [Get length](./get_length) - Plugin to obtain the difference between two values. - [HGVS decoder](./HGVS_decoder) - Plugin to decode the type, position and change of different variants. ### Output example From ecb18f0f49c47384a6fb8f9c20d04ac154ad4914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Thu, 22 May 2025 13:55:02 +0200 Subject: [PATCH 12/12] fix: typo in docs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/user_guide/annotation_structure.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/annotation_structure.rst b/docs/user_guide/annotation_structure.rst index c2bfb6a..6caa278 100644 --- a/docs/user_guide/annotation_structure.rst +++ b/docs/user_guide/annotation_structure.rst @@ -233,7 +233,7 @@ folder or can be customized and created by the user. The parameters that `Plugin` needs are: * ``type``: type of annotation. (required) -* ``field``: a single name or a list of fields that that will appear as a head column of this annotation. (required) +* ``field``: a single name or a list of fields that will appear as a head column of this annotation. (required) * ``plugin``: name of plugin to apply (required) .. code-block:: yaml