From 5ba259a24c838bad6ce0714a08602dfa4162ecb7 Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Fri, 2 May 2025 19:57:41 +0200
Subject: [PATCH 01/12] add: multiple field on plugin

---
 openvariant/annotation/annotation.py | 16 ++++++++++------
 openvariant/variant/variant.py       | 15 ++++++++++++---
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/openvariant/annotation/annotation.py b/openvariant/annotation/annotation.py
index 3b9359e..48ab191 100644
--- a/openvariant/annotation/annotation.py
+++ b/openvariant/annotation/annotation.py
@@ -67,7 +67,7 @@ def _check_annotation_keys(annot: dict) -> None:
         raise ValueError(f"'{AnnotationKeys.TYPE.value}' value is wrong.")
 
     # Field key
-    if AnnotationKeys.FIELD.value not in annot or not isinstance(annot[AnnotationKeys.FIELD.value], str):
+    if AnnotationKeys.FIELD.value not in annot or (not isinstance(annot[AnnotationKeys.FIELD.value], list) and  not isinstance(annot[AnnotationKeys.FIELD.value], str)):
         raise KeyError(f"'{AnnotationKeys.FIELD.value}' key not found or is not a str.")
 
     # Value key
@@ -125,10 +125,13 @@ def _read_annotation_file(self) -> dict:
                 logging.error(exc)
             stream.close()
 
+    def _clean_annotation_keys(self):
+        return [item for x in self.annotations.keys() for item in (list(x) if isinstance(x, tuple) else [x])]
+
     def _check_columns(self) -> None:
         """Check if columns exists as annotation fields"""
         for col in self._columns:
-            if col not in self._annotations:
+            if col not in self._clean_annotation_keys():
                 raise KeyError(f"'{col}' column unable to find.")
 
     def __init__(self, annotation_path: str) -> None:
@@ -165,15 +168,16 @@ def __init__(self, annotation_path: str) -> None:
 
         self._annotations: dict = {}
         for k in raw_annotation.get(AnnotationGeneralKeys.ANNOTATION.value, []):
-
             class_name = k[AnnotationKeys.TYPE.value].upper()
             module_name = "openvariant.annotation.builder"
             ClassAnnotation = import_class_from_module(module_name, class_name)
             instance = ClassAnnotation()
+            if isinstance(k[AnnotationKeys.FIELD.value], list):
+                self._annotations[tuple(k[AnnotationKeys.FIELD.value])] = instance(k, self._path)
+            else:
+                self._annotations[k[AnnotationKeys.FIELD.value]] = instance(k, self._path)
 
-            self._annotations[k[AnnotationKeys.FIELD.value]] = instance(k, self._path)
-
-        self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, list(self.annotations.keys()))
+        self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, self._clean_annotation_keys())
         self._check_columns()
 
     @property
diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py
index e31e270..8afe428 100644
--- a/openvariant/variant/variant.py
+++ b/openvariant/variant/variant.py
@@ -108,6 +108,7 @@ def _extract_header(file_path: str, original_header: list, annotation: Annotatio
         instance = ClassAnnotation()
         
         header_schema.update({field: instance(ann, original_header, file_path, header_schema)})
+
     return header_schema, annotation.columns
 
 
@@ -180,8 +181,9 @@ def __init__(self, path: str, annotation: Annotation, skip_files: bool = False)
         csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
         self._path: str = path
         self._annotation: Annotation = annotation
-        self._header: List[str] = list(annotation.annotations.keys()) if len(annotation.columns) == 0 \
-            else annotation.columns
+
+        #annotation_keys = [item for x in annotation.annotations.keys() for item in (list(x) if isinstance(x, tuple) else [x])]
+        self._header: List[str] = annotation.columns
         self.skip_files = skip_files
 
     def _unify(self, base_path: str, annotation: Annotation, group_by: str = None, display_header: bool = True) \
@@ -216,6 +218,7 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
                         row, plugin_values, mapping_values = {}, {}, {}
                         for head in annotation.annotations.keys():
                             type_ann, value, func = header[head]
+
                             if type_ann == AnnotationTypes.PLUGIN.name:
                                 plugin_values[head] = header[head]
                             elif type_ann == AnnotationTypes.MAPPING.name:
@@ -239,9 +242,15 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
                         for head, mapping in mapping_values.items():
                             _, builder_mapping, func = mapping
                             line_dict[head] = _parse_mapping_field(builder_mapping, line_dict, func)
+
                         for head, plug in plugin_values.items():
                             _, ctxt_plugin, func_plugin = plug
-                            line_dict[head] = _parse_plugin_field(line_dict, head, file_path, ctxt_plugin, func_plugin)
+                            value_plugin = _parse_plugin_field(line_dict, head, file_path, ctxt_plugin, func_plugin)
+                            if isinstance(head, tuple):
+                                for idx, x in enumerate(head):
+                                    line_dict[x] = value_plugin[idx]
+                            else:
+                                line_dict[head] = value_plugin
 
                         for k in annotation.columns:
                             row[k] = line_dict[k].format(**line_dict)

From 7058662f19e418e43487e4a2fb7e7abd5ab85b0e Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Fri, 16 May 2025 19:01:00 +0200
Subject: [PATCH 02/12] fix: little checks

---
 openvariant/annotation/annotation.py | 1 -
 openvariant/variant/variant.py       | 2 --
 2 files changed, 3 deletions(-)

diff --git a/openvariant/annotation/annotation.py b/openvariant/annotation/annotation.py
index 48ab191..e8e22b0 100644
--- a/openvariant/annotation/annotation.py
+++ b/openvariant/annotation/annotation.py
@@ -176,7 +176,6 @@ def __init__(self, annotation_path: str) -> None:
                 self._annotations[tuple(k[AnnotationKeys.FIELD.value])] = instance(k, self._path)
             else:
                 self._annotations[k[AnnotationKeys.FIELD.value]] = instance(k, self._path)
-
         self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, self._clean_annotation_keys())
         self._check_columns()
 
diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py
index 8afe428..ebba768 100644
--- a/openvariant/variant/variant.py
+++ b/openvariant/variant/variant.py
@@ -204,7 +204,6 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
 
         try:
             self.mm, self.file = _open_file(file_path, "rb")
-
             for lnum, line in _base_parser(self.mm, file_path, annotation.delimiter, self.skip_files):
                 try:
                     if header is None:
@@ -218,7 +217,6 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
                         row, plugin_values, mapping_values = {}, {}, {}
                         for head in annotation.annotations.keys():
                             type_ann, value, func = header[head]
-
                             if type_ann == AnnotationTypes.PLUGIN.name:
                                 plugin_values[head] = header[head]
                             elif type_ann == AnnotationTypes.MAPPING.name:

From 25232eb4cb631c1fb174984837ff85510188368d Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Sun, 18 May 2025 17:01:01 +0200
Subject: [PATCH 03/12] add: examples and change docs

---
 docs/examples/plugin_examples.rst             | 192 +++++++++++++++++-
 .../HGVS_decoder/HGVS_decoder.py              | 134 ++++++++++++
 .../plugin_system/HGVS_decoder/__init__.py    |   2 +
 .../plugin_system/plugin_system.ipynb         |  93 +++++++--
 examples/datasets/sample4/gnomAD.csv          |  20 ++
 examples/datasets/sample4/sample4.yaml        |  22 ++
 .../HGVS_decoder/HGVS_decoder.py              | 134 ++++++++++++
 .../plugin_system/HGVS_decoder/__init__.py    |   2 +
 examples/plugin_system/README.md              |  10 +-
 examples/plugin_system/plugin_system.ipynb    |  93 +++++++--
 10 files changed, 666 insertions(+), 36 deletions(-)
 create mode 100644 docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
 create mode 100644 docs/examples/plugin_system/HGVS_decoder/__init__.py
 create mode 100644 examples/datasets/sample4/gnomAD.csv
 create mode 100644 examples/datasets/sample4/sample4.yaml
 create mode 100644 examples/plugin_system/HGVS_decoder/HGVS_decoder.py
 create mode 100644 examples/plugin_system/HGVS_decoder/__init__.py

diff --git a/docs/examples/plugin_examples.rst b/docs/examples/plugin_examples.rst
index 9e6af80..8ad942f 100644
--- a/docs/examples/plugin_examples.rst
+++ b/docs/examples/plugin_examples.rst
@@ -3,12 +3,18 @@
 Plugin examples
 ===============================
 
+
+
 **OpenVariant** offers a plugin system, where the user will be able to build their own plugins and make a customized
 data transformation. First of all, you will need to create a plugin; hence, check :ref:`Command-line interface` section
 and :ref:`Command-line interface examples` to understand how a plugin template can be generated. Also, it is important
 to know how plugins works and how they are composed in order to understand the following examples that we introduce.
 
-We are going to introduce you two little plugins that we will use them in the example. The two plugins are described and built as:
+Unique field plugin
+----------------------
+
+Plugins can modify individual fields, and in this example, we introduce two small plugins that are described and
+implemented as follows:
 
 *Add date* plugin
 ########################
@@ -93,7 +99,188 @@ extract the length between the two fields.
 
     		return context.row[context.field_name]
 
-These two plugins are used in the following example:
+Multiple fields plugin
+-------------------------
+
+The plugin system also allows a single plugin to fill multiple fields at once. Such a plugin can be constructed as follows:
+
+*HGVS decoder* plugin
+#######################
+
+`The Human Genome Variation Society (HGVS) Nomenclature <https://hgvs-nomenclature.org/stable/>`_ is the global standard
+for describing DNA, RNA, and protein sequence variants. It is widely used in clinical reports, scientific publications,
+and variant databases to communicate genetic changes. HGVS variants are expressed using a specific syntax that encodes
+detailed information about the type and location of the change (e.g. `c.76A>T`, `r.76_78del`, `p.Gly76_Val78del`).
+
+In this plugin, we decode HGVS expressions by identifying and separating the variant type (*TYPE*), its position (*POSITION*),
+and the specific change that occurs (*VARIANT*).
+
+The *annotation* file with multiple fields can be described as:
+
+.. code-block:: yaml
+
+    columns:
+        - TYPE
+        - POSITION
+        - VARIANT
+
+    annotation:
+        - type: plugin
+          plugin: HGVS_decoder
+          field:
+            - TYPE
+            - POSITION
+            - VARIANT
+        - type: internal
+          field: HGVS
+          fieldSource:
+            - 'HGVS Consequence'
+            - HGVSp
+
+
+The plugin must return its values in the same order as the names listed under ``field``, because each returned value is assigned to the field at the same position.
+
+.. code-block:: python
+
+    from openvariant.plugins.context import Context
+    from openvariant.plugins.plugin import Plugin
+
+    import re
+
+    class HGVS_decoderContext(Context):
+
+        def __init__(self, row: dict, field_name: str, file_path: str) -> None:
+            super().__init__(row, field_name, file_path)
+
+
+    amino_acids_map = {
+        "Ala": "Alanine",
+        "Arg": "Arginine",
+        "Asn": "Asparagine",
+        "Asp": "Aspartic Acid",
+        "Cys": "Cysteine",
+        "Gln": "Glutamine",
+        "Glu": "Glutamic Acid",
+        "Gly": "Glycine",
+        "His": "Histidine",
+        "Ile": "Isoleucine",
+        "Leu": "Leucine",
+        "Lys": "Lysine",
+        "Met": "Methionine",
+        "Phe": "Phenylalanine",
+        "Pro": "Proline",
+        "Ser": "Serine",
+        "Thr": "Threonine",
+        "Trp": "Tryptophan",
+        "Tyr": "Tyrosine",
+        "Val": "Valine",
+        "Ter": "Termination codon"
+    }
+
+    variant_map = {
+        "delins": "deletion-insertion by ",
+        "del": "deletion",
+        "ins": "insertion of ",
+        "dup": "duplication",
+        "inv": "inversion",
+        "con": "conversion",
+        "ext": "extension of ",
+        "fs": "frameshift mutation of "
+    }
+
+    position_regex = re.compile(r'(\(?\*?-?\??\_?\d+(?:\_?[+-]\d+\??)?\)?(_)?(?:\(?\*?-?\d+\_?(?:[+-]\d+)?\??\)?)?)')
+    protein_position_regex = re.compile(r'(?<!\*)(?<!\-)(\d+)\=?\*?')
+
+    nucleotides = re.compile(r'([ACTG]+|[agc]+[u]?)')
+    variant_regex = re.compile(r'[ACTG]+>[ACTG]+|del|ins[ACTG]+|dup|inv|con|\[[0-9]+\]|delins[ACTG]+')
+    variant_rna_regex = re.compile(r'[agcu]+>[agcu]+|del|ins[agcu]+|dup|inv|con|\[[0-9]+\]|delins[agcu]+')
+
+    amino_acids = r'(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)'
+    variant_protein_aa_regex = re.compile(rf'(?<!ext)(?<!fs)(?<!ins)(?<!delins){amino_acids}')
+    variant_protein_mod_regex = re.compile(rf'(?:delins{amino_acids}|del|ins{amino_acids}|dup|inv|con|ext{amino_acids}?\*?(?:[0-9]+)?|fs{amino_acids}[0-9]+)')
+    variant_type_regex = re.compile(f'(?:delins|del|ins|dup|inv|con|ext|fs)')
+
+    def parse_hgvs_pos(hgvs_str):
+        matches_pos = re.findall(position_regex, hgvs_str)
+        position = [m[0] for m in matches_pos]
+        position = ";".join(position)
+        return position
+
+    def parse_hgvs_pos_protein(hgvs_str):
+        matches_pos = re.findall(protein_position_regex, hgvs_str)
+        position = [m for m in matches_pos]
+        position = ";".join(position)
+        return position
+
+    def parse_hgvs_variant(hgvs_str):
+        matches = re.findall(variant_regex, hgvs_str)
+        matches_variant = re.findall(variant_type_regex, matches[0])
+        if len(matches_variant) > 0:
+            variant = variant_map.get(matches_variant[0])
+            matches_n = re.findall(nucleotides, matches[0])
+            if len(matches_n) > 0:
+                variant += matches_n[0]
+        else:
+            variant = matches[0]
+        return variant
+
+    def parse_hgvs_variant_protein(hgvs_str):
+        matches = re.findall(variant_protein_aa_regex, hgvs_str)
+        if len(matches) == 1:
+            variant = amino_acids_map.get(matches[0])
+        else:
+            aa_1 = amino_acids_map.get(matches[0])
+            aa_2 = amino_acids_map.get(matches[1])
+            if aa_1 == aa_2:
+                variant = "Synonymous (silent) variant"
+            else:
+                variant = aa_1 + " mutated to " + aa_2
+        matches = re.findall(variant_protein_mod_regex, hgvs_str)
+        if len(matches) > 0:
+            variant += " and "
+            matches_variant = re.findall(variant_type_regex, matches[0])
+            variant += variant_map.get(matches_variant[0])
+            matches_amino_acid = re.findall(amino_acids, matches[0])
+            if len(matches_amino_acid) > 0:
+                variant += amino_acids_map.get(matches_amino_acid[0])
+        return variant
+
+    def interpret_hgvs(hgvs_str):
+        """Split an HGVS expression into a (type, position, variant) tuple."""
+        prefix_map = {
+            "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant),
+            "c.": ("cDNA", parse_hgvs_pos, parse_hgvs_variant),
+            "n.": ("ncDNA", parse_hgvs_pos, parse_hgvs_variant),
+            "m.": ("mtDNA", parse_hgvs_pos, parse_hgvs_variant),
+            "r.": ("RNA", parse_hgvs_pos, parse_hgvs_variant),
+            "p.": ("Protein", parse_hgvs_pos_protein, parse_hgvs_variant_protein),
+        }
+        # The fallback parsers must be callable: an unrecognised prefix yields empty strings.
+        unknown = ("Unknown", lambda _seq: "", lambda _seq: "")
+
+        # The first two characters select the parsers; the rest is the text they parse.
+        type_variant, parse_position, parse_variant = prefix_map.get(hgvs_str[:2], unknown)
+        seq = hgvs_str[2:]
+
+        position = parse_position(seq)
+        variant = parse_variant(seq)
+        return type_variant, position, variant
+
+
+
+    class HGVS_decoderPlugin(Plugin):
+
+        def run(self, context: HGVS_decoderContext) -> dict:
+
+            value = context.row["HGVS"]
+            type_variant, position, variant = interpret_hgvs(value)
+
+            return type_variant, position, variant
+
+
+
+We can find all the examples on the repository: `OpenVariant examples <https://github.com/bbglab/openvariant/tree/master/examples>`_
+and these plugins are used in the following examples:
 
 .. nbgallery::
     :name: Plugin System examples
@@ -101,4 +288,3 @@ These two plugins are used in the following example:
 
     plugin_system/plugin_system.ipynb
 
-We can find all the examples on the repository: `OpenVariant examples <https://github.com/bbglab/openvariant/tree/master/examples>`_.
\ No newline at end of file
diff --git a/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
new file mode 100644
index 0000000..881462c
--- /dev/null
+++ b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
@@ -0,0 +1,134 @@
+from openvariant.plugins.context import Context
+from openvariant.plugins.plugin import Plugin
+
+import re
+
+class HGVS_decoderContext(Context):
+
+    def __init__(self, row: dict, field_name: str, file_path: str) -> None:
+        super().__init__(row, field_name, file_path)
+
+
+amino_acids_map = {
+    "Ala": "Alanine",
+    "Arg": "Arginine",    
+    "Asn": "Asparagine", 
+    "Asp": "Aspartic Acid",
+    "Cys": "Cysteine", 
+    "Gln": "Glutamine", 
+    "Glu": "Glutamic Acid",
+    "Gly": "Glycine",
+    "His": "Histidine", 
+    "Ile": "Isoleucine",
+    "Leu": "Leucine",
+    "Lys": "Lysine",
+    "Met": "Methionine",
+    "Phe": "Phenylalanine", 
+    "Pro": "Proline",
+    "Ser": "Serine",
+    "Thr": "Threonine", 
+    "Trp": "Tryptophan", 
+    "Tyr": "Tyrosine",
+    "Val": "Valine",
+    "Ter": "Termination codon"
+}
+
+variant_map = {
+    "delins": "deletion-insertion by ",
+    "del": "deletion",
+    "ins": "insertion of ",
+    "dup": "duplication",
+    "inv": "inversion",
+    "con": "conversion",
+    "ext": "extension of ",
+    "fs": "frameshift mutation of "
+}
+
+position_regex = re.compile(r'(\(?\*?-?\??\_?\d+(?:\_?[+-]\d+\??)?\)?(_)?(?:\(?\*?-?\d+\_?(?:[+-]\d+)?\??\)?)?)')
+protein_position_regex = re.compile(r'(?<!\*)(?<!\-)(\d+)\=?\*?')
+
+nucleotides = re.compile(r'([ACTG]+|[agc]+[u]?)')
+variant_regex = re.compile(r'[ACTG]+>[ACTG]+|del|ins[ACTG]+|dup|inv|con|\[[0-9]+\]|delins[ACTG]+')
+variant_rna_regex = re.compile(r'[agcu]+>[agcu]+|del|ins[agcu]+|dup|inv|con|\[[0-9]+\]|delins[agcu]+')
+
+amino_acids = r'(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)'
+variant_protein_aa_regex = re.compile(rf'(?<!ext)(?<!fs)(?<!ins)(?<!delins){amino_acids}')
+variant_protein_mod_regex = re.compile(rf'(?:delins{amino_acids}|del|ins{amino_acids}|dup|inv|con|ext{amino_acids}?\*?(?:[0-9]+)?|fs{amino_acids}[0-9]+)')
+variant_type_regex = re.compile(f'(?:delins|del|ins|dup|inv|con|ext|fs)')
+
+def parse_hgvs_pos(hgvs_str):
+    matches_pos = re.findall(position_regex, hgvs_str)
+    position = [m[0] for m in matches_pos]
+    position = ";".join(position)
+    return position
+
+def parse_hgvs_pos_protein(hgvs_str):
+    matches_pos = re.findall(protein_position_regex, hgvs_str)
+    position = [m for m in matches_pos]
+    position = ";".join(position)
+    return position
+
+def parse_hgvs_variant(hgvs_str):
+    matches = re.findall(variant_regex, hgvs_str)
+    matches_variant = re.findall(variant_type_regex, matches[0])
+    if len(matches_variant) > 0:
+        variant = variant_map.get(matches_variant[0])
+        matches_n = re.findall(nucleotides, matches[0])
+        if len(matches_n) > 0:
+            variant += matches_n[0]
+    else:
+        variant = matches[0]
+    return variant
+
+def parse_hgvs_variant_protein(hgvs_str):
+    matches = re.findall(variant_protein_aa_regex, hgvs_str)
+    if len(matches) == 1:
+        variant = amino_acids_map.get(matches[0])
+    else:
+        aa_1 = amino_acids_map.get(matches[0])
+        aa_2 = amino_acids_map.get(matches[1])
+        if aa_1 == aa_2:
+            variant = "Synonymous (silent) variant"
+        else:
+            variant = aa_1 + " mutated to " + aa_2
+    matches = re.findall(variant_protein_mod_regex, hgvs_str)
+    if len(matches) > 0:
+        variant += " and "
+        matches_variant = re.findall(variant_type_regex, matches[0])
+        variant += variant_map.get(matches_variant[0])
+        matches_amino_acid = re.findall(amino_acids, matches[0])
+        if len(matches_amino_acid) > 0:
+            variant += amino_acids_map.get(matches_amino_acid[0])
+    return variant
+
+def interpret_hgvs(hgvs_str):
+    """Split an HGVS expression into a (type, position, variant) tuple."""
+    prefix_map = {
+        "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "c.": ("cDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "n.": ("ncDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "m.": ("mtDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "r.": ("RNA", parse_hgvs_pos, parse_hgvs_variant),
+        "p.": ("Protein", parse_hgvs_pos_protein, parse_hgvs_variant_protein),
+    }
+    # The fallback parsers must be callable: an unrecognised prefix yields empty strings.
+    unknown = ("Unknown", lambda _seq: "", lambda _seq: "")
+
+    # The first two characters select the parsers; the rest is the text they parse.
+    type_variant, parse_position, parse_variant = prefix_map.get(hgvs_str[:2], unknown)
+    seq = hgvs_str[2:]
+
+    position = parse_position(seq)
+    variant = parse_variant(seq)
+    return type_variant, position, variant
+
+
+
+class HGVS_decoderPlugin(Plugin):
+
+    def run(self, context: HGVS_decoderContext) -> dict:
+        
+        value = context.row["HGVS"]
+        type_variant, position, variant = interpret_hgvs(value)
+
+        return type_variant, position, variant
diff --git a/docs/examples/plugin_system/HGVS_decoder/__init__.py b/docs/examples/plugin_system/HGVS_decoder/__init__.py
new file mode 100644
index 0000000..a9b2ec9
--- /dev/null
+++ b/docs/examples/plugin_system/HGVS_decoder/__init__.py
@@ -0,0 +1,2 @@
+# Re-export the plugin classes defined in HGVS_decoder.py from the package itself.
+from .HGVS_decoder import HGVS_decoderContext, HGVS_decoderPlugin
diff --git a/docs/examples/plugin_system/plugin_system.ipynb b/docs/examples/plugin_system/plugin_system.ipynb
index 8469a7c..5a741d6 100644
--- a/docs/examples/plugin_system/plugin_system.ipynb
+++ b/docs/examples/plugin_system/plugin_system.ipynb
@@ -2,19 +2,40 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "source": [
-    "# Plugin system example"
-   ],
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
-   }
+   },
+   "source": [
+    "# Plugin system example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Unique field plugin\n",
+    "\n",
+    "Includes the Add Date plugin and the Get Length plugin."
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": 1,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -41,34 +62,74 @@
    "source": [
     "%%bash\n",
     "openvar cat ../datasets/sample3 --header"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Multiple fields plugin\n",
+    "\n",
+    "Decoding HGVS across different variants"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TYPE\tPOSITION\tVARIANT\n",
+      "cDNA\t-33-42\tG>T\n",
+      "cDNA\t-33-42\tG>C\n",
+      "cDNA\t-33-42\tG>A\n",
+      "cDNA\t-33-39\tT>C\n",
+      "cDNA\t-33-37\tC>A\n",
+      "cDNA\t-33-36\tC>T\n",
+      "cDNA\t-33-34\tA>G\n",
+      "cDNA\t-33-33\tG>A\n",
+      "cDNA\t-33-30\tT>C\n",
+      "cDNA\t-33-28\tC>A\n",
+      "cDNA\t-33-27\tT>C\n",
+      "cDNA\t-33-25\tduplication\n",
+      "cDNA\t-33-24\tG>C\n",
+      "cDNA\t-33-24\tG>A\n",
+      "cDNA\t-33-23\tG>A\n",
+      "cDNA\t-33-21\tG>A\n",
+      "cDNA\t-33-20\tT>A\n",
+      "cDNA\t-33-19\tC>G\n",
+      "cDNA\t-33-19\tC>A\n"
+     ]
     }
-   }
+   ],
+   "source": [
+    "%%bash\n",
+    "openvar cat ../datasets/sample4 --header"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ "nbformat_minor": 4
+}
diff --git a/examples/datasets/sample4/gnomAD.csv b/examples/datasets/sample4/gnomAD.csv
new file mode 100644
index 0000000..73b6f66
--- /dev/null
+++ b/examples/datasets/sample4/gnomAD.csv
@@ -0,0 +1,20 @@
+gnomAD ID,Chromosome,Position,rsIDs,Reference,Alternate,Filters - exomes,Filters - genomes,Transcript,HGVS Consequence,Protein Consequence,Transcript Consequence,VEP Annotation,ClinVar Germline Classification,ClinVar Variation ID,Flags,Allele Count,Allele Number,Allele Frequency,Homozygote Count,Hemizygote Count,Filters - joint,GroupMax FAF group,GroupMax FAF frequency,cadd,revel_max,spliceai_ds_max,pangolin_largest_ds,phylop,sift_max,polyphen_max,Allele Count African/African American,Allele Number African/African American,Homozygote Count African/African American,Hemizygote Count African/African American,Allele Count Admixed American,Allele Number Admixed American,Homozygote Count Admixed American,Hemizygote Count Admixed American,Allele Count Ashkenazi Jewish,Allele Number Ashkenazi Jewish,Homozygote Count Ashkenazi Jewish,Hemizygote Count Ashkenazi Jewish,Allele Count East Asian,Allele Number East Asian,Homozygote Count East Asian,Hemizygote Count East Asian,Allele Count European (Finnish),Allele Number European (Finnish),Homozygote Count European (Finnish),Hemizygote Count European (Finnish),Allele Count Middle Eastern,Allele Number Middle Eastern,Homozygote Count Middle Eastern,Hemizygote Count Middle Eastern,Allele Count European (non-Finnish),Allele Number European (non-Finnish),Homozygote Count European (non-Finnish),Hemizygote Count European (non-Finnish),Allele Count Amish,Allele Number Amish,Homozygote Count Amish,Hemizygote Count Amish,Allele Count South Asian,Allele Number South Asian,Homozygote Count South Asian,Hemizygote Count South Asian,Allele Count Remaining,Allele Number Remaining,Homozygote Count Remaining,Hemizygote Count Remaining
+1-3682291-G-T,1,3682291,,G,T,PASS,NA,ENST00000378295.9,c.-33-42G>T,,c.-33-42G>T,intron_variant,,,,2,1375376,1.45414781121671E-06,0,0,PASS,sas,0.00000531,0.482,,0,0,-0.056,,,0,67716,0,0,0,37548,0,0,0,21442,0,0,0,35262,0,0,0,54404,0,0,0,5224,0,0,0,1038424,0,0,0,912,0,0,2,62504,0,0,0,51940,0,0
+1-3682291-G-C,1,3682291,rs534349566,G,C,PASS,PASS,ENST00000378295.9,c.-33-42G>C,,c.-33-42G>C,intron_variant,,,,59,1375374,4.28974228100866E-05,1,0,PASS,amr,0.0012026,0.419,,0,0,-0.056,,,1,67716,0,0,57,37546,1,0,0,21442,0,0,0,35262,0,0,0,54404,0,0,0,5224,0,0,0,1038424,0,0,0,912,0,0,0,62504,0,0,1,51940,0,0
+1-3682291-G-A,1,3682291,rs534349566,G,A,PASS,PASS,ENST00000378295.9,c.-33-42G>A,,c.-33-42G>A,intron_variant,,,,23,1375258,1.6724134671458E-05,0,0,PASS,nfe,0.0000117,0.644,,0.01,0,-0.056,,,0,67594,0,0,0,37528,0,0,0,21442,0,0,1,35274,0,0,1,54404,0,0,0,5246,0,0,19,1038432,0,0,0,912,0,0,0,62508,0,0,2,51918,0,0
+1-3682294-T-C,1,3682294,rs916680140,T,C,PASS,NA,ENST00000378295.9,c.-33-39T>C,,c.-33-39T>C,intron_variant,,,,2,1383912,1.44517859517079E-06,0,0,PASS,nfe,0.00000032,0.067,,0,0,-0.017,,,0,67926,0,0,0,38244,0,0,0,21612,0,0,0,35122,0,0,0,54618,0,0,0,5294,0,0,2,1044616,0,0,0,910,0,0,0,63312,0,0,0,52258,0,0
+1-3682296-C-A,1,3682296,,C,A,PASS,NA,ENST00000378295.9,c.-33-37C>A,,c.-33-37C>A,intron_variant,,,,1,1395094,7.16797577797625E-07,0,0,PASS,,,0.693,,0.03,0,-0.247,,,0,68312,0,0,0,38864,0,0,0,21976,0,0,0,35740,0,0,0,54858,0,0,0,5336,0,0,0,1051480,0,0,0,912,0,0,1,64836,0,0,0,52780,0,0
+1-3682297-C-T,1,3682297,,C,T,PASS,NA,ENST00000378295.9,c.-33-36C>T,,c.-33-36C>T,intron_variant,,,,1,1397234,7.1569973247144E-07,0,0,PASS,,,2.31,,0.02,0,-0.027,,,0,68350,0,0,1,38954,0,0,0,22022,0,0,0,35768,0,0,0,54912,0,0,0,5346,0,0,0,1053160,0,0,0,912,0,0,0,64990,0,0,0,52820,0,0
+1-3682299-A-G,1,3682299,,A,G,PASS,NA,ENST00000378295.9,c.-33-34A>G,,c.-33-34A>G,intron_variant,,,,3,1395420,2.14989035559187E-06,0,0,PASS,nfe,0.00000032,0.509,,0,-0.01,-3.89,,,0,68240,0,0,0,38698,0,0,0,21962,0,0,0,35692,0,0,0,54938,0,0,0,5348,0,0,2,1052118,0,0,0,910,0,0,1,64786,0,0,0,52728,0,0
+1-3682300-G-A,1,3682300,,G,A,PASS,NA,ENST00000378295.9,c.-33-33G>A,,c.-33-33G>A,intron_variant,,,,1,1400418,7.14072512635513E-07,0,0,PASS,,,0.078,,0.01,0,-0.611,,,0,68452,0,0,0,39038,0,0,0,22120,0,0,0,35804,0,0,0,55004,0,0,0,5354,0,0,1,1055368,0,0,0,910,0,0,0,65406,0,0,0,52962,0,0
+1-3682303-T-C,1,3682303,,T,C,PASS,NA,ENST00000378295.9,c.-33-30T>C,,c.-33-30T>C,intron_variant,,,,1,1411590,7.08421000432137E-07,0,0,PASS,,,0.334,,0,-0.01,-2.55,,,0,68850,0,0,0,40442,0,0,0,22552,0,0,0,35996,0,0,0,55292,0,0,0,5408,0,0,0,1062338,0,0,0,912,0,0,0,66380,0,0,1,53420,0,0
+1-3682305-C-A,1,3682305,,C,A,PASS,NA,ENST00000378295.9,c.-33-28C>A,,c.-33-28C>A,intron_variant,,,,1,1429328,6.99629476229389E-07,0,0,PASS,,,6.93,,0,0,2.11,,,0,69494,0,0,0,42188,0,0,0,23198,0,0,0,36648,0,0,0,55812,0,0,0,5478,0,0,0,1072304,0,0,0,912,0,0,0,68996,0,0,1,54298,0,0
+1-3682306-T-C,1,3682306,rs374235190,T,C,PASS,PASS,ENST00000378295.9,c.-33-27T>C,,c.-33-27T>C,intron_variant,,,,36,1429814,2.5178100088543E-05,0,0,PASS,sas,0.00037441,4.67,,0.02,-0.07,0.436,,,0,69468,0,0,0,42094,0,0,0,23190,0,0,0,36616,0,0,0,55790,0,0,0,5480,0,0,0,1073056,0,0,0,912,0,0,35,68932,0,0,1,54276,0,0
+1-3682307-C-CA,1,3682307,,C,CA,PASS,NA,ENST00000378295.9,c.-33-25dup,,c.-33-25dup,intron_variant,,,,1,1431932,6.98357184559043E-07,0,0,PASS,,,6.6,,0,0,2.41,,,0,69544,0,0,0,42206,0,0,0,23256,0,0,0,36706,0,0,0,55852,0,0,0,5492,0,0,1,1074286,0,0,0,912,0,0,0,69304,0,0,0,54374,0,0
+1-3682309-G-C,1,3682309,,G,C,PASS,NA,ENST00000378295.9,c.-33-24G>C,,c.-33-24G>C,intron_variant,,,,1,1442640,6.93173626129873E-07,0,0,PASS,,,0.693,,0,0,-3.09,,,0,69918,0,0,0,43250,0,0,0,23634,0,0,0,37102,0,0,0,56160,0,0,0,5522,0,0,0,1080638,0,0,0,912,0,0,0,70644,0,0,1,54860,0,0
+1-3682309-G-A,1,3682309,,G,A,PASS,NA,ENST00000378295.9,c.-33-24G>A,,c.-33-24G>A,intron_variant,,,,6,1442640,4.15904175677924E-06,0,0,PASS,nfe,0.000002,0.825,,0,0,-3.09,,,0,69918,0,0,0,43250,0,0,0,23634,0,0,0,37102,0,0,0,56160,0,0,0,5522,0,0,6,1080638,0,0,0,912,0,0,0,70644,0,0,0,54860,0,0
+1-3682310-G-A,1,3682310,rs1449761755,G,A,PASS,NA,ENST00000378295.9,c.-33-23G>A,,c.-33-23G>A,intron_variant,,,,1,1445388,6.91855750843372E-07,0,0,PASS,,,0.247,,0,0,-0.758,,,0,70038,0,0,1,43458,0,0,0,23788,0,0,0,37182,0,0,0,56218,0,0,0,5536,0,0,0,1082228,0,0,0,912,0,0,0,71036,0,0,0,54992,0,0
+1-3682312-G-A,1,3682312,,G,A,PASS,NA,ENST00000378295.9,c.-33-21G>A,,c.-33-21G>A,intron_variant,,,,5,1447908,3.45325807993326E-06,0,0,PASS,nfe,0.00000086,4.77,,0.03,0.01,0.468,,,0,70096,0,0,0,43558,0,0,0,23852,0,0,0,37252,0,0,0,56312,0,0,0,5542,0,0,4,1083938,0,0,0,912,0,0,1,71346,0,0,0,55100,0,0
+1-3682313-T-A,1,3682313,,T,A,PASS,NA,ENST00000378295.9,c.-33-20T>A,,c.-33-20T>A,intron_variant,,,,1,1446084,6.91522760780148E-07,0,0,PASS,,,3.91,,0,0,-0.524,,,1,69952,0,0,0,43328,0,0,0,23842,0,0,0,37070,0,0,0,56288,0,0,0,5542,0,0,0,1082966,0,0,0,910,0,0,0,71170,0,0,0,55016,0,0
+1-3682314-C-G,1,3682314,rs554511962,C,G,PASS,NA,ENST00000378295.9,c.-33-19C>G,,c.-33-19C>G,intron_variant,,,,6,1451188,4.13454356017277E-06,0,0,PASS,nfe,0.00000199,0.322,,0,0,0.42,,,0,70186,0,0,0,43810,0,0,0,24036,0,0,0,37346,0,0,0,56458,0,0,0,5552,0,0,6,1085908,0,0,0,912,0,0,0,71728,0,0,0,55252,0,0
+1-3682314-C-A,1,3682314,rs554511962,C,A,PASS,PASS,ENST00000378295.9,c.-33-19C>A,,c.-33-19C>A,intron_variant,,,,23,1451186,1.5849105490268E-05,0,0,PASS,eas,0.00037641,0.292,,0,0,0.42,,,0,70186,0,0,0,43810,0,0,0,24036,0,0,21,37346,0,0,0,56458,0,0,0,5552,0,0,0,1085906,0,0,0,912,0,0,1,71728,0,0,1,55252,0,0
diff --git a/examples/datasets/sample4/sample4.yaml b/examples/datasets/sample4/sample4.yaml
new file mode 100644
index 0000000..5f3df3f
--- /dev/null
+++ b/examples/datasets/sample4/sample4.yaml
@@ -0,0 +1,22 @@
+pattern:
+  - '*.csv'
+
+delimiter: C
+
+columns:
+- TYPE
+- POSITION
+- VARIANT
+
+annotation:
+- type: plugin
+  plugin: HGVS_decoder
+  field: 
+  - TYPE
+  - POSITION
+  - VARIANT
+- type: internal
+  field: HGVS
+  fieldSource:
+  - 'HGVS Consequence'
+  - HGVSp
diff --git a/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
new file mode 100644
index 0000000..881462c
--- /dev/null
+++ b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
@@ -0,0 +1,134 @@
+from openvariant.plugins.context import Context
+from openvariant.plugins.plugin import Plugin
+
+import re
+
+class HGVS_decoderContext(Context):
+
+    def __init__(self, row: dict, field_name: str, file_path: str) -> None:
+        super().__init__(row, field_name, file_path)
+
+
+amino_acids_map = {
+    "Ala": "Alanine",
+    "Arg": "Arginine",    
+    "Asn": "Asparagine", 
+    "Asp": "Aspartic Acid",
+    "Cys": "Cysteine", 
+    "Gln": "Glutamine", 
+    "Glu": "Glutamic Acid",
+    "Gly": "Glycine",
+    "His": "Histidine", 
+    "Ile": "Isoleucine",
+    "Leu": "Leucine",
+    "Lys": "Lysine",
+    "Met": "Methionine",
+    "Phe": "Phenylalanine", 
+    "Pro": "Proline",
+    "Ser": "Serine",
+    "Thr": "Threonine", 
+    "Trp": "Tryptophan", 
+    "Tyr": "Tyrosine",
+    "Val": "Valine",
+    "Ter": "Termination codon"
+}
+
+variant_map = {
+    "delins": "deletion-insertion by ",
+    "del": "deletion",
+    "ins": "insertion of ",
+    "dup": "duplication",
+    "inv": "inversion",
+    "con": "conversion",
+    "ext": "extension of ",
+    "fs": "frameshift mutation of "
+}
+
+position_regex = re.compile(r'(\(?\*?-?\??\_?\d+(?:\_?[+-]\d+\??)?\)?(_)?(?:\(?\*?-?\d+\_?(?:[+-]\d+)?\??\)?)?)')
+protein_position_regex = re.compile(r'(?<!\*)(?<!\-)(\d+)\=?\*?')
+
+nucleotides = re.compile(r'([ACTG]+|[agc]+[u]?)')
+variant_regex = re.compile(r'[ACTG]+>[ACTG]+|delins[ACTG]+|del|ins[ACTG]+|dup|inv|con|\[[0-9]+\]')  # 'delins' must precede 'del'
+variant_rna_regex = re.compile(r'[agcu]+>[agcu]+|delins[agcu]+|del|ins[agcu]+|dup|inv|con|\[[0-9]+\]')
+
+amino_acids = r'(?:Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter)'
+variant_protein_aa_regex = re.compile(rf'(?<!ext)(?<!fs)(?<!ins)(?<!delins){amino_acids}')
+variant_protein_mod_regex = re.compile(rf'(?:delins{amino_acids}|del|ins{amino_acids}|dup|inv|con|ext{amino_acids}?\*?(?:[0-9]+)?|fs{amino_acids}[0-9]+)')
+variant_type_regex = re.compile(f'(?:delins|del|ins|dup|inv|con|ext|fs)')
+
+def parse_hgvs_pos(hgvs_str):
+    matches_pos = re.findall(position_regex, hgvs_str)
+    position = [m[0] for m in matches_pos]
+    position = ";".join(position)
+    return position
+
+def parse_hgvs_pos_protein(hgvs_str):
+    matches_pos = re.findall(protein_position_regex, hgvs_str)
+    position = [m for m in matches_pos]
+    position = ";".join(position)
+    return position
+
+def parse_hgvs_variant(hgvs_str):
+    """Describe the change in a nucleotide HGVS string, or None if unrecognised."""
+    # DNA (uppercase) notation first, then RNA (lowercase) notation.
+    found = variant_regex.search(hgvs_str) or variant_rna_regex.search(hgvs_str)
+    if found is None:
+        return None
+    change = found.group(0)
+    kind = variant_type_regex.match(change)
+    if kind is None:
+        return change  # plain substitution ('G>T') or repeat count ('[12]')
+    return variant_map[kind.group(0)] + change[kind.end():]
+
+def parse_hgvs_variant_protein(hgvs_str):
+    matches = re.findall(variant_protein_aa_regex, hgvs_str)
+    if len(matches) == 1:
+        variant = amino_acids_map.get(matches[0])
+    else:
+        aa_1 = amino_acids_map.get(matches[0])
+        aa_2 = amino_acids_map.get(matches[1])
+        if aa_1 == aa_2:
+            variant = "Synonymous (silent) variant"
+        else:
+            variant = aa_1 + " mutated to " + aa_2
+    matches = re.findall(variant_protein_mod_regex, hgvs_str)
+    if len(matches) > 0:
+        variant += " and "
+        matches_variant = re.findall(variant_type_regex, matches[0])
+        variant += variant_map.get(matches_variant[0])
+        matches_amino_acid = re.findall(amino_acids, matches[0])
+        if len(matches_amino_acid) > 0:
+            variant += amino_acids_map.get(matches_amino_acid[0])
+    return variant
+
+def interpret_hgvs(hgvs_str):
+    prefix_map = {
+        "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "c.": ("cDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "n.": ("ncDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "m.": ("mtDNA", parse_hgvs_pos, parse_hgvs_variant),
+        "r.": ("RNA", parse_hgvs_pos, parse_hgvs_variant),
+        "p.": ("Protein", parse_hgvs_pos_protein, parse_hgvs_variant_protein),
+    }
+
+    prefix = hgvs_str[:2]
+   
+    result = prefix_map.get(prefix, ("Unknown", [], []))
+    seq = hgvs_str[2:]
+   
+    type_variant = result[0]
+    position = result[1](seq) 
+    variant = result[2](seq)
+
+    return type_variant, position, variant
+
+
+
+class HGVS_decoderPlugin(Plugin):
+
+    def run(self, context: HGVS_decoderContext) -> tuple:
+        """Return the (type, position, variant) values decoded from the row's 'HGVS' field."""
+        value = context.row["HGVS"]
+        type_variant, position, variant = interpret_hgvs(value)
+
+        return type_variant, position, variant
diff --git a/examples/plugin_system/HGVS_decoder/__init__.py b/examples/plugin_system/HGVS_decoder/__init__.py
new file mode 100644
index 0000000..a9b2ec9
--- /dev/null
+++ b/examples/plugin_system/HGVS_decoder/__init__.py
@@ -0,0 +1,2 @@
+from .HGVS_decoder import HGVS_decoderPlugin
+from .HGVS_decoder import HGVS_decoderContext
diff --git a/examples/plugin_system/README.md b/examples/plugin_system/README.md
index 0ea62c3..c918436 100644
--- a/examples/plugin_system/README.md
+++ b/examples/plugin_system/README.md
@@ -1,3 +1,11 @@
 # Plugin system examples
 
-- [Plugin system](plugin_system.ipynb) - A simple example that two plugins.
+### Plugins
+
+- [Add date](./add_date) - Plugin to add the current date.
+- [Get lenght](./get_length) - Plugin to obtain the different between two values.
+- [HGVS decoder](./HGVS_decoder) - Plugin to decode the type, position and change of different variants.
+
+### Output example
+
+- [Plugin system](plugin_system.ipynb) - Examples of single-field and multiple-field plugins.
diff --git a/examples/plugin_system/plugin_system.ipynb b/examples/plugin_system/plugin_system.ipynb
index 8469a7c..5a741d6 100644
--- a/examples/plugin_system/plugin_system.ipynb
+++ b/examples/plugin_system/plugin_system.ipynb
@@ -2,19 +2,40 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "source": [
-    "# Plugin system example"
-   ],
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
-   }
+   },
+   "source": [
+    "# Plugin system example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Single field plugin\n",
+    "\n",
+    "Includes the Add Date plugin and the Get Length plugin."
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": 1,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -41,34 +62,74 @@
    "source": [
     "%%bash\n",
     "openvar cat ../datasets/sample3 --header"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Multiple fields plugin\n",
+    "\n",
+    "Decoding HGVS across different variants"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TYPE\tPOSITION\tVARIANT\n",
+      "cDNA\t-33-42\tG>T\n",
+      "cDNA\t-33-42\tG>C\n",
+      "cDNA\t-33-42\tG>A\n",
+      "cDNA\t-33-39\tT>C\n",
+      "cDNA\t-33-37\tC>A\n",
+      "cDNA\t-33-36\tC>T\n",
+      "cDNA\t-33-34\tA>G\n",
+      "cDNA\t-33-33\tG>A\n",
+      "cDNA\t-33-30\tT>C\n",
+      "cDNA\t-33-28\tC>A\n",
+      "cDNA\t-33-27\tT>C\n",
+      "cDNA\t-33-25\tduplication\n",
+      "cDNA\t-33-24\tG>C\n",
+      "cDNA\t-33-24\tG>A\n",
+      "cDNA\t-33-23\tG>A\n",
+      "cDNA\t-33-21\tG>A\n",
+      "cDNA\t-33-20\tT>A\n",
+      "cDNA\t-33-19\tC>G\n",
+      "cDNA\t-33-19\tC>A\n"
+     ]
     }
-   }
+   ],
+   "source": [
+    "%%bash\n",
+    "openvar cat ../datasets/sample4 --header"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ "nbformat_minor": 4
+}

From 8cb9d1a80222ecd7923934c80cc226ffa08948d2 Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Sun, 18 May 2025 17:05:32 +0200
Subject: [PATCH 04/12] fix: minor fix plugin examples

---
 docs/examples/plugin_examples.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/examples/plugin_examples.rst b/docs/examples/plugin_examples.rst
index 8ad942f..c387cf1 100644
--- a/docs/examples/plugin_examples.rst
+++ b/docs/examples/plugin_examples.rst
@@ -126,7 +126,7 @@ The *annotation* file with multiple fields can be described as:
 
     annotation:
         - type: plugin
-          plugin: multi_test
+          plugin: HGVS_decoder
           field:
             - TYPE
             - POSITION

From 7613c8102f6239b7344bd02738e3732383db144e Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Sun, 18 May 2025 22:49:21 +0200
Subject: [PATCH 05/12] fix: minor fix

---
 examples/datasets/sample4/sample4.yaml | 1 +
 openvariant/variant/variant.py         | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/datasets/sample4/sample4.yaml b/examples/datasets/sample4/sample4.yaml
index 5f3df3f..403ba28 100644
--- a/examples/datasets/sample4/sample4.yaml
+++ b/examples/datasets/sample4/sample4.yaml
@@ -4,6 +4,7 @@ pattern:
 delimiter: C
 
 columns:
+- HGVS
 - TYPE
 - POSITION
 - VARIANT
diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py
index ebba768..9581170 100644
--- a/openvariant/variant/variant.py
+++ b/openvariant/variant/variant.py
@@ -198,7 +198,7 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
         header, row, row_header = None, {}, []
 
         matches = [check_extension(ext, file_path) for ext in annotation.patterns]
-
+        print(annotation._annotations)
         if not any(matches):
             raise NameError("Annotation patterns don't match with input file.")
 

From a8bbe70c034f644f7733409919d0ed62d98f907b Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Sun, 18 May 2025 22:51:19 +0200
Subject: [PATCH 06/12] fix: minor fix

---
 examples/datasets/sample4/sample4.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/datasets/sample4/sample4.yaml b/examples/datasets/sample4/sample4.yaml
index 403ba28..5f3df3f 100644
--- a/examples/datasets/sample4/sample4.yaml
+++ b/examples/datasets/sample4/sample4.yaml
@@ -4,7 +4,6 @@ pattern:
 delimiter: C
 
 columns:
-- HGVS
 - TYPE
 - POSITION
 - VARIANT

From daeda16d5e39798c9d2fc654fa04d04221feaa17 Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Sun, 18 May 2025 23:41:03 +0200
Subject: [PATCH 07/12] fix: test fix

---
 openvariant/variant/variant.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py
index 9581170..ebba768 100644
--- a/openvariant/variant/variant.py
+++ b/openvariant/variant/variant.py
@@ -198,7 +198,7 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
         header, row, row_header = None, {}, []
 
         matches = [check_extension(ext, file_path) for ext in annotation.patterns]
-        print(annotation._annotations)
+
         if not any(matches):
             raise NameError("Annotation patterns don't match with input file.")
 

From 434ed5212a748df9adb542bb9144592369897636 Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Wed, 21 May 2025 15:40:45 +0200
Subject: [PATCH 08/12] fix: bug on HGVS decoder

---
 docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py | 5 ++++-
 examples/plugin_system/HGVS_decoder/HGVS_decoder.py      | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
index 881462c..b23c87a 100644
--- a/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
+++ b/docs/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
@@ -101,6 +101,9 @@ def parse_hgvs_variant_protein(hgvs_str):
             variant += amino_acids_map.get(matches_amino_acid[0])
     return variant
 
+def parse_hgvs_unknow(hgvs_str):
+    return None
+
 def interpret_hgvs(hgvs_str):
     prefix_map = {
         "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant),
@@ -113,7 +116,7 @@ def interpret_hgvs(hgvs_str):
 
     prefix = hgvs_str[:2]
    
-    result = prefix_map.get(prefix, ("Unknown", [], []))
+    result = prefix_map.get(prefix, ("Unknown", parse_hgvs_unknow, parse_hgvs_unknow))
     seq = hgvs_str[2:]
    
     type_variant = result[0]
diff --git a/examples/plugin_system/HGVS_decoder/HGVS_decoder.py b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
index 881462c..b23c87a 100644
--- a/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
+++ b/examples/plugin_system/HGVS_decoder/HGVS_decoder.py
@@ -101,6 +101,9 @@ def parse_hgvs_variant_protein(hgvs_str):
             variant += amino_acids_map.get(matches_amino_acid[0])
     return variant
 
+def parse_hgvs_unknow(hgvs_str):
+    return None
+
 def interpret_hgvs(hgvs_str):
     prefix_map = {
         "g.": ("gDNA", parse_hgvs_pos, parse_hgvs_variant),
@@ -113,7 +116,7 @@ def interpret_hgvs(hgvs_str):
 
     prefix = hgvs_str[:2]
    
-    result = prefix_map.get(prefix, ("Unknown", [], []))
+    result = prefix_map.get(prefix, ("Unknown", parse_hgvs_unknow, parse_hgvs_unknow))
     seq = hgvs_str[2:]
    
     type_variant = result[0]

From 75ef803e94a977966d181ea211d3f678a71c8b6b Mon Sep 17 00:00:00 2001
From: David Martinez Millan <dmartinezmillan@hotmail.com>
Date: Wed, 21 May 2025 16:56:06 +0200
Subject: [PATCH 09/12] add: docs on plugin system

---
 docs/user_guide/annotation_structure.rst | 20 ++++++++++++++++++--
 docs/user_guide/plugin_system.rst        | 16 +++++++++++++++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/docs/user_guide/annotation_structure.rst b/docs/user_guide/annotation_structure.rst
index c99e3ed..c2bfb6a 100644
--- a/docs/user_guide/annotation_structure.rst
+++ b/docs/user_guide/annotation_structure.rst
@@ -228,12 +228,12 @@ Plugin
 #############
 
 It will apply the plugin functionality to each row of the `input` file. The plugin can be internal, located into `plugin`
-folder or can be customized and created by the user. See further details in :ref:`Plugin system` section.
+folder or can be customized and created by the user.
 
 The parameters that `Plugin` needs are:
 
 * ``type``: type of annotation. (required)
-* ``field``: name that will appear as a head column of this annotation. (required)
+* ``field``: a single name or a list of fields that that will appear as a head column of this annotation. (required)
 * ``plugin``: name of plugin to apply (required)
 
 .. code-block:: yaml
@@ -243,6 +243,22 @@ The parameters that `Plugin` needs are:
           field: 'ALT_TYPE'
           plugin: 'alteration_type'
 
+The plugin system supports multiple fields; however, the number and order of the fields listed in the annotation
+must match the values returned by the plugin implementation.
+
+.. code-block:: yaml
+
+    # Example:
+        - type: 'plugin'
+          field:
+            - 'Chr'
+            - 'Start'
+            - 'End'
+            - 'Alt'
+            - 'Ref'
+          plugin: 'variant_decoder'
+
+See further details in :ref:`Plugin system` section.
 
 Exclude (optional)
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/user_guide/plugin_system.rst b/docs/user_guide/plugin_system.rst
index 572caae..b81c8bb 100644
--- a/docs/user_guide/plugin_system.rst
+++ b/docs/user_guide/plugin_system.rst
@@ -20,7 +20,7 @@ visualize how the different classes are connected and composed with **OpenVarian
 |
 
 As we have mentioned before, the plugin has to be present on the `annotation` file in order to be used. Custom plugins will be placed in the 
-folder where the environment variable :bash:`OPENVAR_PLUGIN` points (:bash:`/home/user/.local/share/openvariant/` by default). 
+folder where the environment variable ``OPENVAR_PLUGIN`` points (``/home/user/.local/share/openvariant/`` by default).
 The `Builder` will manage to find them and apply the data transformation.
 Plugins will inherit `Context` and `Plugin` as base classes for each plugin. These classes are described as it follows:
 
@@ -62,5 +62,19 @@ Plugins will inherit `Context` and `Plugin` as base classes for each plugin. The
         """
         raise NotImplementedError
 
+A plugin can return either a single field or multiple fields. Both cases are handled as follows:
+
+*Returning a single field:*
+
+.. code-block:: python
+
+        return position
+
+*Returning multiple fields:*
+
+.. code-block:: python
+
+        return chromosome, start, end, alt, ref
+
 Check :ref:`Command-line interface` to know how to create a new plugin. Also, to check more examples on how plugins can be
 applied and written, see :ref:`Plugin examples`.
\ No newline at end of file

From 8311288c6a170fbb219ebad17688c166a62a741d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?=
 <10314744+dmartmillan@users.noreply.github.com>
Date: Thu, 22 May 2025 13:53:58 +0200
Subject: [PATCH 10/12] fix: update variant.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 openvariant/variant/variant.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openvariant/variant/variant.py b/openvariant/variant/variant.py
index ebba768..31c2a19 100644
--- a/openvariant/variant/variant.py
+++ b/openvariant/variant/variant.py
@@ -182,7 +182,7 @@ def __init__(self, path: str, annotation: Annotation, skip_files: bool = False)
         self._path: str = path
         self._annotation: Annotation = annotation
 
-        #annotation_keys = [item for x in annotation.annotations.keys() for item in (list(x) if isinstance(x, tuple) else [x])]
+        
         self._header: List[str] = annotation.columns
         self.skip_files = skip_files
 

From 80c8229f2d50d42046e6000dd200d79ec2fa01e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?=
 <10314744+dmartmillan@users.noreply.github.com>
Date: Thu, 22 May 2025 13:54:32 +0200
Subject: [PATCH 11/12] fix: update plugin_system README

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 examples/plugin_system/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/plugin_system/README.md b/examples/plugin_system/README.md
index c918436..fdc90f3 100644
--- a/examples/plugin_system/README.md
+++ b/examples/plugin_system/README.md
@@ -3,7 +3,7 @@
 ### Plugins
 
 - [Add date](./add_date) - Plugin to add the current date.
-- [Get lenght](./get_length) - Plugin to obtain the different between two values.
+- [Get length](./get_length) - Plugin to obtain the difference between two values.
 - [HGVS decoder](./HGVS_decoder) - Plugin to decode the type, position and change of different variants.
 
 ### Output example

From ecb18f0f49c47384a6fb8f9c20d04ac154ad4914 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?=
 <10314744+dmartmillan@users.noreply.github.com>
Date: Thu, 22 May 2025 13:55:02 +0200
Subject: [PATCH 12/12] fix: typo in docs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 docs/user_guide/annotation_structure.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user_guide/annotation_structure.rst b/docs/user_guide/annotation_structure.rst
index c2bfb6a..6caa278 100644
--- a/docs/user_guide/annotation_structure.rst
+++ b/docs/user_guide/annotation_structure.rst
@@ -233,7 +233,7 @@ folder or can be customized and created by the user.
 The parameters that `Plugin` needs are:
 
 * ``type``: type of annotation. (required)
-* ``field``: a single name or a list of fields that that will appear as a head column of this annotation. (required)
+* ``field``: a single name or a list of fields that will appear as a head column of this annotation. (required)
 * ``plugin``: name of plugin to apply (required)
 
 .. code-block:: yaml