Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ templates = $(foreach i,$(build_files),--template $(i))

### Set Up

# Create an output directory on demand. `$@` expands to whichever listed
# directory is being built, so the single recipe serves every target here.
# NOTE(review): the first header lists a subset of the targets on the second
# header and carries no recipe of its own; it looks like the pre-change line
# of the diff. Confirm whether it should remain.
build build/validate:
build build/validate build/valve:
mkdir -p $@

# We use the official development version of ROBOT for most things.
Expand Down Expand Up @@ -115,20 +115,35 @@ build/validation_errors.tsv: src/scripts/validate_templates.py index.tsv iedb/ie
# Run the template validation script and write its report to $@.
# `$<` is the script itself (the first prerequisite). The remaining arguments
# are the index table, the IEDB table, the literal directory name `build`,
# and the output path.
# NOTE(review): nothing on the command line visibly selects "strict" mode;
# presumably the script decides from its arguments. Confirm against
# src/scripts/validate_templates.py.
build/validation_errors_strict.tsv: src/scripts/validate_templates.py index.tsv iedb/iedb.tsv $(build_files)
python3 $< index.tsv iedb/iedb.tsv build $@

# Paths of every entry directly under src/validation (the VALVE configuration
# tables), each already prefixed with the directory name.
# $(wildcard) is used instead of `$(shell ls ...)` so no process is forked at
# parse time and a missing directory yields an empty list rather than an `ls`
# error on stderr. $(sort) keeps the order deterministic across Make versions.
VALVE_CONFIG := $(sort $(wildcard src/validation/*))

# Rule whose targets are the VALVE configuration tables in $(VALVE_CONFIG).
# build/valve is an order-only prerequisite (after `|`): it must exist, but
# its timestamp never forces a rebuild.
# NOTE(review): VALVE_CONFIG_MASTER is not defined in the visible part of this
# file. Confirm it is set elsewhere; if it is empty, these targets have no
# normal prerequisites.
# NOTE(review): the recipe copies src/validation/* into build/valve and does
# not write `$@` (the targets are paths under src/validation). Confirm this is
# the intended direction of the copy.
$(VALVE_CONFIG): $(VALVE_CONFIG_MASTER) | build/valve
cp src/validation/* build/valve

# Pattern rule: build/valve/NAME.tsv is made by copying ontology/NAME.tsv.
# `$<` is the source table and `$@` is the copy. build/valve is order-only,
# so the directory must exist but its timestamp is ignored.
build/valve/%.tsv: ontology/%.tsv | build/valve
cp $< $@

# Run `valve` on the src/validation and ontology directories and write its
# output to $@. The prerequisites only decide when to re-run; the recipe names
# the two directories directly rather than using `$^`.
# `|| true` makes the recipe succeed whatever valve's exit status is.
# NOTE(review): presumably valve exits non-zero when it reports validation
# problems and the report is still wanted. Confirm, because this also hides a
# failure to run valve at all.
# NOTE(review): the meaning of `-r 3` is not shown here; check the valve
# command-line documentation.
build/validation_valve.tsv: $(VALVE_CONFIG) $(source_files)
valve src/validation ontology -o $@ -r 3 || true

# apply_NAME: run `cogs clear all`, then `cogs apply` on
# build/validation_NAME.tsv (`$<`, the first prerequisite).
# .cogs is an order-only prerequisite: it must exist, but its timestamp does
# not trigger this rule.
# NOTE(review): presumably `cogs clear all` removes previously applied
# messages so only the new table's results remain. Confirm against the COGS
# documentation.
apply_%: build/validation_%.tsv | .cogs
cogs clear all
cogs apply $<

# validate_tables: run `cogs fetch`, `cogs pull` and `cogs clear all`, then the
# apply_errors and apply_valve goals in that order, and finish with `cogs push`.
# The nested builds are started with $(MAKE) rather than a literal `make`, so
# flags such as -n and -j (jobserver) and command-line variable overrides reach
# the sub-make, and the same make binary is reused.
# The steps stay as sequential recipe lines because their order matters.
.PHONY: validate_tables
validate_tables:
	cogs fetch && cogs pull
	cogs clear all
	$(MAKE) apply_errors
	$(MAKE) apply_valve
	cogs push

# validate_tables_strict: run `cogs fetch`, `cogs pull` and `cogs clear all`,
# then the apply_errors_strict and apply_valve goals in that order, and finish
# with `cogs push`.
# The nested builds are started with $(MAKE) rather than a literal `make`, so
# flags such as -n and -j (jobserver) and command-line variable overrides reach
# the sub-make, and the same make binary is reused.
# The steps stay as sequential recipe lines because their order matters.
.PHONY: validate_tables_strict
validate_tables_strict:
	cogs fetch && cogs pull
	cogs clear all
	$(MAKE) apply_errors_strict
	$(MAKE) apply_valve
	cogs push

### Processing
Expand Down
5 changes: 4 additions & 1 deletion ontology/core.tsv
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
Label IEDB Label Class Type Parent Logic Definition Definition Source Example of Usage
LABEL A OBI:9991118 CLASS_TYPE C % C % A IAO:0000115 A IAO:0000119 A IAO:0000112
Beta-2-microglobulin locus subclass genetic locus The region of a chromosome that codes for Beta-2-microglobulin molecules. IEDB
MHC haplotype subclass SO:0001024 A set of MHC alleles that is frequently inherited together. IEDB The mouse H-2-k class II haplotype is expressed in C3H mice.
genetic locus subclass genetic entity
haplotype subclass genetic entity
haplotype_block subclass genetic entity
MHC haplotype subclass haplotype A set of MHC alleles that is frequently inherited together. IEDB The mouse H-2-k class II haplotype is expressed in C3H mice.
MHC ligand assay subclass immune epitope assay
MHC locus subclass genetic locus The region of a chromosome that codes for MHC molecules. IEDB The class II regions encoding for the DP, DQ, and DR molecules on human chromosome 6.
MHC protein complex with haplotype haplotype equivalent MHC protein complex ('haplotype member of' some 'MHC haplotype') A protein complex that is a member of an MHC haplotype. IEDB The mouse H-2-Kk molecule belongs to the H-2-k haplotype.
Expand Down
13 changes: 10 additions & 3 deletions ontology/external.tsv
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
ID Label Editor Preferred Term IEDB Label Class Type Parent Logic Definition Definition Source Example of Usage Source Ontology Species Code
ID A rdfs:label A IAO:0000111 A OBI:9991118 CLASS_TYPE C % C % A IAO:0000115 A IAO:0000119 A IAO:0000112 AI IAO:0000412
BFO:0000040 material entity material entity http://purl.obolibrary.org/obo/bfo.owl
ECO:0000000 evidence evidence subclass information content entity http://purl.obolibrary.org/obo/eco.owl
ECO:0000006 experimental evidence experimental evidence subclass evidence http://purl.obolibrary.org/obo/eco.owl
ECO:0000033 author statement supported by traceable reference http://purl.obolibrary.org/obo/eco.owl
GO:0042611 MHC protein complex MHC protein complex MHC molecule equivalent protein complex ('has part' some (protein and ('gene product of' some 'MHC locus'))) A transmembrane protein complex composed of an MHC alpha chain and, in most cases, either an MHC class II beta chain or an invariant beta2-microglobin chain, and with or without a bound peptide, lipid, or polysaccharide antigen. GO http://purl.obolibrary.org/obo/go.owl
GO:0043234 protein complex protein complex MHC subclass material entity http://purl.obolibrary.org/obo/go.owl
IAO:0000030 information content entity information content entity http://purl.obolibrary.org/obo/iao.owl
NCBITaxon:7959 grass carp Ctenopharyngodon idella subclass organism http://purl.obolibrary.org/obo/ncbitaxon.owl Ctid
NCBITaxon:8355 clawed frog Xenopus laevis subclass organism http://purl.obolibrary.org/obo/ncbitaxon.owl Xela
NCBITaxon:8839 duck Anas platyrhynchos subclass organism http://purl.obolibrary.org/obo/ncbitaxon.owl Anpl
Expand All @@ -26,9 +30,12 @@ NCBITaxon:9940 sheep Ovis aries subclass organism http://purl.obolibrary.or
NCBITaxon:9986 rabbit Oryctolagus cuniculus subclass organism http://purl.obolibrary.org/obo/ncbitaxon.owl RLA
NCBITaxon:10090 mouse Mus musculus subclass organism http://purl.obolibrary.org/obo/ncbitaxon.owl H2
NCBITaxon:10116 rat Rattus norvegicus subclass organism http://purl.obolibrary.org/obo/ncbitaxon.owl RT1
OBI:0100026 organism organism subclass material entity http://purl.obolibrary.org/obo/obi.owl
OBI:1110128 immune epitope assay immune epitope assay http://purl.obolibrary.org/obo/obi.owl
OBI:1110037 assay measuring binding of a T cell epitope:MHC:TCR complex assay measuring binding of a T cell epitope:MHC:TCR complex subclass immune epitope assay http://purl.obolibrary.org/obo/obi.owl
PR:000000001 protein protein subclass material entity http://purl.obolibrary.org/obo/pr.owl
PR:000004580 Beta-2-microglobulin Beta-2-microglobulin equivalent protein ('gene product of' some 'Beta-2-microglobulin locus') A protein that is a translation product of the human B2M gene or a 1:1 ortholog thereof. http://purl.obolibrary.org/obo/pr.owl
REO:0000079 genetic locus genetic locus subclass genetic entity a nucleic acid sequence region that is part of a genome and represents a specified location or region on a chromosome or other genomic element. http://purl.obolibrary.org/obo/reo.owl
SO:0000355 haplotype_block haplotype_block subclass genetic entity A region of the genome which is co-inherited as the result of the lack of historic recombination within it. http://purl.obolibrary.org/obo/so.owl
SO:0001024 haplotype haplotype subclass genetic entity A haplotype is one of a set of coexisting sequence variants of a haplotype block. http://purl.obolibrary.org/obo/so.owl
REO:0000079 genetic locus genetic locus a nucleic acid sequence region that is part of a genome and represents a specified location or region on a chromosome or other genomic element. http://purl.obolibrary.org/obo/reo.owl
SO:0000355 haplotype_block haplotype_block A region of the genome which is co-inherited as the result of the lack of historic recombination within it. http://purl.obolibrary.org/obo/so.owl
SO:0001024 haplotype haplotype A haplotype is one of a set of coexisting sequence variants of a haplotype block. http://purl.obolibrary.org/obo/so.owl
owl:Thing owl:Thing
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cerberus
ontodev-cogs
ontodev-gizmos==0.1.3
ontodev-valve
openpyxl
8 changes: 8 additions & 0 deletions src/validation/datatype.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
datatype parent match level description instructions replace
blank /^$/ ERROR a blank value (an empty string)
line /^[^\n]+$/ ERROR a single line of text (no line breaks) remove line breaks s/\n/ /g
trimmed line line /^\w.*\w$/ ERROR a line with no leading or trailing whitespace remove leading and trailing whitespace s/\s+(.*)\s+/\1/
label trimmed line ERROR an ontology term label
IRI trimmed line /^\S+$/ ERROR an Internationalized Resource Identifier remove whitespace characters
prefix trimmed line /^\w+$/ ERROR a valid prefix for a CURIE remove non-word characters
numeric /^[0-9]+$/ ERROR a sequence of digits remove non-numeric characters
26 changes: 26 additions & 0 deletions src/validation/field.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
table column condition
external Parent tree(Label)
evidence Conclusion tree(Label, external.Parent)
core Parent tree(Label, evidence.Conclusion)
chain Parent tree(Label, core.Parent)
genetic-locus Parent tree(Label, core.Parent)
haplotype Parent tree(Label, core.Parent)
molecule Parent tree(Label, core.Parent)
mutant-molecule Parent tree(Label, core.Parent)
serotype Parent tree(Label, core.Parent)
chain Parent under(chain.Parent, "protein")
external ID any(in("owl:Thing"), concat(in(prefix.prefix), ":", numeric))
genetic-locus Parent under(genetic-locus.Parent, "genetic locus")
haplotype-molecule Parent under(molecule.Parent, "protein complex")
haplotype-molecule In Taxon under(external.Parent, "organism")
haplotype-molecule With Haplotype under(haplotype.Parent, "haplotype")
haplotype Parent under(haplotype.Parent, "haplotype")
molecule Parent under(molecule.Parent, "protein complex")
molecule Alpha Chain any(blank, in(chain.Label))
molecule Beta Chain any(blank, in("Beta-2-microglobulin", chain.Label))
molecule With Haplotype any(blank, under(haplotype.Parent, "haplotype"))
molecule With Serotype any(blank, under(serotype.Parent, "serotype"))
mutant-molecule Parent under(mutant-molecule.Parent, "mutant MHC protein complex")
serotype-molecule Parent under(molecule.Parent, "protein complex")
serotype-molecule With Serotype under(serotype.Parent, "serotype")
serotype Parent under(serotype.Parent, "serotype")
14 changes: 14 additions & 0 deletions src/validation/prefix.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
prefix base

BFO http://purl.obolibrary.org/obo/BFO_
ECO http://purl.obolibrary.org/obo/ECO_
GO http://purl.obolibrary.org/obo/GO_
IAO http://purl.obolibrary.org/obo/IAO_
MRO http://purl.obolibrary.org/obo/MRO_
NCBITaxon http://purl.obolibrary.org/obo/NCBITaxon_
OBI http://purl.obolibrary.org/obo/OBI_
obo http://purl.obolibrary.org/obo/
owl http://www.w3.org/2002/07/owl#
PR http://purl.obolibrary.org/obo/PR_
REO http://purl.obolibrary.org/obo/REO_
SO http://purl.obolibrary.org/obo/SO_
9 changes: 9 additions & 0 deletions src/validation/rule.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
table when column when condition then column then condition level description
chain Parent in("protein") Gene under(genetic-locus.Parent, "genetic locus") ERROR
external Label not(in("owl:Thing")) Source Ontology not(blank) ERROR
genetic-locus Parent under(genetic-locus.Parent, "MHC locus", direct=True) In Taxon under(external.Parent, "organism") ERROR
haplotype Parent in("MHC haplotype") In Taxon under(external.Parent, "organism") ERROR
molecule Parent not(in("MHC protein complex")) In Taxon under(external.Parent, "organism") ERROR
mutant-molecule Parent under(mutant-molecule.Parent, "mutant MHC protein complex", direct=True) In Taxon under(external.Parent, "organism") ERROR
mutant-molecule Parent not(any(in("mutant MHC protein complex"), under(mutant-molecule.Parent, "mutant MHC protein complex", direct=True))) Mutant Of under(molecule.Parent, "protein complex") ERROR
serotype-molecule Parent not(any(in("MHC protein complex"), under(serotype.Parent, "MHC serotype", direct=True))) In Taxon under(external.Parent, "organism") ERROR