Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
title: STRchive
version: 2.15.0
version: 2.16.0
date-released: "2026-01-21"
url: https://github.com/dashnowlab/STRchive
authors:
Expand Down
560 changes: 280 additions & 280 deletions data/STRchive-citations.json

Large diffs are not rendered by default.

52 changes: 26 additions & 26 deletions data/STRchive-loci.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,12 @@
"stop_hg19": 147582273,
"start_t2t": 146765190,
"stop_t2t": 146765342,
"disease": "Fragile X syndrome, FRAXE type",
"disease": "Intellectual developmental disorder, Fragile X intellectual disability",
"inheritance": ["XR"],
"disease_description": "A nonsyndromic X-linked mental retardation (NS-XLMR) characterized by mild intellectual deficit. FRAXE is the most common form of NS-XLMR [@mondo:0010659].",
"hpo_terms": null,
"disease_description": "A nonsyndromic X-linked intellectual developmental disorder characterized by mild intellectual deficit. FRAXE is the most common form of nonsyndromic X-linked intellectual disability [@mondo:0010659].",
"hpo_terms": ["HP:0000718 Aggressive behavior", "HP:0000713 Agitation", "HP:0000729 Autistic behavior", "HP:0002312 Clumsiness", "HP:0000722 Compulsive behaviors", "HP:0000750 Delayed speech and language development", "HP:0001249 Intellectual disability"],
"prevalence": "2/50000",
"prevalence_details": "1-4/100,000 males [@url:medlineplus.gov/genetics/condition/fragile-xe-syndrome]; 1/50-100,000 males, more than 50 families [@pmid:11246464]. Found in populations around the globe, including in the UK, US, Canada, Taiwan, Germany, Greece, Cyprus, Spain, and Finland [@pmid:11246464].",
"prevalence_details": "1-4/100,000 males [@url:medlineplus.gov/genetics/condition/fragile-xe-syndrome]; 1/50-100,000 males, more than 50 families [@pmid:11246464]. Found in populations around the globe, including in the UK, US, Canada, Taiwan, Germany, Greece, Cyprus, Spain, and Finland [@pmid:11246464].",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this removal of pmid intended?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no it was not

"age_onset": "Typical: 2-10 [@pmid:11246464]. Range: 1-10; developmental delays without physical features can make onset difficult to detect until schooling [@omim:309548].",
"age_onset_min": 1.0,
"age_onset_max": 10.0,
Expand Down Expand Up @@ -926,8 +926,8 @@
"benign_min": 2,
"benign_max": 23,
"intermediate_min": 24,
"intermediate_max": 60,
"pathogenic_min": 251,
"intermediate_max": 30,
"pathogenic_min": 31,
"pathogenic_max": 4088,
"motif_len": 6,
"ref_copies": 10.8,
Expand Down Expand Up @@ -1634,10 +1634,10 @@
"gene_strand": "-",
"reference_motif_reference_orientation": ["GAA"],
"pathogenic_motif_reference_orientation": ["GAA"],
"benign_motif_reference_orientation": ["GAAGGA", "GAAGAAGAAGAAGCA"],
"benign_motif_reference_orientation": ["GGA", "GCA"],
"unknown_motif_reference_orientation": [],
"pathogenic_motif_gene_orientation": ["CTT"],
"benign_motif_gene_orientation": ["CCTTCT", "CTGCTTCTTCTTCTT"],
"benign_motif_gene_orientation": ["CCT", "CTG"],
"unknown_motif_gene_orientation": [],
"locus_structure": [],
"benign_min": 8,
Expand Down Expand Up @@ -1679,7 +1679,7 @@
"stop_t2t": 146176769,
"disease": "Fragile X syndrome (FXS), fragile X-associated tremor/ataxia syndrome (FXTAS), and fragile X-associated primary ovarian insufficiency FXPOI/POF1",
"inheritance": ["XD"],
"disease_description": "A genetic syndrome caused by mutations in the FMR1 gene which is responsible for the expression of the fragile X mental retardation 1 protein. This protein participates in neural development. This syndrome is manifested with mental, emotional, behavioral, physical, and learning disabilities.; Any primary ovarian failure in which the cause of the disease is a mutation in the FMR1 gene.; Fragile X-associated tremor/ataxia syndrome (FXTAS) is a rare neurodegenerative disorder characterized by adult-onset progressive intention tremor and gait ataxia [@mondo:0010383; @mondo:0010706; @mondo:0010382].",
"disease_description": "A genetic syndrome caused by mutations in the FMR1 gene which is responsible for the expression of the fragile X messenger ribonucleoprotein 1 (FMR1) protein. This protein participates in neural development. This syndrome is manifested with mental, emotional, behavioral, physical, and learning disabilities.; Any primary ovarian failure in which the cause of the disease is a mutation in the FMR1 gene.; Fragile X-associated tremor/ataxia syndrome (FXTAS) is a rare neurodegenerative disorder characterized by adult-onset progressive intention tremor and gait ataxia [@mondo:0010383; @mondo:0010706; @mondo:0010382].",
"hpo_terms": null,
"prevalence": "14/100000",
"prevalence_details": "Incidence of full mutation in males 19/100,000; prevalence 14/100,000 [@genereviews:NBK1384]. Female prevalence 9/100,000 [@pmid:24700618]. Known carrier frequency is approximately 300-500/100,000 but detected was 11/100,000 [@pmid:29100084]. FXS prevalence 1:7000 males, 1:11,000 females; FX premutation carriers 1:290-855 males, 1:148-300 females [@isbn:978-3-031-66932-3]. Found worldwide [@genereviews:NBK1384]. In Thailand, 1 in 600 women carry a premutation, and 1 in 400 carry a 'gray zone' allele [@pmid:39320553].",
Expand All @@ -1688,9 +1688,9 @@
"age_onset_max": 78.0,
"typ_age_onset_min": 1.0,
"typ_age_onset_max": 65.0,
"details": "Intermediate or 'gray zone' occur at 45-54 alleles and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108].",
"details": "Intermediate or 'gray zone' occur at 45-54 alleles and may be unstable enough to expand into the premutation range, as well as associate with parkinsonism [@pmid:32463542; @genereviews:NBK1384]. FXTAS/POI occurs at 55-200 repeats, FXS >200, late onset; AGG and CTG interruptions documented [@genereviews:NBK1384; @pmid:29868108]. Women with the premutation have been reported to show episodic memory deficits similar to those seen in Alzheimer's disease [@pmid:41555826].",
"mechanism": "LoF/GoF",
"mechanism_detail": "Loss of function via transcriptional silencing in FXS, RNA gain of function in FXTAS/FXPOI [@pmid:16205714; @pmid:36169768].",
"mechanism_detail": "Loss of function via transcriptional silencing in FXS, RNA gain of function in FXTAS/FXPOI [@pmid:16205714; @pmid:36169768]. *PRKGG* appears to modulate neurotoxicity [@pmid:41507195].",
"year": "1992 [@pmid:1605194]; causative gene discovered in 1991 [@pmid:1710175]",
"location_in_gene": "5' UTR",
"gene_strand": "+",
Expand Down Expand Up @@ -1884,9 +1884,9 @@
"age_onset_max": 70.0,
"typ_age_onset_min": 20.0,
"typ_age_onset_max": 34.0,
"details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282]. Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784].",
"details": "Benign repeats range from absent [@gnomad:GIPC1] to 32 [@genereviews:NBK535148], while pathogenic alleles range from 73-164 repeats [@pmid:38876750; @genereviews:NBK535148]. Findings suggest that alternative initiation sites and an upstream CTG codon serve as the initiation site for RAN translation [@pmid:41121761]. Intermediate alleles have undetermined significance but may represent a phenotypic spectrum [@pmid:32413282]. Interruptions documented: CGA [@pmid:35245110]. Interruptions proposed but not confirmed in primary literature: TCG/CCT/TTG [@pmid:38467784].",
"mechanism": "LoF/GoF?",
"mechanism_detail": "RNA mediated toxicity hypothesized [@omim:618940], still unknown [@pmid:36169768].",
"mechanism_detail": "Findings suggest that the mechanism is likely not LoF, but the mechanism is otherwise unknown [@pmid:41121761]. This expansion appears to be predominantly RAN translated into a toxic protein [@pmid:41121761]. This protein has been reported to impair cell proliferation, induce cytotoxicity and apoptosis in multiple cell lines, and cause phenotypic defects in a zebrafish model [@pmid:41121761].",
"year": "2020 [@pmid:32413282]",
"location_in_gene": "5' UTR",
"gene_strand": "-",
Expand Down Expand Up @@ -2838,9 +2838,9 @@
"stop_hg19": 145209354,
"start_t2t": 148519695,
"stop_t2t": 148519738,
"disease": "Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3",
"disease": "Neuronal intranuclear inclusion disease, Alzheimer disease and parkinsonism phenotype, Oculopharyngodistal myopathy (OPDM) type 3, hereditary essential tremor type 6",
"inheritance": ["AD"],
"disease_description": "Neuronal intranuclear inclusion disease (NIID) is a very rare multisystem neurodegenerative disorder characterized by the presence of eosinophilic intranuclear inclusions in neuronal and glial cells, and neuronal loss [@mondo:0011327].",
"disease_description": "Neuronal intranuclear inclusion disease (NIID) is a very rare multisystem neurodegenerative disorder characterized by the presence of eosinophilic intranuclear inclusions in neuronal and glial cells, and neuronal loss [@mondo:0011327]. Due to overlapping phenotypes and the shared locus, it is unclear whether these four diseases are comorbid, synonymous, or entirely separate.",
"hpo_terms": null,
"prevalence": null,
"prevalence_details": ">400 patients reported in literature [@pmid:37371433]. Found in individuals of East Asian ancestry [@pmid:38876750].",
Expand All @@ -2851,7 +2851,7 @@
"typ_age_onset_max": 70.0,
"details": "Benign alleles are less than 38 repeats, while pathogenic alleles contain 66+ repeats [@genereviews:NBK535148]. Intermediate alleles may be associated with a phenotypic spectrum, and even pathogenic cases can have variable phenotype [@pmid:39055960; @pmid:39496005]: NOTCH2NLC expansions have been linked to Alzheimer's disease and Parkinson's disease, leading to a potential role in NIID-related disorders [@pmid:31178126]. Age of onset inversely related to allele size [@pmid:38377026]. Motif variation in controls: (AGG)(CGG)n(AGG)0-3(CGG)0-2. GGA and AGC interruptions may influence phenotype [@pmid:34718964]. Interruptions documented: GGA, GGG [@pmid:35245110]; ACCGAGAAGATGCCCGCCCTGC interruption proposed but not confirmed [@pmid:38467784]. Detection may be challenging due to paralogy between genes: C253572.1, NOTCH2, NOTCH2NL, NBPF14, NBPF19.",
"mechanism": "GoF",
"mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@doi:10.1186/s12964-025-02079-1].",
"mechanism_detail": "Polyglycine expansion; may relate to methylation or RNA pathogenicity [@omim:603472; @pmid:36169768; @pmid:38467784]. The polyglycine-containing protein sequesters a key subunit of transcription factor NF-κB in nuclear inclusions, leading to impaired autophagy [@doi:10.1186/s12964-025-02079-1]. Tau pathology is evident; changes in p-tau levels and tau deposition have been reported [@pmid:41539185].",
"year": "2019 [@pmid:31332380]",
"location_in_gene": "5' UTR",
"gene_strand": "+",
Expand Down Expand Up @@ -3026,7 +3026,7 @@
"stop_t2t": 41719805,
"disease": "Congenital central hypoventilation syndrome",
"inheritance": ["AD"],
"disease_description": "A rare disease due to a severely impaired central autonomic control of breathing and dysfunction of the autonomous nervous system. The incidence is estimated to be at 1 of 200 000 livebirths. A heterozygous mutation of PHOX-2B gene is found in 90% of the patients. Association with a Hirschsprung's disease is observed in 16% of the cases (adapted from Mondo) [@mondo:0800026].",
"disease_description": "A rare disease due to a severely impaired central autonomic control of breathing and dysfunction of the autonomous nervous system. The incidence is estimated to be at 1 of 200 000 livebirths. A heterozygous mutation of PHOX-2B gene is found in 90% of the patients. Association with a Hirschsprung's disease is observed in 16% of the cases (adapted from Mondo) [@mondo:0800026]. Hyperinsulinism has been observed in patients [@pmid:41531556].",
"hpo_terms": null,
"prevalence": null,
"prevalence_details": "Incidence is 1:148000-200000 births (Estimated, may include mild/undiagnosed or be overestimated globally) [@genereviews:NBK1427]. Rare, but reported worldwide [@pmid:15121777].",
Expand Down Expand Up @@ -3583,12 +3583,12 @@
"location_in_gene": "Intron 2",
"gene_strand": "-",
"reference_motif_reference_orientation": ["AAAAG"],
"pathogenic_motif_reference_orientation": ["AAGGG", "ACAGG", "AGGGC", "AAGGC", "AGAGG"],
"benign_motif_reference_orientation": ["AAAAG", "AAAGG", "AAGAG", "AAAGGG"],
"unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "AAGAC", "AAGGT", "AGAAC", "AGGGG", "GAAAC", "GGGAC", "GTGAG", "AAAAGA", "AAAGGA", "GGAAAG"],
"pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCCTG", "CCTTG", "CCTCT"],
"benign_motif_gene_orientation": ["CTTTT", "CCTTT", "CTCTT", "CCCTTT"],
"unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "CCGTT", "CTTGT", "ACCTT", "CTGTT", "CCCCT", "CGTTT", "CCCGT", "ACCTC", "CTTTTT", "CCTTTT", "CCCTTT"],
"pathogenic_motif_reference_orientation": ["AAGGG", "ACAGG", "AAAGG", "AGGGC"],
"benign_motif_reference_orientation": ["AAAAG", "AAAGGG"],
"unknown_motif_reference_orientation": ["AAAAA", "AAAAC", "AACGG", "AAGAC", "AAGGT", "AGGGG", "AAGAG", "AAAAGG", "AAACG", "AACAG", "AGGTG", "ACGGG", "AAAAAG", "AAGGC"],
"pathogenic_motif_gene_orientation": ["CCCTT", "CCTGT", "CCTTT", "CCCTG"],
"benign_motif_gene_orientation": ["CTTTT", "CCCTTT"],
"unknown_motif_gene_orientation": ["TTTTT", "GTTTT", "CCGTT", "CTTGT", "ACCTT", "CCCCT", "CTCTT", "CCTTTT", "CGTTT", "CTGTT", "ACCTC", "CCCGT", "CTTTTT", "CCTTG"],
"locus_structure": [
{
"motif": "AAAAG",
Expand Down Expand Up @@ -3823,8 +3823,8 @@
"additional_literature": ["pmid:41426430", "pmid:41219789", "pmid:38961870", "pmid:38467733", "pmid:38059543", "pmid:37592133", "pmid:36740228", "pmid:36622139", "pmid:36092952", "pmid:33791773", "pmid:33721773", "pmid:33681653", "pmid:33501421", "pmid:33040085", "pmid:32973343", "pmid:32203200", "pmid:32194077", "pmid:32174879", "pmid:31664039", "pmid:31483537", "pmid:30559482", "pmid:30351492", "pmid:30194086"]
},
{
"id": "XLMR_SOX3",
"disease_id": "XLMR",
"id": "XLID_SOX3",
"disease_id": "XLID, PHP",
"gene": "SOX3",
"chrom": "chrX",
"start_hg38": 140504316,
Expand All @@ -3833,7 +3833,7 @@
"stop_hg19": 139586526,
"start_t2t": 138816203,
"stop_t2t": 138816248,
"disease": "X-linked panhypopituitarism ; X-linked mental retardation with isolated growth hormone",
"disease": "X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"disease": "X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism",
"disease": "X-linked intellectual developmental disorder with isolated growth hormone deficiency; X-linked panhypopituitarism (PHPX)",

"inheritance": ["XR"],
"disease_description": "X-linked isolated growth hormone deficiency (GHD) or combined pituitary hormone deficiency (CPHD) patients with or without intellectual disability [@pmid:24346842].",
"hpo_terms": null,
Expand Down
6 changes: 3 additions & 3 deletions data/catalogs/STRchive-disease-loci.T2T-chm13.TRGT.bed
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ chr3 131917482 131917635 ID=DM2_CNBP;MOTIFS=CAGG,CAGA,CA;STRUC=<TR>
chr3 141687011 141687054 ID=BPES_FOXL2;MOTIFS=NGC;STRUC=<TR>
chr3 186521667 186521706 ID=FAME4_YEATS2;MOTIFS=TTTTA,TTTCA;STRUC=<TR>
chr4 3073603 3073723 ID=HD_HTT;MOTIFS=CAG,CCG;STRUC=<TR>
chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AGGGC,AAGGC,AGAGG,AAAGG,AAGAG,AAAGGG;STRUC=<TR>
chr4 39318077 39318136 ID=CANVAS_RFC1;MOTIFS=AAAAG,AAGGG,ACAGG,AAAGG,AGGGC,AAAGGG;STRUC=<TR>
chr4 41719745 41719805 ID=CCHS_PHOX2B;MOTIFS=GCN;STRUC=<TR>
chr4 162693303 162693405 ID=FAME7_RAPGEF2;MOTIFS=TTTTA,TTTCA;STRUC=<TR>
chr5 10295525 10295593 ID=FAME3_MARCHF6;MOTIFS=TTTTA,TTTCA;STRUC=<TR>
Expand All @@ -38,7 +38,7 @@ chr12 111575873 111575940 ID=SCA2_ATXN2;MOTIFS=CTG;STRUC=<TR>
chr12 123532573 123532603 ID=OPDM4_RILPL1;MOTIFS=GGC;STRUC=<TR>
chr13 69361213 69361270 ID=SCA8_ATXN8OS;MOTIFS=CTA,CTG;STRUC=<TR>
chr13 99196358 99196404 ID=HPE5_ZIC2;MOTIFS=GCN;STRUC=<TR>
chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GAAGGA,GAAGAAGAAGAAGCA;STRUC=<TR>
chr13 101377549 101377792 ID=SCA27B_FGF14;MOTIFS=GAA,GGA,GCA;STRUC=<TR>
chr14 17522488 17522519 ID=OPMD_PABPN1;MOTIFS=GCN;STRUC=<TR>
chr14 86300519 86300603 ID=SCA3_ATXN3;MOTIFS=CTG;STRUC=<TR>
chr15 20458510 20458536 ID=ALS1_NIPA1;MOTIFS=GCG;STRUC=<TR>
Expand Down Expand Up @@ -70,6 +70,6 @@ chrX 30882677 30882751 ID=DMD_DMD;MOTIFS=TTC,T;STRUC=<TR>
chrX 65975147 65975250 ID=SBMA_AR;MOTIFS=GCA;STRUC=<TR>
chrX 69887153 69887230 ID=XDP_TAF1;MOTIFS=AGAGGG;STRUC=<TR>
chrX 135876774 135876804 ID=VACTERLX_ZIC3;MOTIFS=GCN;STRUC=<TR>
chrX 138816203 138816248 ID=XLMR_SOX3;MOTIFS=NGC;STRUC=<TR>
chrX 138816203 138816248 ID=XLID_SOX3;MOTIFS=NGC;STRUC=<TR>
chrX 146176677 146176769 ID=FXS_FMR1;MOTIFS=CGG;STRUC=<TR>
chrX 146765190 146765342 ID=FRAXE_AFF2;MOTIFS=GCC;STRUC=<TR>
2 changes: 1 addition & 1 deletion data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,6 @@ chrX 30882743 30882751 T 1 DMD_DMD_flank
chrX 65975147 65975250 GCA 3 SBMA_AR
chrX 69887153 69887230 AGAGGG 6 XDP_TAF1
chrX 135876774 135876804 GCN 3 VACTERLX_ZIC3
chrX 138816203 138816248 NGC 3 XLMR_SOX3
chrX 138816203 138816248 NGC 3 XLID_SOX3
chrX 146176677 146176769 CGG 3 FXS_FMR1
chrX 146765190 146765342 GCC 3 FRAXE_AFF2
Binary file modified data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed.gz
Binary file not shown.
Loading