diff --git a/CITATION.cff b/CITATION.cff index 4a7dbb7e..c04c0aa6 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,6 +1,6 @@ title: STRchive -version: 2.14.0 -date-released: "2025-11-10" +version: 2.15.0 +date-released: "2026-01-21" url: https://github.com/dashnowlab/STRchive authors: - family-names: Dashnow diff --git a/data/STRchive-loci.json b/data/STRchive-loci.json index bf0c5861..fef04687 100644 --- a/data/STRchive-loci.json +++ b/data/STRchive-loci.json @@ -900,7 +900,7 @@ "stop_t2t": 27584155, "disease": "Frontotemporal dementia (FTD) and/or amyotrophic lateral sclerosis (ALS)", "inheritance": ["AD"], - "disease_description": "Pure frontotemporal dementia, pure amyotrophic lateral sclerosis or combination of the two [@pmid:39349043]. Nominal associations with risk of Parkinson's has also been reported [@pmid:41074692].", + "disease_description": "Pure frontotemporal dementia, pure amyotrophic lateral sclerosis or combination of the two [@pmid:39349043]. Nominal associations with risk of Parkinson's have also been reported [@pmid:41074692].", "hpo_terms": null, "prevalence": null, "prevalence_details": "The expansion of a hexanucleotide repeat GGGGCC in C9orf72 is the most common known cause of ALS accounting for ~ 40% familial cases and ~ 7% sporadic cases in the European population; overall ALS incidence is 1-2/100,000 person-years, point prevalence is 3-5/100,000 (Europe/US); lifetime risk is 1 in 300 [@pmid:31315673]. Related individuals to patients with C9orf72-ALS appear at an increased risk of disease regardless of carrier status [@pmid:38149039; @pmid:39315390]. C9orf72-FTD is estimated to be 0.04-134:100,000 [@genereviews:NBK268647], and by our estimates 0.65-1.56/100,000 for C9orf72-ALS. 
The expansion has been found across ethnicities/ancestries, with population-dependent prevalence, highest in those with northern European ancestry [@genereviews:NBK268647].", @@ -909,7 +909,7 @@ "age_onset_max": 91.0, "typ_age_onset_min": 50.0, "typ_age_onset_max": 64.0, - "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeat of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476].", + "details": "FTD and ALS form a clinical spectrum [@pmid:37388914; @pmid:22406228]. The clinical ranges of the C9orf72 locus remain ambiguous [@stripy:C9ORF72]: most healthy controls have alleles up to 24 repeats [@pmid:28319737] yet 24-30 repeats are associated with ALS [@pmid:31315673] and while 60 repeats is frequently used as a threshold for uncertain alleles, the exact threshold of pathogenicity remains unclear [@genereviews:NBK268647; @pmid:38099605]. Repeats of 80 motifs and lower appear to have delayed onset for any phenotype [@pmid:28319737]. >250 repeats are associated with a full FTD/ALS disease state [@pmid:31048495], but pathogenic alleles can range from 30 to more than 4000 repeats [@pmid:38099605; @pmid:39709476]. 
Penetrance appears to also be age-dependent, with environmental factors and specific phenotypes associated with sex and age at onset [@pmid:28522837]. Methylation appears to increase with expansion length and age [@pmid:39709476].", "mechanism": "Ambiguous", "mechanism_detail": "The HRE forms DNA and RNA G-quadruplexes with distinct structures and promotes RNA/DNA hybrids (R-loops). The structural polymorphism causes a repeat length-dependent accumulation of transcripts aborted in the HRE region [@omim:105500]. Addiitonal mechanisms theorized include protein loss of function and RNA gain of function [@pmid:37847372]. Multiple cell types in the prefrontal cortex, including oligodendrocytes, microglia, astrocytes, and neurons, appear impacted during pathogenesis [@pmid:39999167].", "year": "2011 [@pmid:21944778]", @@ -962,7 +962,7 @@ "stop_t2t": 13333176, "disease": "Spinocerebellar ataxia type 6", "inheritance": ["AD"], - "disease_description": "Spinocerebellar ataxia type 6 (SCA6) is the most common subtype of autosomal dominant cerebellar ataxia type III (ADCA type III) characterized by late-onset and slowly progressive gait ataxia and other cerebellar signs such as impaired muscle coordination and nystagmus [@mondo:0008457].", + "disease_description": "Spinocerebellar ataxia type 6 (SCA6) is the most common subtype of autosomal dominant cerebellar ataxia type III (ADCA type III) characterized by late-onset and slowly progressive gait ataxia and other cerebellar signs such as impaired muscle coordination and nystagmus [@mondo:0008457]. Ao et al. have proposed that this expansion may have effects on chronotype, differing by sex and menopausal status, as well as depression severity [@pmid:41358280].", "hpo_terms": null, "prevalence": "2.65/100000", "prevalence_details": "13-15% of global SCA prevalence, estimated to be 0.02-31/100,000 [@genereviews:NBK1140; @pmid:29100084]: resultant estimate is 0.3-5/100,000. 
Found across ethnicities/ancestries, with population-dependent prevalence [@genereviews:NBK1140].", @@ -1223,7 +1223,7 @@ "stop_hg19": 45196360, "start_t2t": 42132054, "stop_t2t": 42132091, - "disease": "Progressive Myoclonic Epilepsy Type 1 (EPM1) Unverricht-Lundborg Disease (ULD)", + "disease": "Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD)", "inheritance": ["AR"], "disease_description": "Unverricht-Lundborg disease (ULD) is a rare progressive myoclonic epilepsy disorder characterized by action- and stimulus-sensitive myoclonus, and tonic-clonic seizures with ataxia, but with only a mild cognitive decline over time [@mondo:0009698].", "hpo_terms": null, @@ -1626,7 +1626,7 @@ "age_onset_max": 87.0, "typ_age_onset_min": 42.0, "typ_age_onset_max": 70.0, - "details": "Higher repeat size is associated with earlier age of onset [@pmid:39263992]. The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The affect of interruptions on penetrance and onset has been shown in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Variation in flanking regions appear to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles make pose as susceptibility factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554].", + "details": "Higher repeat size is associated with earlier age of onset [@pmid:39263992]. 
The 250-300 repeats range is linked to incomplete penetrance and >300 repeats with complete penetrance in some studies and resources [@genereviews:NBK599589; @pmid:37399286; @pmid:39227614]. However, our thresholds are taken from suggestions made by Mohren et al. upon evaluation of 169 cases and 802 controls; the authors propose lower thresholds based on pathogenic cases of shorter pure repeats [@pmid:39227614]. Additionally, this study suggests that benign motifs may disrupt the formation of secondary structures in DNA/RNA, leading to reduced pathogenicity. The effect of interruptions on penetrance and onset has been shown in patients, with uninterrupted expansions apparently necessary for disease [@pmid:40007153]. Variation in flanking regions appears to correlate with repeat size [@pmid:39227614; @pmid:38937606]. Intermediate alleles may pose as susceptibility factors or be associated with a phenotypic spectrum (multiple system atrophy) [@pmid:39227614; @pmid:39604554]. Finally, a complex (TTC/TGC) ≥300 repeat expansion has been reported as a risk factor for Parkinson's disease [@pmid:41327893; @pmid:41277530].", "mechanism": "LoF", "mechanism_detail": "Reduced transcript 2 [@pmid:36516086].", "year": "2023 [@pmid:36493768]", diff --git a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed index 49877b6f..41339dcd 100644 --- a/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed +++ b/data/catalogs/STRchive-disease-loci.T2T-chm13.general.bed @@ -62,7 +62,7 @@ chr19 18921630 18921645 EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dy chr19 48597739 48597756 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 chr20 2683189 2683230 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 chr20 4738633 4738705 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease -chr21 42132054 42132091 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR 
Progressive Myoclonic Epilepsy Type 1 (EPM1) Unverricht-Lundborg Disease (ULD) +chr21 42132054 42132091 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 20143615 20143660 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 46280059 46280134 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 chrX 24597766 24597802 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome diff --git a/data/catalogs/STRchive-disease-loci.hg19.general.bed b/data/catalogs/STRchive-disease-loci.hg19.general.bed index a44fe09d..0dddc18d 100644 --- a/data/catalogs/STRchive-disease-loci.hg19.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg19.general.bed @@ -62,7 +62,7 @@ chr19 18896844 18896860 EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dy chr19 46273462 46273524 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 chr20 2633378 2633403 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 chr20 4680043 4680139 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease -chr21 45196323 45196360 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1) Unverricht-Lundborg Disease (ULD) +chr21 45196323 45196360 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 19754285 19754330 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 46191234 46191304 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 chrX 25031646 25031682 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome diff --git a/data/catalogs/STRchive-disease-loci.hg38.general.bed b/data/catalogs/STRchive-disease-loci.hg38.general.bed index b77e7175..961e3bab 100644 --- a/data/catalogs/STRchive-disease-loci.hg38.general.bed +++ b/data/catalogs/STRchive-disease-loci.hg38.general.bed @@ -62,7 +62,7 @@ chr19 18786034 18786050 
EDM1-PSACH_COMP COMP GTC GTC 6 AD Multiple epiphyseal dy chr19 45770204 45770266 DM1_DMPK DMPK CAG CAG 50 AD Myotonic dystrophy type 1 chr20 2652732 2652757 SCA36_NOP56 NOP56 GGCCTG GGCCTG 650 AD Spinocerebellar ataxia type 36 chr20 4699397 4699493 CJD_PRNP PRNP GGTGGTGGCTGGGGGCAGCCTCAT CCTCATGGTGGTGGCTGGGGGCAG 5 AD Creutzfeldt-Jakob disease -chr21 43776442 43776479 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1) Unverricht-Lundborg Disease (ULD) +chr21 43776442 43776479 EPM1_CSTB CSTB CGCGGGGCGGGG CGCGGGGCGGGG 30 AR Progressive Myoclonic Epilepsy Type 1 (EPM1), a.k.a Unverricht-Lundborg Disease (ULD) chr22 19766762 19766807 TOF_TBX1 TBX1 GCN GCN 25 AD Tetralogy of Fallot chr22 45795354 45795424 SCA10_ATXN10 ATXN10 ATTCT ATTCT 800 AD Spinocerebellar ataxia type 10 chrX 25013529 25013565 PRTS_ARX ARX NGC NGC 20 XR Partington syndrome