From 57c668033a3739674e9eacc8c6e78698ef6c1d78 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 29 Jan 2025 16:29:02 +0100 Subject: [PATCH 1/7] Prepare next release 6.5.0-SNAPSHOT --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index c6881f1c4..244646d0d 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.4.0 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 9be53f3ad..7e89bbe9d 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.4.0 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 869198a5e..c5fe18a0e 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.4.0 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 519cf7116..9a6de7901 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.4.0 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index e7f580261..3cfd76f69 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.4.0 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index b70c87e6e..9f0c8d7c5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.4.0 + 6.5.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.4.0 - 3.4.0 + 5.5.0-SNAPSHOT + 3.5.0-SNAPSHOT 0.1.0 2.11.4 1.9.13 From 5b662dd45078905c48a3d0e633427af9b76795c7 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 19 Feb 2025 11:36:16 +0100 Subject: [PATCH 2/7] FIX CI/CD workflow to delete task docker images on merge fails #TASK-6880 --- .github/workflows/pull-request-merge.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull-request-merge.yml b/.github/workflows/pull-request-merge.yml index 4b1c43c68..5f3f038ea 100644 --- a/.github/workflows/pull-request-merge.yml +++ b/.github/workflows/pull-request-merge.yml @@ -9,8 +9,9 @@ on: - closed jobs: - delete-docker: - uses: opencb/java-common-libs/.github/workflows/delete-docker-hub-workflow.yml@develop + call-delete-docker: + name: Call Reusable Delete Docker Workflow + uses: opencb/cellbase/.github/workflows/reusable-delete-docker.yml@develop with: - cli: python3 ./build/cloud/docker/docker-build.py delete --images base --tag ${{ github.head_ref }} + task: ${{ github.head_ref }} secrets: inherit From a9f28ff217e0c190817092f6eb100de27964a34a Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 7 May 2025 13:38:20 +0200 Subject: [PATCH 3/7] Prepare Port Patch 5.8.5 -> 6.5.0 XB 1.10.9 - 2.5.0 #TASK-7531 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index e457774e2..244646d0d 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.5 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 76f7a4cc0..7e89bbe9d 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.5 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 48e821e55..c5fe18a0e 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.5 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 661a2c913..ee9f2176b 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.5 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 0ea8bdbfa..8da15b46f 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.5 + 6.5.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 6ef7610e4..f53cfc54e 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.8.5 + 6.5.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 4.12.1-SNAPSHOT - 2.12.4-SNAPSHOT + 5.5.0-SNAPSHOT + 3.5.0-SNAPSHOT 0.1.0 2.11.4 1.9.13 From 82480f1496c98d8f713b4e9e62aeb33e6a051acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 7 May 2025 14:02:57 +0200 Subject: [PATCH 4/7] lib: resolve conflics in CosmicIndexer, #TASK-7531 --- .../clinical/variant/CosmicIndexer.java | 560 ------------------ 1 file changed, 560 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java index fd505e723..4e7bbecad 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java @@ -16,80 +16,23 @@ package org.opencb.cellbase.lib.builders.clinical.variant; -<<<<<<< HEAD -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.models.variant.avro.*; -import org.opencb.cellbase.core.models.DataReleaseSource; -======= import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.biodata.formats.variant.cosmic.CosmicParser; import org.opencb.cellbase.core.exception.CellBaseException; ->>>>>>> release-6.x.x import org.opencb.cellbase.lib.EtlCommons; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; -import static org.opencb.cellbase.lib.EtlCommons.COSMIC_VERSION_FILENAME; - public class CosmicIndexer extends ClinicalIndexer { private final Path cosmicFile; private final String assembly; -<<<<<<< HEAD - private static final int GENE_NAMES_COLUMN = 0; - private static final int HGNC_COLUMN = 3; - private static final int PRIMARY_SITE_COLUMN = 7; - private static final int SITE_SUBTYPE_COLUMN = 8; - private static final int PRIMARY_HISTOLOGY_COLUMN = 11; - private static final int HISTOLOGY_SUBTYPE_COLUMN = 12; - private static final int ID_COLUMN = 16; - private static final int COSM_ID_COLUMN = 17; - private static final int HGVS_COLUMN = 19; - private static final int MUTATION_DESCRIPTION_COLUMN = 21; - private static final int MUTATION_ZYGOSITY_COLUMN = 22; - private static final int FATHMM_PREDICTION_COLUMN = 29; - private static final int FATHMM_SCORE_COLUMN = 30; - private static final int MUTATION_SOMATIC_STATUS_COLUMN = 31; - private static final int PUBMED_PMID_COLUMN = 32; - private static final int SAMPLE_SOURCE_COLUMN = 34; - private static final int TUMOUR_ORIGIN_COLUMN = 35; - - private static final String HGVS_INSERTION_TAG = "ins"; - private static final String HGVS_SNV_CHANGE_SYMBOL = ">"; - private static final String HGVS_DELETION_TAG = "del"; - private static final String HGVS_DUPLICATION_TAG = "dup"; - private static final String CHROMOSOME = "CHR"; - private static final String START = "START"; - private static final String END = "END"; - private static final String REF = "REF"; - private static final String ALT = "ALT"; - private int invalidPositionLines = 0; - private int invalidSubstitutionLines = 0; - private int invalidDeletionLines = 0; - private int invalidInsertionLines = 0; - private int invalidDuplicationLines = 0; - private int invalidMutationCDSOtherReason = 0; - - private static final String VARIANT_STRING_PATTERN = "[ACGT]*"; - - private String date; - private String version; - - private int ignoredCosmicLines = 0; - private long normaliseTime = 0; - private int rocksDBNewVariants = 0; - private int rocksDBUpdateVariants = 0; -======= private static final String COSMIC_VERSION = "v99"; ->>>>>>> release-6.x.x public CosmicIndexer(Path cosmicFile, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { super(genomeSequenceFilePath); @@ -100,508 +43,6 @@ public CosmicIndexer(Path cosmicFile, boolean normalize, Path genomeSequenceFile this.rdb = rdb; } -<<<<<<< HEAD - private void init() { - mutationGRCh37GenomePositionPattern = Pattern.compile("(?<" + CHROMOSOME + ">\\S+):(?<" + START + ">\\d+)-(?<" + END + ">\\d+)"); - snvPattern = Pattern.compile("c\\.\\d+((\\+|\\-|_)\\d+)?(?<" + REF + ">(A|C|T|G)+)>(?<" + ALT + ">(A|C|T|G)+)"); - } - - public void index() throws RocksDBException { - try { - Path cosmicVersionPath = cosmicFile.getParent().resolve(COSMIC_VERSION_FILENAME); - if (!Files.exists(cosmicVersionPath)) { - throw new IOException("COSMIC version file " + cosmicVersionPath + " does not exist"); - } - ObjectMapper jsonObjectMapper = new ObjectMapper(); - ObjectReader jsonObjectReader = jsonObjectMapper.readerFor(DataReleaseSource.class); - DataReleaseSource dataReleaseSource = jsonObjectReader.readValue(cosmicVersionPath.toFile()); - - this.date = dataReleaseSource.getDate(); - this.version = dataReleaseSource.getVersion(); - - logger.info("Parsing cosmic file ..."); - - ProgressLogger progressLogger = new ProgressLogger("Parsed COSMIC lines:", - () -> EtlCommons.countFileLines(cosmicFile), 200).setBatchSize(10000); - - long t0, t1 = 0, t2 = 0; - List evidenceEntries = new ArrayList<>(); - SequenceLocation old = null; - - BufferedReader cosmicReader = FileUtils.newBufferedReader(cosmicFile); - cosmicReader.readLine(); // First line is the header -> ignore it - String line; - while ((line = cosmicReader.readLine()) != null) { - String[] fields = line.split("\t", -1); - - t0 = System.currentTimeMillis(); - EvidenceEntry evidenceEntry = buildCosmic(fields); - t1 += System.currentTimeMillis() - t0; - - SequenceLocation sequenceLocation = parseLocation(fields); - if (old == null) { - old = sequenceLocation; - } - - if (sequenceLocation != null && parseVariant(sequenceLocation, fields)) { - if (sequenceLocation.getStart() == old.getStart() && sequenceLocation.getAlternate().equals(old.getAlternate())) { - evidenceEntries.add(evidenceEntry); - } else { - boolean success = updateRocksDB(old, evidenceEntries); - t2 += System.currentTimeMillis() - t0; - // updateRocksDB may fail (false) if normalisation process fails - if (success) { - numberIndexedRecords += evidenceEntries.size(); - } else { - ignoredCosmicLines += evidenceEntries.size(); - } - old = sequenceLocation; - evidenceEntries.clear(); - evidenceEntries.add(evidenceEntry); - } - } else { - ignoredCosmicLines++; - } - totalNumberRecords++; - progressLogger.increment(1); - - if (totalNumberRecords % 10000 == 0) { - System.out.println("totalNumberRecords = " + totalNumberRecords); - System.out.println("numberIndexedRecords = " + numberIndexedRecords + " (" - + (numberIndexedRecords * 100 / totalNumberRecords) + "%)"); - System.out.println("ignoredCosmicLines = " + ignoredCosmicLines); - System.out.println("buildCosmic = " + t1); - - System.out.println("updateRocksDB = " + t2); - System.out.println("\tnormaliseTime = " + normaliseTime); - System.out.println("\trocksDBNewVariants = " + (numberNewVariants - rocksDBNewVariants)); - System.out.println("\trocksDBUpdateVariants = " + (numberVariantUpdates - rocksDBUpdateVariants)); - System.out.println(""); - - t1 = 0; - t2 = 0; - normaliseTime = 0; - rocksDBNewVariants = numberNewVariants; - rocksDBUpdateVariants = numberVariantUpdates; - } - } - } catch (RocksDBException | IOException e) { - logger.error("Error indexing Cosmic", e); - throw new RocksDBException(e.getMessage()); - } finally { - logger.info("Done"); - this.printSummary(); - } - } - - private void printSummary() { - logger.info("Total number of parsed Cosmic records: {}", totalNumberRecords); - logger.info("Number of indexed Cosmic records: {}", numberIndexedRecords); - logger.info("Number of new variants in Cosmic not previously indexed in RocksDB: {}", numberNewVariants); - logger.info("Number of updated variants during Cosmic indexing: {}", numberVariantUpdates); - - NumberFormat formatter = NumberFormat.getInstance(); - logger.info(formatter.format(ignoredCosmicLines) + " cosmic lines ignored: "); - if (invalidPositionLines > 0) { - logger.info("\t-" + formatter.format(invalidPositionLines) + " lines by invalid position"); - } - if (invalidSubstitutionLines > 0) { - logger.info("\t-" + formatter.format(invalidSubstitutionLines) + " lines by invalid substitution CDS"); - } - if (invalidInsertionLines > 0) { - logger.info("\t-" + formatter.format(invalidInsertionLines) + " lines by invalid insertion CDS"); - } - if (invalidDeletionLines > 0) { - logger.info("\t-" + formatter.format(invalidDeletionLines) + " lines by invalid deletion CDS"); - } - if (invalidDuplicationLines > 0) { - logger.info("\t-" + formatter.format(invalidDuplicationLines) + " lines because mutation CDS is a duplication"); - } - if (invalidMutationCDSOtherReason > 0) { - logger.info("\t-" + formatter.format(invalidMutationCDSOtherReason) - + " lines because mutation CDS is invalid for other reasons"); - } - } - - private boolean updateRocksDB(SequenceLocation sequenceLocation, List evidenceEntries) - throws RocksDBException, IOException { - // More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed - List normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(), - sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate()); - if (normalisedVariantStringList != null) { - for (String normalisedVariantString : normalisedVariantStringList) { - VariantAnnotation variantAnnotation = getVariantAnnotation(normalisedVariantString.getBytes()); - List mergedEvidenceEntries = mergeEvidenceEntries(evidenceEntries); - addHaplotypeProperty(mergedEvidenceEntries, normalisedVariantStringList); - // IMPORTANT: COSMIC must be indexed first because of the next line !!! - variantAnnotation.setTraitAssociation(mergedEvidenceEntries); - rdb.put(normalisedVariantString.getBytes(), jsonObjectWriter.writeValueAsBytes(variantAnnotation)); - } - return true; - } - return false; - } - - private List mergeEvidenceEntries(List evidenceEntries) { - List mergedEvidenceEntries = new ArrayList<>(); - if (evidenceEntries.size() > 0) { - mergedEvidenceEntries.add(evidenceEntries.get(0)); - // For each evidence entry ... - for (int i = 1; i < evidenceEntries.size(); i++) { - boolean merged = false; - // ... check if it matches a existing evidence entry - for (EvidenceEntry mergedEvidenceEntry : mergedEvidenceEntries) { - if (sameSomaticDocument(evidenceEntries.get(i), mergedEvidenceEntry)) { - // Merge Transcripts - if (mergedEvidenceEntry.getGenomicFeatures() != null) { - if (evidenceEntries.get(i).getGenomicFeatures() != null) { - for (GenomicFeature newGenomicFeature : evidenceEntries.get(i).getGenomicFeatures()) { - if (newGenomicFeature.getFeatureType().equals(FeatureTypes.transcript)) { - boolean found = false; - for (GenomicFeature feature : mergedEvidenceEntry.getGenomicFeatures()) { - if (feature.getXrefs().get(SYMBOL).equals(newGenomicFeature.getXrefs().get(SYMBOL))) { - found = true; - } - } - if (!found) { - mergedEvidenceEntry.getGenomicFeatures().add(newGenomicFeature); - } - } - } - } - } else { - mergedEvidenceEntry.setGenomicFeatures(evidenceEntries.get(i).getGenomicFeatures()); - } - - // Merge Bibliography - // There are cosmic records which share all the fields but the bibliography. In some occassions (COSM12600) - // the redundancy is such that the document becomes much bigger than 16MB and cannot be loaded into MongoDB. - // This merge reduces redundancy. - if (mergedEvidenceEntry.getBibliography() != null) { - if (evidenceEntries.get(i).getBibliography() != null) { - Set bibliographySet = new HashSet<>(mergedEvidenceEntry.getBibliography()); - bibliographySet.addAll(new HashSet<>(evidenceEntries.get(i).getBibliography())); - mergedEvidenceEntry.setBibliography(new ArrayList<>(bibliographySet)); - } - } else { - mergedEvidenceEntry.setBibliography(evidenceEntries.get(i).getBibliography()); - } - - merged = true; - break; - } - } - if (!merged) { - mergedEvidenceEntries.add(evidenceEntries.get(i)); - } - } - } - - return mergedEvidenceEntries; - } - - public boolean sameSomaticDocument(EvidenceEntry evidenceEntry1, EvidenceEntry evidenceEntry2) { - if (evidenceEntry1 == evidenceEntry2) { - return true; - } - if (evidenceEntry2 == null || evidenceEntry1.getClass() != evidenceEntry2.getClass()) { - return false; - } - - if (evidenceEntry1.getSource() != null ? !evidenceEntry1.getSource().equals(evidenceEntry2.getSource()) - : evidenceEntry2.getSource() != null) { - return false; - } - if (evidenceEntry1.getSomaticInformation() != null - ? !evidenceEntry1.getSomaticInformation().equals(evidenceEntry2.getSomaticInformation()) - : evidenceEntry2.getSomaticInformation() != null) { - return false; - } - if (evidenceEntry1.getId() != null - ? !evidenceEntry1.getId().equals(evidenceEntry2.getId()) : evidenceEntry2.getId() != null) { - return false; - } - if (evidenceEntry1.getAlleleOrigin() != null - ? !evidenceEntry1.getAlleleOrigin().equals(evidenceEntry2.getAlleleOrigin()) - : evidenceEntry2.getAlleleOrigin() != null) { - return false; - } - if (evidenceEntry1.getGenomicFeatures() != null - ? !evidenceEntry1.getGenomicFeatures().equals(evidenceEntry2.getGenomicFeatures()) - : evidenceEntry2.getGenomicFeatures() != null) { - return false; - } - if (evidenceEntry1.getAdditionalProperties() != null - ? !evidenceEntry1.getAdditionalProperties().equals(evidenceEntry2.getAdditionalProperties()) - : evidenceEntry2.getAdditionalProperties() != null) { - return false; - } - - return true; - } - - /** - * Check whether the variant is valid and parse it. - * - * @return true if valid mutation, false otherwise - */ - private boolean parseVariant(SequenceLocation sequenceLocation, String[] fields) { - boolean validVariant = false; - String mutationCds = fields[HGVS_COLUMN]; - VariantType variantType = getVariantType(mutationCds); - if (variantType != null) { - switch (variantType) { - case SNV: - validVariant = parseSnv(mutationCds, sequenceLocation); - if (!validVariant) { - invalidSubstitutionLines++; - } - break; - case DELETION: - validVariant = parseDeletion(mutationCds, sequenceLocation); - if (!validVariant) { - invalidDeletionLines++; - } - break; - case INSERTION: - validVariant = parseInsertion(mutationCds, sequenceLocation); - if (!validVariant) { - invalidInsertionLines++; - } - break; - case DUPLICATION: - validVariant = parseDuplication(mutationCds); - if (!validVariant) { - invalidDuplicationLines++; - } - break; - default: - System.out.println("variantType = " + variantType); - validVariant = false; - invalidMutationCDSOtherReason++; - } - } - - return validVariant; - } - - private VariantType getVariantType(String mutationCds) { - if (mutationCds.contains(HGVS_SNV_CHANGE_SYMBOL)) { - return VariantType.SNV; - } else if (mutationCds.contains(HGVS_DELETION_TAG)) { - return VariantType.DELETION; - } else if (mutationCds.contains(HGVS_INSERTION_TAG)) { - return VariantType.INSERTION; - } else if (mutationCds.contains(HGVS_DUPLICATION_TAG)) { - return VariantType.DUPLICATION; - } else { - return null; - } - } - - private boolean parseDuplication(String dup) { - // TODO: The only Duplication in Cosmic V70 is a structural variation that is not going to be serialized - return false; - } - - private boolean parseInsertion(String mutationCds, SequenceLocation sequenceLocation) { - boolean validVariant = true; - String[] insParts = mutationCds.split("ins"); - - if (insParts.length > 1) { - String insertedNucleotides = insParts[1]; - if (insertedNucleotides.matches("\\d+") || !insertedNucleotides.matches(VARIANT_STRING_PATTERN)) { - //c.503_508ins30 - validVariant = false; - } else { - sequenceLocation.setReference(""); - sequenceLocation.setAlternate(getPositiveStrandString(insertedNucleotides, sequenceLocation.getStrand())); - } - } else { - validVariant = false; - } - - return validVariant; - } - - private boolean parseDeletion(String mutationCds, SequenceLocation sequenceLocation) { - boolean validVariant = true; - String[] mutationCDSArray = mutationCds.split("del"); - - // For deletions, only deletions of, at most, deletionLength nucleotide are allowed - if (mutationCDSArray.length < 2) { // c.503_508del (usually, deletions of several nucleotides) - // TODO: allow these variants - validVariant = false; - } else if (mutationCDSArray[1].matches("\\d+") - || !mutationCDSArray[1].matches(VARIANT_STRING_PATTERN)) { // Avoid allele strings containing Ns, for example - validVariant = false; - } else { - sequenceLocation.setReference(getPositiveStrandString(mutationCDSArray[1], sequenceLocation.getStrand())); - sequenceLocation.setAlternate(""); - } - - return validVariant; - } - - private boolean parseSnv(String mutationCds, SequenceLocation sequenceLocation) { - boolean validVariant = true; - Matcher snvMatcher = snvPattern.matcher(mutationCds); - - if (snvMatcher.matches()) { - String ref = snvMatcher.group(REF); - String alt = snvMatcher.group(ALT); - if (!ref.equalsIgnoreCase("N") && !alt.equalsIgnoreCase("N")) { - sequenceLocation.setReference(getPositiveStrandString(ref, sequenceLocation.getStrand())); - sequenceLocation.setAlternate(getPositiveStrandString(alt, sequenceLocation.getStrand())); - } else { - validVariant = false; - } - } else { - validVariant = false; - } - - return validVariant; - } - - private String getPositiveStrandString(String alleleString, String strand) { - if (strand.equals("-")) { - return reverseComplementary(alleleString); - } else { - return alleleString; - } - } - - private String reverseComplementary(String alleleString) { - char[] reverseAlleleString = new StringBuilder(alleleString).reverse().toString().toCharArray(); - for (int i = 0; i < reverseAlleleString.length; i++) { - reverseAlleleString[i] = VariantAnnotationUtils.COMPLEMENTARY_NT.get(reverseAlleleString[i]); - } - - return String.valueOf(reverseAlleleString); - } - - private EvidenceEntry buildCosmic(String[] fields) { - String id = fields[ID_COLUMN]; - String url = "https://cancer.sanger.ac.uk/cosmic/search?q=" + id; - - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.COSMIC_DATA, version, date); - SomaticInformation somaticInformation = getSomaticInformation(fields); - List genomicFeatureList = getGenomicFeature(fields); - - List additionalProperties = new ArrayList<>(); - additionalProperties.add(new Property("COSM_ID", "Legacy COSM ID", fields[COSM_ID_COLUMN])); - additionalProperties.add(new Property("MUTATION_DESCRIPTION", "Description", fields[MUTATION_DESCRIPTION_COLUMN])); - if (StringUtils.isNotEmpty(fields[MUTATION_ZYGOSITY_COLUMN])) { - additionalProperties.add(new Property("MUTATION_ZYGOSITY", "Mutation Zygosity", fields[MUTATION_ZYGOSITY_COLUMN])); - } - additionalProperties.add(new Property("FATHMM_PREDICTION", "FATHMM Prediction", fields[FATHMM_PREDICTION_COLUMN])); - additionalProperties.add(new Property("FATHMM_SCORE", "FATHMM Score", "0" + fields[FATHMM_SCORE_COLUMN])); - additionalProperties.add(new Property("MUTATION_SOMATIC_STATUS", "Mutation Somatic Status", - fields[MUTATION_SOMATIC_STATUS_COLUMN])); - - List bibliography = getBibliography(fields[PUBMED_PMID_COLUMN]); - - return new EvidenceEntry(evidenceSource, Collections.emptyList(), somaticInformation, - url, id, assembly, - getAlleleOriginList(Collections.singletonList(fields[MUTATION_SOMATIC_STATUS_COLUMN])), - Collections.emptyList(), genomicFeatureList, null, null, null, null, - EthnicCategory.Z, null, null, null, additionalProperties, bibliography); - } - - private SomaticInformation getSomaticInformation(String[] fields) { - String primarySite = null; - if (!EtlCommons.isMissing(fields[PRIMARY_SITE_COLUMN])) { - primarySite = fields[PRIMARY_SITE_COLUMN].replace("_", " "); - } - String siteSubtype = null; - if (!EtlCommons.isMissing(fields[SITE_SUBTYPE_COLUMN])) { - siteSubtype = fields[SITE_SUBTYPE_COLUMN].replace("_", " "); - } - String primaryHistology = null; - if (!EtlCommons.isMissing(fields[PRIMARY_HISTOLOGY_COLUMN])) { - primaryHistology = fields[PRIMARY_HISTOLOGY_COLUMN].replace("_", " "); - } - String histologySubtype = null; - if (!EtlCommons.isMissing(fields[HISTOLOGY_SUBTYPE_COLUMN])) { - histologySubtype = fields[HISTOLOGY_SUBTYPE_COLUMN].replace("_", " "); - } - String tumourOrigin = null; - if (!EtlCommons.isMissing(fields[TUMOUR_ORIGIN_COLUMN])) { - tumourOrigin = fields[TUMOUR_ORIGIN_COLUMN].replace("_", " "); - } - String sampleSource = null; - if (!EtlCommons.isMissing(fields[SAMPLE_SOURCE_COLUMN])) { - sampleSource = fields[SAMPLE_SOURCE_COLUMN].replace("_", " "); - } - - return new SomaticInformation(primarySite, siteSubtype, primaryHistology, histologySubtype, tumourOrigin, sampleSource); - } - - private List getBibliography(String bibliographyString) { - if (!EtlCommons.isMissing(bibliographyString)) { - return Collections.singletonList("PMID:" + bibliographyString); - } - - return Collections.emptyList(); - } - - private List getGenomicFeature(String[] fields) { - List genomicFeatureList = new ArrayList<>(5); - if (fields[GENE_NAMES_COLUMN].contains("_")) { - genomicFeatureList.add(createGeneGenomicFeature(fields[GENE_NAMES_COLUMN].split("_")[0])); - } - // Add transcript ID - if (StringUtils.isNotEmpty(fields[1])) { - genomicFeatureList.add(createGeneGenomicFeature(fields[1], FeatureTypes.transcript)); - } - if (!fields[HGNC_COLUMN].equalsIgnoreCase(fields[GENE_NAMES_COLUMN]) && !EtlCommons.isMissing(fields[HGNC_COLUMN])) { - genomicFeatureList.add(createGeneGenomicFeature(fields[HGNC_COLUMN])); - } - - return genomicFeatureList; - } - - public SequenceLocation parseLocation(String[] fields) { - SequenceLocation sequenceLocation = null; - String locationString = fields[25]; - if (StringUtils.isNotEmpty(locationString)) { - Matcher matcher = mutationGRCh37GenomePositionPattern.matcher(locationString); - if (matcher.matches()) { - sequenceLocation = new SequenceLocation(); - sequenceLocation.setChromosome(getCosmicChromosome(matcher.group(CHROMOSOME))); - sequenceLocation.setStrand(fields[26]); - - String mutationCds = fields[HGVS_COLUMN]; - VariantType variantType = getVariantType(mutationCds); - if (VariantType.INSERTION.equals(variantType)) { - sequenceLocation.setEnd(Integer.parseInt(matcher.group(START))); - sequenceLocation.setStart(Integer.parseInt(matcher.group(END))); - } else { - sequenceLocation.setStart(Integer.parseInt(matcher.group(START))); - sequenceLocation.setEnd(Integer.parseInt(matcher.group(END))); - } - } - } - if (sequenceLocation == null) { - this.invalidPositionLines++; - } - return sequenceLocation; - } - - private String getCosmicChromosome(String chromosome) { - switch (chromosome) { - case "23": - return "X"; - case "24": - return "Y"; - case "25": - return "MT"; - default: - return chromosome; - } - } - -======= public void index() throws RocksDBException, CellBaseException { // Call COSMIC parser try { @@ -612,5 +53,4 @@ public void index() throws RocksDBException, CellBaseException { throw new CellBaseException("Error parsing COSMIC file " + cosmicFile, e); } } ->>>>>>> release-6.x.x } From b40f6b24b80e967f7865e6336e8046e719d9972f Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 2 Jun 2025 12:39:20 +0200 Subject: [PATCH 5/7] Fix org in cellbase for deploy docker #TASK-7531 --- cellbase-app/app/cloud/docker/docker-build.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/cellbase-app/app/cloud/docker/docker-build.py b/cellbase-app/app/cloud/docker/docker-build.py index dd3d12f25..778c3a5d7 100755 --- a/cellbase-app/app/cloud/docker/docker-build.py +++ b/cellbase-app/app/cloud/docker/docker-build.py @@ -17,6 +17,7 @@ parser.add_argument('--build-folder', help="the location of the build folder, if not default location") parser.add_argument('--username', help="credentials for dockerhub (REQUIRED if deleting from DockerHub)") parser.add_argument('--password', help="credentials for dockerhub (REQUIRED if deleting from DockerHub)") +parser.add_argument('--org', help="Docker organization", default="opencb") ## Some ANSI colors to print shell output shell_colors = { @@ -62,31 +63,31 @@ def build(): print_header('Building docker images: ' + ', '.join(images)) for image in images: print() - print(shell_colors['blue'] + "Building opencb/cellbase-" + image + ":" + tag + " ..." + shell_colors['reset']) + print(shell_colors['blue'] + "Building " + org + "/cellbase-" + image + ":" + tag + " ..." + shell_colors['reset']) if image == "base": - run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile " + build_folder) + run("docker build -t " + org + "/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile " + build_folder) else: - run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile --build-arg TAG=" + tag + " " + build_folder) + run("docker build -t " + org + "/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile --build-arg TAG=" + tag + " " + build_folder) def tag_latest(image): - latest_tag = os.popen(("curl -s https://registry.hub.docker.com/v1/repositories/opencb/cellbase-" + image + "/tags" + latest_tag = os.popen(("curl -s https://registry.hub.docker.com/v1/repositories/" + org + "/cellbase-" + image + "/tags" + " | jq -r .[].name" + " | grep -v latest" + " | sort -h" + " | head")) if tag >= latest_tag.read(): - print(shell_colors['blue'] + "Pushing opencb/cellbase-" + image + ":latest" + shell_colors['reset']) - run("docker tag opencb/cellbase-" + image + ":" + tag + " opencb/cellbase-" + image + ":latest") - run("docker push opencb/cellbase-" + image + ":latest") + print(shell_colors['blue'] + "Pushing " + org + "/cellbase-" + image + ":latest" + shell_colors['reset']) + run("docker tag " + org + "/cellbase-" + image + ":" + tag + " " + org + "/cellbase-" + image + ":latest") + run("docker push " + org + "/cellbase-" + image + ":latest") def push(): print_header('Pushing to DockerHub: ' + ', '.join(images)) for i in images: print() - print(shell_colors['blue'] + "Pushing opencb/cellbase-" + i + ":" + tag + " ..." + shell_colors['reset']) - run("docker push opencb/cellbase-" + i + ":" + tag) + print(shell_colors['blue'] + "Pushing " + org + "/cellbase-" + i + ":" + tag + " ..." + shell_colors['reset']) + run("docker push " + org + "/cellbase-" + i + ":" + tag) tag_latest(i) @@ -104,11 +105,11 @@ def delete(): error("dockerhub login failed") for i in images: print() - print(shell_colors['blue'] + 'Deleting image on Docker hub for opencb/cellbase-' + i + ':' + tag + shell_colors['reset']) + print(shell_colors['blue'] + 'Deleting image on Docker hub for ' + org + '/cellbase-' + i + ':' + tag + shell_colors['reset']) headers = { 'Authorization': 'JWT ' + json_response["token"] } - requests.delete('https://hub.docker.com/v2/repositories/opencb/cellbase-' + i + '/tags/' + tag + '/', headers=headers) + requests.delete('https://hub.docker.com/v2/repositories/' + org + '/cellbase-' + i + '/tags/' + tag + '/', headers=headers) ## Parse command-line parameters and init basedir, tag and build_folder @@ -144,6 +145,11 @@ def delete(): else: images = args.images.split(",") +# 5. Set docker org to default value if not set +if args.org is not None: + org = args.org +else: + org = "opencb" ## Execute the action if args.action == "build": From 08359f835896cf7f9c2e51ad5f5c6ac7d9ef66c1 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 5 Jun 2025 15:42:49 +0200 Subject: [PATCH 6/7] Prepare release 6.5.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 244646d0d..243231c5b 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0-SNAPSHOT + 6.5.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 7e89bbe9d..018ff4714 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0-SNAPSHOT + 6.5.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index c5fe18a0e..49bb3d79b 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0-SNAPSHOT + 6.5.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 9a6de7901..41b17100b 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0-SNAPSHOT + 6.5.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index fe8aeb291..463ab65d5 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0-SNAPSHOT + 6.5.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 9f0c8d7c5..6c7db05f4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0-SNAPSHOT + 6.5.0 pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.5.0-SNAPSHOT - 3.5.0-SNAPSHOT + 5.5.0 + 3.5.0 0.1.0 2.11.4 1.9.13 From d1211f87f6d5052240693fecab14df3f52e34292 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 13 Jun 2025 10:59:33 +0200 Subject: [PATCH 7/7] Preparing port patch from version 6.5.0 to 7.1.0-SNAPSHOT --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- pom.xml | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 243231c5b..9d8ce41f4 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0 + 7.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 018ff4714..907285d84 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0 + 7.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 49bb3d79b..b9d296556 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0 + 7.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 41b17100b..eded12351 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0 + 7.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 463ab65d5..52ace081c 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0 + 7.1.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 6c7db05f4..3200941cd 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 6.5.0 + 7.1.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 5.5.0 - 3.5.0 + 6.1.0-SNAPSHOT + 4.1.0-SNAPSHOT 0.1.0 2.11.4 1.9.13