From 709b4da87c0186efc049a9b7625fd04fdddd9797 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Mon, 24 Nov 2025 16:16:15 -0500
Subject: [PATCH 01/15] try async command for archiving

---
 .../edu/harvard/iq/dataverse/DatasetPage.java | 25 ++++++++----------
 .../iq/dataverse/EjbDataverseEngine.java      | 26 +++++++++++++++++++
 src/main/java/propertyFiles/Bundle.properties |  1 +
 3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index 20617160a1c..b97b8ec6578 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -6101,20 +6101,17 @@ public void archiveVersion(Long id) {
             AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv);
             if (cmd != null) {
                 try {
-                    DatasetVersion version = commandEngine.submit(cmd);
-                    if (!version.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) {
-                        logger.info(
-                                "DatasetVersion id=" + version.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus());
-                    } else {
-                        logger.severe("Error submitting version " + version.getId() + " due to conflict/error at Archive");
-                    }
-                    if (version.getArchivalCopyLocation() != null) {
-                        setVersionTabList(resetVersionTabList());
-                        this.setVersionTabListForPostLoad(getVersionTabList());
-                        JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.success"));
-                    } else {
-                        JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure"));
-                    }
+                    commandEngine.submitAsync(cmd);
+
+                    // Set initial pending status
+                    dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING);
+
+                    logger.info(
+                            "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus());
+                    setVersionTabList(resetVersionTabList());
+                    this.setVersionTabListForPostLoad(getVersionTabList());
+                    JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress"));
+
                 } catch (CommandException ex) {
                     logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex);
                     JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure"));
diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
index 4d6d59cb013..5a3f105497d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
@@ -31,6 +31,9 @@
 import java.util.Map;
 import java.util.Set;
+
+import jakarta.ejb.AsyncResult;
+import jakarta.ejb.Asynchronous;
 import jakarta.ejb.EJB;
 import jakarta.ejb.Stateless;
 import jakarta.inject.Named;
@@ -45,6 +48,7 @@
 import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.Stack;
+import java.util.concurrent.Future;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import jakarta.annotation.Resource;
@@ -348,6 +352,28 @@ public <R> R submit(Command<R> aCommand) throws CommandException {
             logSvc.log(logRec);
         }
     }
+
+    /**
+     * Submits a command for asynchronous execution.
+     * The command will be executed in a separate thread and won't block the caller.
+     *
+     * @param <R> The return type of the command
+     * @param aCommand The command to execute
+     *                 (the executing user is taken from the command's DataverseRequest)
+     * @return A Future representing the pending result
+     * @throws CommandException if the command cannot be submitted
+     */
+    @Asynchronous
+    public <R> Future<R> submitAsync(Command<R> aCommand) throws CommandException {
+        try {
+            logger.log(Level.INFO, "Submitting async command: {0}", aCommand.getClass().getSimpleName());
+            R result = submit(aCommand);
+            return new AsyncResult<>(result);
+        } catch (Exception e) {
+            logger.log(Level.SEVERE, "Async command execution failed: " + aCommand.getClass().getSimpleName(), e);
+            throw e;
+        }
+    }
 
     protected void completeCommand(Command command, Object r, Stack called) {
 
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index f6c0054a43a..d9b9fd7bc48 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -2699,6 +2699,7 @@ dataset.notlinked.msg=There was a problem linking this dataset to yours:
 dataset.linking.popop.already.linked.note=Note: This dataset is already linked to the following dataverse(s):
 dataset.linking.popup.not.linked.note=Note: This dataset is not linked to any of your accessible dataverses
 datasetversion.archive.success=Archival copy of Version successfully submitted
+datasetversion.archive.inprogress=Archiving of the dataset version has been started
 datasetversion.archive.failure=Error in submitting an archival copy
 datasetversion.update.failure=Dataset Version Update failed. Changes are still in the DRAFT version.
 datasetversion.update.archive.failure=Dataset Version Update succeeded, but the attempt to update the archival copy failed.
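For orientation, the caller-side pattern this patch establishes looks roughly like the sketch below. It is not part of the patch itself: the names mirror DatasetPage.archiveVersion() above, and the asynchrony depends on invoking submitAsync through the injected EJB proxy (a self-call would bypass @Asynchronous).

```java
// Sketch of a caller using the new async submission (assumes the injected
// commandEngine, dvRequestService, and dv fields shown in DatasetPage above).
AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(
        className, dvRequestService.getDataverseRequest(), dv);
if (cmd != null) {
    try {
        // Returns immediately; the container runs the command on a separate thread.
        Future<DatasetVersion> pending = commandEngine.submitAsync(cmd);
        // The UI reports progress via the "pending" archival status rather than
        // blocking on pending.get().
    } catch (CommandException ex) {
        JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure"));
    }
}
```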
From 6487c1433f1c960d645250cea421c1659120d3c9 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Mon, 24 Nov 2025 17:07:48 -0500
Subject: [PATCH 02/15] save status

---
 src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index b97b8ec6578..0bf0db42728 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -6101,10 +6101,12 @@ public void archiveVersion(Long id) {
             AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv);
             if (cmd != null) {
                 try {
-                    commandEngine.submitAsync(cmd);
-
+
                     // Set initial pending status
                     dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING);
+                    dv = datasetVersionService.merge(dv);
+
+                    commandEngine.submitAsync(cmd);
 
                     logger.info(
                             "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus());

From 9d32051fe76d0914fc35d21f693211054fc0c38a Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Thu, 8 Jan 2026 13:07:23 -0500
Subject: [PATCH 03/15] refactor, use persistArchivalCopyLocation everywhere

---
 .../edu/harvard/iq/dataverse/DatasetPage.java     |  2 +-
 .../iq/dataverse/DatasetVersionServiceBean.java   | 17 +++++++++++++++++
 .../edu/harvard/iq/dataverse/api/Datasets.java    |  1 +
 .../impl/AbstractSubmitToArchiveCommand.java      |  3 ++-
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index 0bf0db42728..281734cd66e 100644
--- 
a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6104,7 +6104,7 @@ public void archiveVersion(Long id) { // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); - dv = datasetVersionService.merge(dv); + datasetVersionService.persistArchivalCopyLocation(dv); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 60df1fd3dfd..7656f975d2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1333,4 +1333,21 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer return em.createQuery(cq).getSingleResult(); } + + + /** + * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred + * + * @param dv + * The dataset version whose archival copy location we want to update. Must not be {@code null}. + * @param archivalStatusPending + * the JSON status string, may be {@code null}. + */ + public void persistArchivalCopyLocation(DatasetVersion dv) { + em.createNativeQuery( + "UPDATE datasetversion SET archivalcopylocation = ?1 WHERE id = ?2") + .setParameter(1, dv.getArchivalCopyLocation()) + .setParameter(2, dv.getId()) + .executeUpdate(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 1b3016ec2f4..c8e66115575 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1280,6 +1280,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect updateVersion.setArchivalCopyLocation(null); + datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. 
Note that running this
             * command within the CuratePublishedDatasetVersionCommand was causing an error:
             * "The attribute [id] of class
             * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary
             * key column in the database. Updates are not allowed." To avoid that, and to
             * simplify reporting back to the GUI whether this optional step succeeded, I've
             * pulled this out as a separate submit().
             */
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index 29c27d0396d..7e39a8e7b85 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -63,7 +63,8 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException {
             token = ctxt.authentication().generateApiTokenForUser(user);
         }
         performArchiveSubmission(version, token, requestedSettings);
-        return ctxt.em().merge(version);
+        ctxt.datasetVersion().persistArchivalCopyLocation(version);
+        return version;
     }
 
     /**

From ec5046cc161193fd102481a9a53cb439c5768f94 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Mon, 12 Jan 2026 10:55:48 -0500
Subject: [PATCH 04/15] catch OLE when persisting archivalcopylocation

---
 .../dataverse/DatasetVersionServiceBean.java | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
index 7656f975d2a..b5e964e5673 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -33,6 +33,7 @@
 import jakarta.json.JsonObjectBuilder;
 import jakarta.persistence.EntityManager;
 import jakarta.persistence.NoResultException;
+import jakarta.persistence.OptimisticLockException;
 import jakarta.persistence.PersistenceContext;
 import jakarta.persistence.Query;
 import jakarta.persistence.TypedQuery;
@@ -1336,18 +1337,25 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer
 
     /**
-     * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred
+     * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred so this method will check
+     * for OptimisticLockExceptions and retry the update with the latest version.
      *
     * @param dv
     *            The dataset version whose archival copy location we want to update. Must not be {@code null}.
-    * @param archivalStatusPending
-    *            the JSON status string, may be {@code null}.
     */
    public void persistArchivalCopyLocation(DatasetVersion dv) {
-       em.createNativeQuery(
-               "UPDATE datasetversion SET archivalcopylocation = ?1 WHERE id = ?2")
-               .setParameter(1, dv.getArchivalCopyLocation())
-               .setParameter(2, dv.getId())
-               .executeUpdate();
+       try {
+           em.merge(dv);
+           em.flush(); // Force the update and version check immediately
+       } catch (OptimisticLockException ole) {
+           logger.log(Level.INFO, "OptimisticLockException while persisting archival copy location for DatasetVersion id={0}. Retrying on latest version.", dv.getId());
+           DatasetVersion currentVersion = find(dv.getId());
+           if (currentVersion != null) {
+               currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation());
+               em.merge(currentVersion);
+           } else {
+               logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} to retry persisting archival copy location after OptimisticLockException.", dv.getId());
+           }
+       }
    }
 }

From c1055b87cd3445adc0a21f4248c1ec2fb4442774 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Tue, 25 Nov 2025 14:23:22 -0500
Subject: [PATCH 05/15] Add obsolete state, update display, add supportsDelete

---
 .../edu/harvard/iq/dataverse/DatasetPage.java | 81 ++++++++++++-------
 .../harvard/iq/dataverse/DatasetVersion.java  |  1 +
 .../impl/AbstractSubmitToArchiveCommand.java  |  4 +
 .../GoogleCloudSubmitToArchiveCommand.java    | 39 ++++++++-
 src/main/java/propertyFiles/Bundle.properties |  1 +
 src/main/webapp/dataset-versions.xhtml        |  8 +-
 6 files changed, 103 insertions(+), 31 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index 281734cd66e..0832560eafb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -42,6 +42,7 @@
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
 import edu.harvard.iq.dataverse.export.ExportService;
 import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
 import io.gdcc.spi.export.ExportException;
 import io.gdcc.spi.export.Exporter;
 import edu.harvard.iq.dataverse.ingest.IngestRequest;
@@ -105,6 +106,9 @@
 import jakarta.faces.view.ViewScoped;
 import jakarta.inject.Inject;
 import jakarta.inject.Named;
+import jakarta.json.Json;
+import jakarta.json.JsonObject;
+import jakarta.json.JsonObjectBuilder;
 import jakarta.persistence.OptimisticLockException;
 
 import org.apache.commons.lang3.StringUtils;
@@ -2992,27 +2996,40 @@ public String updateCurrentVersion() {
         String className = settingsService.get(SettingsServiceBean.Key.ArchiverClassName.toString());
         AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), updateVersion);
         if (archiveCommand != null) {
-            // Delete the record of any existing copy since it is now out of date/incorrect
-            updateVersion.setArchivalCopyLocation(null);
-            /*
-             * Then try to generate and submit an archival copy. Note that running this
-             * command within the CuratePublishedDatasetVersionCommand was causing an error:
-             * "The attribute [id] of class
-             * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary
-             * key column in the database. Updates are not allowed." To avoid that, and to
-             * simplify reporting back to the GUI whether this optional step succeeded, I've
-             * pulled this out as a separate submit().
-             */
-            try {
-                updateVersion = commandEngine.submit(archiveCommand);
-                if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) {
-                    successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success");
-                } else {
-                    errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure");
+            //There is an archiver configured, so now decide what to do:
+            // If a successful copy exists, don't automatically update, just note the old copy is obsolete (and enable the superadmin button in the display to allow a ~manual update if desired)
+            // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare)
+            // If a failure or null, rerun archiving now. If a failure is due to an existing copy in the repo, we'll fail again
+            String status = updateVersion.getArchivalCopyLocationStatus();
+            if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){
+                // Delete the record of any existing copy since it is now out of date/incorrect
+                updateVersion.setArchivalCopyLocation(null);
+                /*
+                 * Then try to generate and submit an archival copy. Note that running this
+                 * command within the CuratePublishedDatasetVersionCommand was causing an error:
+                 * "The attribute [id] of class
+                 * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary
+                 * key column in the database. Updates are not allowed." To avoid that, and to
+                 * simplify reporting back to the GUI whether this optional step succeeded, I've
+                 * pulled this out as a separate submit(). 
- */ - try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); + //There is an archiver configured, so now decide what to dO: + // If a successful copy exists, don't automatically update, just note the old copy is obsolete (and enable the superadmin button in the display to allow a ~manual update if desired) + // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare + // If a failure or null, rerun archiving now. If a failure is due to an exiting copy in the repo, we'll fail again + String status = updateVersion.getArchivalCopyLocationStatus(); + if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){ + // Delete the record of any existing copy since it is now out of date/incorrect + updateVersion.setArchivalCopyLocation(null); + /* + * Then try to generate and submit an archival copy. Note that running this + * command within the CuratePublishedDatasetVersionCommand was causing an error: + * "The attribute [id] of class + * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary + * key column in the database. Updates are not allowed." To avoid that, and to + * simplify reporting back to the GUI whether this optional step succeeded, I've + * pulled this out as a separate submit(). + */ + try { + updateVersion = commandEngine.submit(archiveCommand); + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); + } else { + errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); + } + } catch (CommandException ex) { + errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); + logger.severe(ex.getMessage()); } - } catch (CommandException ex) { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); - logger.severe(ex.getMessage()); + } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); + JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); + job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + datasetVersionService.merge(updateVersion); } } } @@ -6094,14 +6111,16 @@ public void refreshPaginator() { * * @param id - the id of the datasetversion to archive. 
*/
-    public void archiveVersion(Long id) {
+    public void archiveVersion(Long id, boolean force) {
         if (session.getUser() instanceof AuthenticatedUser) {
             DatasetVersion dv = datasetVersionService.retrieveDatasetVersionByVersionId(id).getDatasetVersion();
             String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null);
             AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv);
             if (cmd != null) {
                 try {
-
+                    String status = dv.getArchivalCopyLocationStatus();
+                    if(status == null || (force && cmd.supportsDelete())){
+
                     // Set initial pending status
                     dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING);
                     datasetVersionService.persistArchivalCopyLocation(dv);
 
                     commandEngine.submitAsync(cmd);
 
                     logger.info(
                             "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus());
                     setVersionTabList(resetVersionTabList());
                     this.setVersionTabListForPostLoad(getVersionTabList());
                     JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress"));
-
+                    }
                 } catch (CommandException ex) {
                     logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex);
                     JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure"));
@@ -6146,21 +6165,26 @@ public boolean isArchivable() {
         return archivable;
     }
 
+    /** Method to decide if a 'Submit' button should be enabled for archiving a dataset version. */
     public boolean isVersionArchivable() {
         if (versionArchivable == null) {
             // If this dataset isn't in an archivable collection return false
             versionArchivable = false;
             if (isArchivable()) {
-                boolean checkForArchivalCopy = false;
+
                 // Otherwise, we need to know if the archiver is single-version-only
                 // If it is, we have to check for an existing archived version to answer the
                 // question
                 String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null);
                 if (className != null) {
                     try {
+                        boolean checkForArchivalCopy = false;
                         Class<?> clazz = Class.forName(className);
                         Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class);
+                        Method m2 = clazz.getMethod("supportsDelete");
+
                         Object[] params = { settingsWrapper };
+                        boolean supportsDelete = (Boolean) m2.invoke(null);
                         checkForArchivalCopy = (Boolean) m.invoke(null, params);
 
                         if (checkForArchivalCopy) {
                             // one version is already archived (or attempted - any non-null status)
                             versionArchivable = !isSomeVersionArchived();
                         } else {
-                            // If we allow multiple versions or didn't find one that has had archiving run
-                            // on it, we can archive, so return true
-                            versionArchivable = true;
+                            // If we didn't find one that has had archiving run
+                            // on it, or archiving per version is supported and either
+                            // the status is null or the archiver can delete prior runs and status isn't success,
+                            // we can archive, so return true
+                            String status = workingVersion.getArchivalCopyLocationStatus();
+                            versionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete));
                         }
                     } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException
                             | InvocationTargetException | NoSuchMethodException | SecurityException e) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
index 93b0ccfef61..0de0dedc860 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -132,6 +132,7 @@ public enum VersionState {
     public static final String ARCHIVAL_STATUS_PENDING = "pending";
     public static final String ARCHIVAL_STATUS_SUCCESS = "success";
     public static final String ARCHIVAL_STATUS_FAILURE = "failure";
+    public static final String ARCHIVAL_STATUS_OBSOLETE = "obsolete";
 
     @Id
     @GeneratedValue(strategy = GenerationType.IDENTITY)
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index 29c27d0396d..f7716534b7f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -184,4 +184,8 @@ public static boolean isSingleVersion(SettingsWrapper settingsWrapper) {
     public static boolean isSingleVersion(SettingsServiceBean settingsService) {
         return false;
     }
+
+    public static boolean supportsDelete() {
+        return false;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
index 7dfb9f07e19..97ca104f01c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
@@ -44,6 +44,11 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi
         super(aRequest, version);
     }
 
+    @Override
+    public static boolean supportsDelete() {
+        return true;
+    }
+
     @Override
     public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map<String, String> requestedSettings) {
         logger.fine("In GoogleCloudSubmitToArchiveCommand...");
@@ -73,6 +78,34 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                     String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
                             .replace('.', '-').toLowerCase();
 
+                    // Check for and delete existing files for this version
+                    String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml";
+                    String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip";
+
+                    logger.fine("Checking for existing files in archive...");
+
+                    try {
+                        Blob existingDatacite = bucket.get(dataciteFileName);
+                        if (existingDatacite != null && existingDatacite.exists()) {
+                            logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName);
+                            existingDatacite.delete();
+                            logger.fine("Deleted existing datacite.xml");
+                        }
+                    } catch (StorageException se) {
+                        logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage());
+                    }
+
+                    try {
+                        Blob existingBag = bucket.get(bagFileName);
+                        if (existingBag != null && existingBag.exists()) {
+                            logger.fine("Found existing bag file, deleting: " + bagFileName);
+                            existingBag.delete();
+                            logger.fine("Deleted existing bag file");
+                        }
+                    } catch (StorageException se) {
+                        logger.warning("Error checking/deleting existing bag file: " + se.getMessage());
+                    }
+
                     String dataciteXml = getDataCiteXml(dv);
                     MessageDigest messageDigest = MessageDigest.getInstance("MD5");
                     try (PipedInputStream dataciteIn = new PipedInputStream();
@@ -102,7 +135,7 @@ public void run() {
                                 Thread.sleep(10);
                                 i++;
                             }
+ "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml", digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + Blob dcXml = bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); dcThread.join(); String checksum = dcXml.getMd5ToHexString(); @@ -131,7 +164,7 @@ public void run() { try (PipedInputStream in = new PipedInputStream(100000); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); - Blob bag = bucket.create(spaceName + "/" + fileName, digestInputStream2, "application/zip", + Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", Bucket.BlobWriteOption.doesNotExist()); if (bag.getSize() == 0) { throw new IOException("Empty Bag"); @@ -139,7 +172,7 @@ public void run() { bagThread.join(); checksum = bag.getMd5ToHexString(); - logger.fine("Bag: " + fileName + " added with checksum: " + checksum); + logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); if (!success || !checksum.equals(localchecksum)) { logger.severe(success ? checksum + " not equal to " + localchecksum diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index d9b9fd7bc48..dbc2ce40657 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2141,6 +2141,7 @@ file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) wit file.dataFilesTab.versions.headers.published=Published on file.dataFilesTab.versions.headers.archived=Archival Status file.dataFilesTab.versions.headers.archived.success=Archived +file.dataFilesTab.versions.headers.archived.obsolete=Original Version Archived file.dataFilesTab.versions.headers.archived.pending=Pending file.dataFilesTab.versions.headers.archived.failure=Failed file.dataFilesTab.versions.headers.archived.notarchived=Not Archived diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 9e5f0a9b24d..1f33675bd3d 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -169,9 +169,15 @@ + + + + + + - From f912fd043945850ac87d396833cdc9c94d62f56c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:34:32 -0500 Subject: [PATCH 06/15] doc that api doesn't handls supportsDelete yet --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 18f28569d7d..10aadde57b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2067,6 +2067,7 @@ public Response submitDatasetVersionToArchive(@Context ContainerRequestContext c if(dv==null) { return error(Status.BAD_REQUEST, "Requested version not found."); } + //ToDo - allow forcing with a non-success status for archivers that supportsDelete() if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); // Note - the user is being sent via the createDataverseRequest(au) call to the @@ -2132,7 +2133,7 @@ public Response archiveAllUnarchivedDatasetVersions(@Context ContainerRequestCon try { 
            AuthenticatedUser au = getRequestAuthenticatedUserOrDie(crc);
-
+            //ToDo - allow forcing with a non-success status for archivers that supportsDelete()
            List<DatasetVersion> dsl = datasetversionService.getUnarchivedDatasetVersions();
            if (dsl != null) {
                if (listonly) {

From 00f115e23e50f8d70338256fbd34d8270a9900a1 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Tue, 25 Nov 2025 14:55:51 -0500
Subject: [PATCH 07/15] support reflective and instance calls re: delete
 capability

---
 .../java/edu/harvard/iq/dataverse/DatasetPage.java |  2 +-
 .../impl/AbstractSubmitToArchiveCommand.java       | 14 +++++++++++---
 .../impl/GoogleCloudSubmitToArchiveCommand.java    |  5 ++++-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index 0832560eafb..09669fb789e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -6119,7 +6119,7 @@ public void archiveVersion(Long id, boolean force) {
             if (cmd != null) {
                 try {
                     String status = dv.getArchivalCopyLocationStatus();
-                    if(status == null || (force && cmd.supportsDelete())){
+                    if(status == null || (force && cmd.canDelete())){
 
                     // Set initial pending status
                     dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING);
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index f7716534b7f..aaeef193ff4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -185,7 +185,15 @@ public static boolean isSingleVersion(SettingsServiceBean settingsService) {
         return false;
     }
 
-    public static boolean supportsDelete() {
-        return false;
-    }
+    /** Whether the archiver can delete existing archival files (and thus can retry when the existing files are incomplete/obsolete)
+     * A static version supports calls via reflection while the instance method supports inheritance for use on actual command instances (see DatasetPage for both use cases). 
+ * @return + */ + public static boolean supportsDelete() { + return false; + } + + public boolean canDelete() { + return supportsDelete(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 97ca104f01c..61a38cffc99 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -44,10 +44,13 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi super(aRequest, version); } - @Override public static boolean supportsDelete() { return true; } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { From bc403703ab672d1ac30ba16d928a3eaa1de87214 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 10 Dec 2025 16:14:30 -0500 Subject: [PATCH 08/15] use query to update status, async everywhere --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 14 +++++--------- .../edu/harvard/iq/dataverse/api/Datasets.java | 10 +++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 09669fb789e..db9e9caa671 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3014,21 +3014,18 @@ public String updateCurrentVersion() { * pulled this out as a separate submit(). */ try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); - } + commandEngine.submitAsync(archiveCommand); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); } catch (CommandException ex) { errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); logger.severe(ex.getMessage()); } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + //Not automatically replacing the old archival copy as creating it is expensive JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); - updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + datasetVersionService.setArchivalCopyLocation(updateVersion, JsonUtil.prettyPrint(job.build())); datasetVersionService.merge(updateVersion); } } @@ -6122,8 +6119,7 @@ public void archiveVersion(Long id, boolean force) { if(status == null || (force && cmd.canDelete())){ // Set initial pending status - dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); - datasetVersionService.persistArchivalCopyLocation(dv); + datasetVersionService.setArchivalCopyLocation(dv, DatasetVersion.ARCHIVAL_STATUS_PENDING); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c8e66115575..bf0f7c6668a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1279,7 +1279,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion); if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); + datasetVersionSvc.setArchivalCopyLocation(updateVersion, null); datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this @@ -1291,12 +1291,8 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( * pulled this out as a separate submit(). */ try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); - } + commandEngine.submitAsync(archiveCommand); + successMsg = BundleUtil.getStringFromBundle("datasetversion.archive.inprogress"); } catch (CommandException ex) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); logger.severe(ex.getMessage()); From df9b5cec3c83ec066dc274d35edea9ee9f9e98a6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 Dec 2025 18:23:56 -0500 Subject: [PATCH 09/15] fixes for dataset page re: archiving --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 1f33675bd3d..89a8162c135 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -170,14 +170,14 @@ - + - From a64e1f749c2f44c14b4386e1c22195e1c65d8ea8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 16 Jan 2026 13:33:19 -0500 Subject: [PATCH 10/15] merge issues --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index db9e9caa671..4b559af3878 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,10 +6101,7 @@ public void refreshPaginator() { /** * This method can be called from *.xhtml files to allow archiving of a dataset - * version from the user interface. It is not currently (11/18) used in the IQSS/develop - * branch, but is used by QDR and is kept here in anticipation of including a - * GUI option to archive (already published) versions after other dataset page - * changes have been completed. + * version from the user interface. * * @param id - the id of the datasetversion to archive. 
*/ From c55230ee81481b465323b16800e98679fe5fa36c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jan 2026 17:38:37 -0500 Subject: [PATCH 11/15] merge fix of persistArchivalCopy method refactors --- .../edu/harvard/iq/dataverse/DatasetPage.java | 30 +++++++++---------- .../harvard/iq/dataverse/DatasetVersion.java | 24 ++++++++++----- .../harvard/iq/dataverse/api/Datasets.java | 2 +- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 4b559af3878..fe17a137361 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3022,10 +3022,8 @@ public String updateCurrentVersion() { } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { //Not automatically replacing the old archival copy as creating it is expensive - JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); - JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); - job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); - datasetVersionService.setArchivalCopyLocation(updateVersion, JsonUtil.prettyPrint(job.build())); + updateVersion.setArchivalStatus(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + datasetVersionService.persistArchivalCopyLocation(updateVersion); datasetVersionService.merge(updateVersion); } } @@ -6113,18 +6111,18 @@ public void archiveVersion(Long id, boolean force) { if (cmd != null) { try { String status = dv.getArchivalCopyLocationStatus(); - if(status == null || (force && cmd.canDelete())){ - - // Set initial pending status - datasetVersionService.setArchivalCopyLocation(dv, DatasetVersion.ARCHIVAL_STATUS_PENDING); - - commandEngine.submitAsync(cmd); - - logger.info( - "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); - setVersionTabList(resetVersionTabList()); - this.setVersionTabListForPostLoad(getVersionTabList()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); + if (status == null || (force && cmd.canDelete())) { + + // Set initial pending status + dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + datasetVersionService.persistArchivalCopyLocation(dv); + commandEngine.submitAsync(cmd); + + logger.info( + "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + setVersionTabList(resetVersionTabList()); + this.setVersionTabListForPostLoad(getVersionTabList()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); } } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 0de0dedc860..1248a8266ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -232,8 +232,9 @@ public enum VersionState { @Transient private DatasetVersionDifference dvd; + //The Json version of the archivalCopyLocation string @Transient - private JsonObject archivalStatus; + private JsonObject archivalCopyLocationJson; public Long getId() { return this.id; @@ -384,24 +385,24 @@ public String getArchivalCopyLocation() { public String 
getArchivalCopyLocationStatus() {
         populateArchivalStatus(false);
-        if(archivalStatus!=null) {
-            return archivalStatus.getString(ARCHIVAL_STATUS);
+        if(archivalCopyLocationJson!=null) {
+            return archivalCopyLocationJson.getString(ARCHIVAL_STATUS);
         }
         return null;
     }
 
     public String getArchivalCopyLocationMessage() {
         populateArchivalStatus(false);
-        if(archivalStatus!=null) {
-            return archivalStatus.getString(ARCHIVAL_STATUS_MESSAGE);
+        if(archivalCopyLocationJson!=null) {
+            return archivalCopyLocationJson.getString(ARCHIVAL_STATUS_MESSAGE);
         }
         return null;
     }
 
     private void populateArchivalStatus(boolean force) {
-        if(archivalStatus ==null || force) {
+        if(archivalCopyLocationJson ==null || force) {
             if(archivalCopyLocation!=null) {
                 try {
-                    archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation);
+                    archivalCopyLocationJson = JsonUtil.getJsonObject(archivalCopyLocation);
                 } catch(Exception e) {
                     logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage());
                     logger.fine(archivalCopyLocation);
@@ -415,6 +416,15 @@ public void setArchivalCopyLocation(String location) {
         populateArchivalStatus(true);
     }
 
+    // Convenience method to set only the status
+    public void setArchivalStatus(String status) {
+        populateArchivalStatus(false);
+        JsonObjectBuilder job = Json.createObjectBuilder(archivalCopyLocationJson);
+        job.add(DatasetVersion.ARCHIVAL_STATUS, status);
+        archivalCopyLocationJson = job.build();
+        archivalCopyLocation = JsonUtil.prettyPrint(archivalCopyLocationJson);
+    }
+
     public String getDeaccessionLink() {
         return deaccessionLink;
     }
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index bf0f7c6668a..dba4b36d4da 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -1279,7 +1279,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam(
             AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion);
             if (archiveCommand != null) {
                 // Delete the record of any existing copy since it is now out of date/incorrect
-                datasetVersionSvc.setArchivalCopyLocation(updateVersion, null);
+                updateVersion.setArchivalCopyLocation(null);
                 datasetVersionSvc.persistArchivalCopyLocation(updateVersion);
                 /*
                  * Then try to generate and submit an archival copy. Note that running this

From 905570a81563b8428042398ac1778fd4d380b61d Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Thu, 22 Jan 2026 12:57:38 -0500
Subject: [PATCH 12/15] add flag, docs

---
 doc/sphinx-guides/source/installation/config.rst       | 10 ++++++++++
 .../java/edu/harvard/iq/dataverse/DatasetPage.java     |  3 ++-
 .../harvard/iq/dataverse/settings/FeatureFlags.java    | 13 +++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index a9d5c7c0041..68982881d77 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -2263,6 +2263,9 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local
 
 All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers).
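+
+For example, a superuser can inspect a version's archival status with a call along these lines (a sketch only; see the :ref:`Archival Status API` for the exact form - the dataset id and version below are placeholders):
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+
+  # Returns the archival status JSON (e.g. {"status":"pending", ...}) for dataset 24, version 1.0
+  curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/24/1.0/archivalStatus"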
+Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version.
+If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers), superusers can trigger a manual update of the archival bag, and, if the :ref:`:feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
+
 .. _Duracloud Configuration:
 
 Duracloud Configuration
@@ -4031,6 +4034,13 @@ dataverse.feature.only-update-datacite-when-needed
 
 Only contact DataCite to update a DOI after checking to see if DataCite has outdated information (for efficiency, lighter load on DataCite, especially when using file DOIs).
 
+.. _dataverse.feature.archive-on-version-update:
+
+dataverse.feature.archive-on-version-update
++++++++++++++++++++++++++++++++++++++++++++
+
+Indicates whether archival bag creation should be triggered (if configured) when a version is updated and was already successfully archived,
+i.e. via the Update-Current-Version publication option. Setting the flag true only works if the archiver being used supports deleting existing archival bags.
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index fe17a137361..a091005b392 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -164,6 +164,7 @@
 import edu.harvard.iq.dataverse.search.SearchFields;
 import edu.harvard.iq.dataverse.search.SearchUtil;
 import edu.harvard.iq.dataverse.search.SolrClientService;
+import edu.harvard.iq.dataverse.settings.FeatureFlags;
 import edu.harvard.iq.dataverse.settings.JvmSettings;
 import edu.harvard.iq.dataverse.util.SignpostingResources;
 import edu.harvard.iq.dataverse.util.FileMetadataUtil;
@@ -3001,7 +3002,7 @@ public String updateCurrentVersion() {
             // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare)
             // If a failure or null, rerun archiving now. If a failure is due to an existing copy in the repo, we'll fail again
             String status = updateVersion.getArchivalCopyLocationStatus();
-            if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){
+            if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && archiveCommand.canDelete())){
                 // Delete the record of any existing copy since it is now out of date/incorrect
                 updateVersion.setArchivalCopyLocation(null);
                 /*
diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
index 2e86fae610e..fdbdb257dbe 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java
@@ -249,6 +249,19 @@
      * @since Dataverse 6.9
      */
     ONLY_UPDATE_DATACITE_WHEN_NEEDED("only-update-datacite-when-needed"),
+    /**
+     * Indicates whether archival bag creation should be triggered (if configured) when a version
+     * is updated and was already successfully archived, i.e. via the Update-Current-Version publication option. 
+     * Since archiving can be resource intensive, it may not be worthwhile to automatically re-archive for the
+     * types of minor changes "Update-Current-Version" is intended for. Note that this flag is only effective
+     * for archivers that support deletion of existing files. When the flag is false, or the archiver cannot
+     * delete, the existing archival status will be changed to "Obsolete".
+     *
+     * @apiNote Raise flag by setting "dataverse.feature.archive-on-version-update"
+     *
+     * @since Dataverse 6.10
+     */
+    ARCHIVE_ON_VERSION_UPDATE("archive-on-version-update"),
     ;
 

From 521fbf68f2d6ba72b06343c32cf6154b027c899f Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Thu, 22 Jan 2026 15:01:50 -0500
Subject: [PATCH 13/15] add delete to local and S3

---
 .../impl/LocalSubmitToArchiveCommand.java | 49 +++++++++-
 .../impl/S3SubmitToArchiveCommand.java    | 94 ++++++++++++++++---
 2 files changed, 129 insertions(+), 14 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
index 462879f2ec9..34fadbed703 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
@@ -34,6 +34,14 @@ public class LocalSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand
     public LocalSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
         super(aRequest, version);
     }
+
+    public static boolean supportsDelete() {
+        return true;
+    }
+    @Override
+    public boolean canDelete() {
+        return supportsDelete();
+    }
 
     @Override
     public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token,
@@ -57,15 +65,52 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                     String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
                             .replace('.', '-').toLowerCase();
 
+                    // Define file paths
+                    String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml";
+                    zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip";
+
+                    // Check for and delete existing files for this version
+                    logger.fine("Checking for existing files in archive...");
+
+                    File existingDatacite = new File(dataciteFileName);
+                    if (existingDatacite.exists()) {
+                        logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName);
+                        if (existingDatacite.delete()) {
+                            logger.fine("Deleted existing datacite.xml");
+                        } else {
+                            logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName);
+                        }
+                    }
+
+                    File existingBag = new File(zipName);
+                    if (existingBag.exists()) {
+                        logger.fine("Found existing bag file, deleting: " + zipName);
+                        if (existingBag.delete()) {
+                            logger.fine("Deleted existing bag file");
+                        } else {
+                            logger.warning("Failed to delete existing bag file: " + zipName);
+                        }
+                    }
+
+                    // Also check for and delete the .partial file if it exists
+                    File existingPartial = new File(zipName + ".partial");
+                    if (existingPartial.exists()) {
+                        logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial");
+                        if (existingPartial.delete()) {
+                            logger.fine("Deleted existing partial bag file");
+                        } else {
+                            logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial");
+                        }
+                    }
+
                     String dataciteXml = getDataCiteXml(dv);
                     FileUtils.writeStringToFile(
-                            new File(localPath + 
"/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"), + new File(dataciteFileName), dataciteXml, StandardCharsets.UTF_8); BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); - zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; //ToDo: generateBag(File f, true) seems to do the same thing (with a .tmp extension) - since we don't have to use a stream here, could probably just reuse the existing code? bagger.generateBag(new FileOutputStream(zipName + ".partial")); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 65531d775c8..768d5d03e1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -14,9 +14,7 @@ import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.io.ByteArrayInputStream; import java.io.File; -import java.io.FileInputStream; import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.concurrent.CompletableFuture; @@ -38,18 +36,15 @@ import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.core.async.AsyncRequestBody; -import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.GetObjectAttributesRequest; -import software.amazon.awssdk.services.s3.model.GetObjectAttributesResponse; -import software.amazon.awssdk.services.s3.model.ObjectAttributes; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.S3ClientBuilder; -import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.http.async.SdkAsyncHttpClient; import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.utils.StringUtils; @@ -76,6 +71,14 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } + + public static boolean supportsDelete() { + return true; + } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, @@ -105,10 +108,78 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (dataset.getLockFor(Reason.finalizePublication) == null) { spaceName = getSpaceName(dataset); - String dataciteXml = getDataCiteXml(dv); - // Add 
datacite.xml file
+
+                // Define keys for datacite.xml and bag file
                 String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml";
+                String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip";
+
+                // Check for and delete existing files for this version
+                logger.fine("Checking for existing files in archive...");
+
+                try {
+                    HeadObjectRequest headDcRequest = HeadObjectRequest.builder()
+                            .bucket(bucketName)
+                            .key(dcKey)
+                            .build();
+
+                    s3.headObject(headDcRequest).join();
+
+                    // If we get here, the object exists, so delete it
+                    logger.fine("Found existing datacite.xml, deleting: " + dcKey);
+                    DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder()
+                            .bucket(bucketName)
+                            .key(dcKey)
+                            .build();
+
+                    CompletableFuture<DeleteObjectResponse> deleteDcFuture = s3.deleteObject(deleteDcRequest);
+                    DeleteObjectResponse deleteDcResponse = deleteDcFuture.join();
+
+                    if (deleteDcResponse.sdkHttpResponse().isSuccessful()) {
+                        logger.fine("Deleted existing datacite.xml");
+                    } else {
+                        logger.warning("Failed to delete existing datacite.xml: " + dcKey);
+                    }
+                } catch (Exception e) {
+                    if (e.getCause() instanceof NoSuchKeyException) {
+                        logger.fine("No existing datacite.xml found");
+                    } else {
+                        logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage());
+                    }
+                }
+
+                try {
+                    HeadObjectRequest headBagRequest = HeadObjectRequest.builder()
+                            .bucket(bucketName)
+                            .key(bagKey)
+                            .build();
+
+                    s3.headObject(headBagRequest).join();
+
+                    // If we get here, the object exists, so delete it
+                    logger.fine("Found existing bag file, deleting: " + bagKey);
+                    DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder()
+                            .bucket(bucketName)
+                            .key(bagKey)
+                            .build();
+
+                    CompletableFuture<DeleteObjectResponse> deleteBagFuture = s3.deleteObject(deleteBagRequest);
+                    DeleteObjectResponse deleteBagResponse = deleteBagFuture.join();
+
+                    if (deleteBagResponse.sdkHttpResponse().isSuccessful()) {
+                        logger.fine("Deleted existing bag file");
+                    } else {
+                        logger.warning("Failed to delete existing bag file: " + bagKey);
+                    }
+                } catch (Exception e) {
+                    if (e.getCause() instanceof NoSuchKeyException) {
+                        logger.fine("No existing bag file found");
+                    } else {
+                        logger.warning("Error checking/deleting existing bag file: " + e.getMessage());
+                    }
+                }
+
+                String dataciteXml = getDataCiteXml(dv);
+                // Add datacite.xml file
                 PutObjectRequest putRequest = PutObjectRequest.builder()
                         .bucket(bucketName)
                         .key(dcKey)
@@ -128,7 +199,6 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
 
                 // Store BagIt file
                 String fileName = getFileName(spaceName, dv);
-                String bagKey = spaceName + "/" + fileName + ".zip";
                 // Add BagIt ZIP file
                 // Google uses MD5 as one way to verify the
                 // transfer

From ba04ba2455529ed7f8f5bba5cf5818fc255f364e Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Tue, 27 Jan 2026 16:50:42 -0500
Subject: [PATCH 14/15] fix doc ref

---
 doc/sphinx-guides/source/installation/config.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 68982881d77..d0b4eac6ab2 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local
 All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers).
 Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version.
-If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers), superusers can trigger a manual update of the archival bag, and, if the :ref:`:feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
+If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers), superusers can trigger a manual update of the archival bag, and, if the :ref:`:dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
 
 .. _Duracloud Configuration:

From 7a186693a02683b752f898b18eb425d3ea84134d Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Tue, 27 Jan 2026 17:11:32 -0500
Subject: [PATCH 15/15] remove errant : char

---
 doc/sphinx-guides/source/installation/config.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index d0b4eac6ab2..d6cea5b16e3 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local
 All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers).
 Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version.
-If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers), superusers can trigger a manual update of the archival bag, and, if the :ref:`:dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
+If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers), superusers can trigger a manual update of the archival bag, and, if the :ref:`dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
 
 .. _Duracloud Configuration:
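As a usage sketch for the flag documented above (the JVM-option form follows Dataverse's standard feature-flag convention; the environment-variable spelling assumes the usual MicroProfile Config mapping and should be verified against the installed version):

```bash
# Hypothetical example: enable automatic re-archiving on 'Update Current Version'.
# Only effective when the configured archiver can delete existing bags (Google, S3, File).
./asadmin create-jvm-options '-Ddataverse.feature.archive-on-version-update=true'
# or, equivalently, via an environment variable:
export DATAVERSE_FEATURE_ARCHIVE_ON_VERSION_UPDATE=true
```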