From 709b4da87c0186efc049a9b7625fd04fdddd9797 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 24 Nov 2025 16:16:15 -0500 Subject: [PATCH 01/22] try async command for archiving --- .../edu/harvard/iq/dataverse/DatasetPage.java | 25 ++++++++---------- .../iq/dataverse/EjbDataverseEngine.java | 26 +++++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 1 + 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 20617160a1c..b97b8ec6578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,20 +6101,17 @@ public void archiveVersion(Long id) { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { - DatasetVersion version = commandEngine.submit(cmd); - if (!version.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - logger.info( - "DatasetVersion id=" + version.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); - } else { - logger.severe("Error submitting version " + version.getId() + " due to conflict/error at Archive"); - } - if (version.getArchivalCopyLocation() != null) { - setVersionTabList(resetVersionTabList()); - this.setVersionTabListForPostLoad(getVersionTabList()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.success")); - } else { - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure")); - } + commandEngine.submitAsync(cmd); + + // Set initial pending status + dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + + logger.info( + "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + setVersionTabList(resetVersionTabList()); + this.setVersionTabListForPostLoad(getVersionTabList()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); + } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure")); diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index 4d6d59cb013..5a3f105497d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -31,6 +31,9 @@ import java.util.Map; import java.util.Set; + +import jakarta.ejb.AsyncResult; +import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Named; @@ -45,6 +48,7 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.Stack; +import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.annotation.Resource; @@ -348,6 +352,28 @@ public R submit(Command aCommand) throws CommandException { logSvc.log(logRec); } } + + /** + * Submits a command for asynchronous execution. + * The command will be executed in a separate thread and won't block the caller. 
+ * + * @param The return type of the command + * @param aCommand The command to execute + * @param user The user executing the command + * @return A Future representing the pending result + * @throws CommandException if the command cannot be submitted + */ + @Asynchronous + public Future submitAsync(Command aCommand) throws CommandException { + try { + logger.log(Level.INFO, "Submitting async command: {0}", aCommand.getClass().getSimpleName()); + R result = submit(aCommand); + return new AsyncResult<>(result); + } catch (Exception e) { + logger.log(Level.SEVERE, "Async command execution failed: " + aCommand.getClass().getSimpleName(), e); + throw e; + } + } protected void completeCommand(Command command, Object r, Stack called) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index f6c0054a43a..d9b9fd7bc48 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2699,6 +2699,7 @@ dataset.notlinked.msg=There was a problem linking this dataset to yours: dataset.linking.popop.already.linked.note=Note: This dataset is already linked to the following dataverse(s): dataset.linking.popup.not.linked.note=Note: This dataset is not linked to any of your accessible dataverses datasetversion.archive.success=Archival copy of Version successfully submitted +datasetversion.archive.inprogress= Data Project archiving has been started datasetversion.archive.failure=Error in submitting an archival copy datasetversion.update.failure=Dataset Version Update failed. Changes are still in the DRAFT version. datasetversion.update.archive.failure=Dataset Version Update succeeded, but the attempt to update the archival copy failed. From 6487c1433f1c960d645250cea421c1659120d3c9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 24 Nov 2025 17:07:48 -0500 Subject: [PATCH 02/22] save status --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index b97b8ec6578..0bf0db42728 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,10 +6101,12 @@ public void archiveVersion(Long id) { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { - commandEngine.submitAsync(cmd); - + // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + dv = datasetVersionService.merge(dv); + + commandEngine.submitAsync(cmd); logger.info( "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); From 9d32051fe76d0914fc35d21f693211054fc0c38a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 8 Jan 2026 13:07:23 -0500 Subject: [PATCH 03/22] refactor, use persistArchivalCopyLocation everywhere --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../iq/dataverse/DatasetVersionServiceBean.java | 17 +++++++++++++++++ .../edu/harvard/iq/dataverse/api/Datasets.java | 1 + .../impl/AbstractSubmitToArchiveCommand.java | 3 ++- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0bf0db42728..281734cd66e 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6104,7 +6104,7 @@ public void archiveVersion(Long id) { // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); - dv = datasetVersionService.merge(dv); + datasetVersionService.persistArchivalCopyLocation(dv); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 60df1fd3dfd..7656f975d2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1333,4 +1333,21 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer return em.createQuery(cq).getSingleResult(); } + + + /** + * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred + * + * @param dv + * The dataset version whose archival copy location we want to update. Must not be {@code null}. + * @param archivalStatusPending + * the JSON status string, may be {@code null}. + */ + public void persistArchivalCopyLocation(DatasetVersion dv) { + em.createNativeQuery( + "UPDATE datasetversion SET archivalcopylocation = ?1 WHERE id = ?2") + .setParameter(1, dv.getArchivalCopyLocation()) + .setParameter(2, dv.getId()) + .executeUpdate(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 1b3016ec2f4..c8e66115575 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1280,6 +1280,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect updateVersion.setArchivalCopyLocation(null); + datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. 
Note that running this * command within the CuratePublishedDatasetVersionCommand was causing an error: diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 29c27d0396d..7e39a8e7b85 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -63,7 +63,8 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { token = ctxt.authentication().generateApiTokenForUser(user); } performArchiveSubmission(version, token, requestedSettings); - return ctxt.em().merge(version); + ctxt.datasetVersion().persistArchivalCopyLocation(version); + return version; } /** From ec5046cc161193fd102481a9a53cb439c5768f94 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Jan 2026 10:55:48 -0500 Subject: [PATCH 04/22] catch OLE when persisting archivalcopylocation --- .../dataverse/DatasetVersionServiceBean.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 7656f975d2a..b5e964e5673 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -33,6 +33,7 @@ import jakarta.json.JsonObjectBuilder; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; +import jakarta.persistence.OptimisticLockException; import jakarta.persistence.PersistenceContext; import jakarta.persistence.Query; import jakarta.persistence.TypedQuery; @@ -1336,18 +1337,25 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer /** - * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred + * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred so this method will check + * for OptimisticLockExceptions and retry the update with the latest version. * * @param dv * The dataset version whose archival copy location we want to update. Must not be {@code null}. - * @param archivalStatusPending - * the JSON status string, may be {@code null}. */ public void persistArchivalCopyLocation(DatasetVersion dv) { - em.createNativeQuery( - "UPDATE datasetversion SET archivalcopylocation = ?1 WHERE id = ?2") - .setParameter(1, dv.getArchivalCopyLocation()) - .setParameter(2, dv.getId()) - .executeUpdate(); + try { + em.merge(dv); + em.flush(); // Force the update and version check immediately + } catch (OptimisticLockException ole) { + logger.log(Level.INFO, "OptimisticLockException while persisting archival copy location for DatasetVersion id={0}. 
Retrying on latest version.", dv.getId()); + DatasetVersion currentVersion = find(dv.getId()); + if (currentVersion != null) { + currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation()); + em.merge(currentVersion); + } else { + logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} to retry persisting archival copy location after OptimisticLockException.", dv.getId()); + } + } } } From c1055b87cd3445adc0a21f4248c1ec2fb4442774 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:23:22 -0500 Subject: [PATCH 05/22] Add obsolete state, update display, add supportsDelete --- .../edu/harvard/iq/dataverse/DatasetPage.java | 81 ++++++++++++------- .../harvard/iq/dataverse/DatasetVersion.java | 1 + .../impl/AbstractSubmitToArchiveCommand.java | 4 + .../GoogleCloudSubmitToArchiveCommand.java | 39 ++++++++- src/main/java/propertyFiles/Bundle.properties | 1 + src/main/webapp/dataset-versions.xhtml | 8 +- 6 files changed, 103 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 281734cd66e..0832560eafb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -42,6 +42,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import edu.harvard.iq.dataverse.ingest.IngestRequest; @@ -105,6 +106,9 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.persistence.OptimisticLockException; import org.apache.commons.lang3.StringUtils; @@ -2992,27 +2996,40 @@ public String updateCurrentVersion() { String className = settingsService.get(SettingsServiceBean.Key.ArchiverClassName.toString()); AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), updateVersion); if (archiveCommand != null) { - // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); - /* - * Then try to generate and submit an archival copy. Note that running this - * command within the CuratePublishedDatasetVersionCommand was causing an error: - * "The attribute [id] of class - * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary - * key column in the database. Updates are not allowed." To avoid that, and to - * simplify reporting back to the GUI whether this optional step succeeded, I've - * pulled this out as a separate submit(). 
- */ - try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); + //There is an archiver configured, so now decide what to dO: + // If a successful copy exists, don't automatically update, just note the old copy is obsolete (and enable the superadmin button in the display to allow a ~manual update if desired) + // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare + // If a failure or null, rerun archiving now. If a failure is due to an exiting copy in the repo, we'll fail again + String status = updateVersion.getArchivalCopyLocationStatus(); + if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){ + // Delete the record of any existing copy since it is now out of date/incorrect + updateVersion.setArchivalCopyLocation(null); + /* + * Then try to generate and submit an archival copy. Note that running this + * command within the CuratePublishedDatasetVersionCommand was causing an error: + * "The attribute [id] of class + * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary + * key column in the database. Updates are not allowed." To avoid that, and to + * simplify reporting back to the GUI whether this optional step succeeded, I've + * pulled this out as a separate submit(). + */ + try { + updateVersion = commandEngine.submit(archiveCommand); + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); + } else { + errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); + } + } catch (CommandException ex) { + errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); + logger.severe(ex.getMessage()); } - } catch (CommandException ex) { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); - logger.severe(ex.getMessage()); + } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); + JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); + job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + datasetVersionService.merge(updateVersion); } } } @@ -6094,14 +6111,16 @@ public void refreshPaginator() { * * @param id - the id of the datasetversion to archive. 
*/ - public void archiveVersion(Long id) { + public void archiveVersion(Long id, boolean force) { if (session.getUser() instanceof AuthenticatedUser) { DatasetVersion dv = datasetVersionService.retrieveDatasetVersionByVersionId(id).getDatasetVersion(); String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { - + String status = dv.getArchivalCopyLocationStatus(); + if(status == null || (force && cmd.supportsDelete())){ + // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); datasetVersionService.persistArchivalCopyLocation(dv); @@ -6113,7 +6132,7 @@ public void archiveVersion(Long id) { setVersionTabList(resetVersionTabList()); this.setVersionTabListForPostLoad(getVersionTabList()); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); - + } } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure")); @@ -6146,21 +6165,26 @@ public boolean isArchivable() { return archivable; } + /** Method to decide if a 'Submit' button should be enabled for archiving a dataset version. */ public boolean isVersionArchivable() { if (versionArchivable == null) { // If this dataset isn't in an archivable collection return false versionArchivable = false; if (isArchivable()) { - boolean checkForArchivalCopy = false; + // Otherwise, we need to know if the archiver is single-version-only // If it is, we have to check for an existing archived version to answer the // question String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); if (className != null) { try { + boolean checkForArchivalCopy = false; Class clazz = Class.forName(className); Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Method m2 = clazz.getMethod("supportsDelete"); + Object[] params = { settingsWrapper }; + boolean supportsDelete = (Boolean) m2.invoke(null); checkForArchivalCopy = (Boolean) m.invoke(null, params); if (checkForArchivalCopy) { @@ -6168,9 +6192,12 @@ public boolean isVersionArchivable() { // one version is already archived (or attempted - any non-null status) versionArchivable = !isSomeVersionArchived(); } else { - // If we allow multiple versions or didn't find one that has had archiving run - // on it, we can archive, so return true - versionArchivable = true; + // If we didn't find one that has had archiving run + // on it, or we archiving per version is supported and either + // the status is null or the archiver can delete prior runs and status isn't success, + // we can archive, so return true + String status = workingVersion.getArchivalCopyLocationStatus(); + versionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete)); } } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 93b0ccfef61..0de0dedc860 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java 
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -132,6 +132,7 @@ public enum VersionState { public static final String ARCHIVAL_STATUS_PENDING = "pending"; public static final String ARCHIVAL_STATUS_SUCCESS = "success"; public static final String ARCHIVAL_STATUS_FAILURE = "failure"; + public static final String ARCHIVAL_STATUS_OBSOLETE = "obsolete"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 7e39a8e7b85..f7716534b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -184,4 +184,8 @@ public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { public static boolean isSingleVersion(SettingsServiceBean settingsService) { return false; } + + public static boolean supportsDelete() { + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 7dfb9f07e19..97ca104f01c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -44,6 +44,11 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi super(aRequest, version); } + @Override + public static boolean supportsDelete() { + return true; + } + @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { logger.fine("In GoogleCloudSubmitToArchiveCommand..."); @@ -73,6 +78,34 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); + // Check for and delete existing files for this version + String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + + logger.fine("Checking for existing files in archive..."); + + try { + Blob existingDatacite = bucket.get(dataciteFileName); + if (existingDatacite != null && existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + existingDatacite.delete(); + logger.fine("Deleted existing datacite.xml"); + } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage()); + } + + try { + Blob existingBag = bucket.get(bagFileName); + if (existingBag != null && existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + bagFileName); + existingBag.delete(); + logger.fine("Deleted existing bag file"); + } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing bag file: " + se.getMessage()); + } + String dataciteXml = getDataCiteXml(dv); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); try (PipedInputStream dataciteIn = new PipedInputStream(); @@ -102,7 +135,7 @@ public void run() { Thread.sleep(10); i++; } - Blob dcXml = bucket.create(spaceName 
+ "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml", digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + Blob dcXml = bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); dcThread.join(); String checksum = dcXml.getMd5ToHexString(); @@ -131,7 +164,7 @@ public void run() { try (PipedInputStream in = new PipedInputStream(100000); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); - Blob bag = bucket.create(spaceName + "/" + fileName, digestInputStream2, "application/zip", + Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", Bucket.BlobWriteOption.doesNotExist()); if (bag.getSize() == 0) { throw new IOException("Empty Bag"); @@ -139,7 +172,7 @@ public void run() { bagThread.join(); checksum = bag.getMd5ToHexString(); - logger.fine("Bag: " + fileName + " added with checksum: " + checksum); + logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); if (!success || !checksum.equals(localchecksum)) { logger.severe(success ? checksum + " not equal to " + localchecksum diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index d9b9fd7bc48..dbc2ce40657 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2141,6 +2141,7 @@ file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) wit file.dataFilesTab.versions.headers.published=Published on file.dataFilesTab.versions.headers.archived=Archival Status file.dataFilesTab.versions.headers.archived.success=Archived +file.dataFilesTab.versions.headers.archived.obsolete=Original Version Archived file.dataFilesTab.versions.headers.archived.pending=Pending file.dataFilesTab.versions.headers.archived.failure=Failed file.dataFilesTab.versions.headers.archived.notarchived=Not Archived diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 9e5f0a9b24d..1f33675bd3d 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -169,9 +169,15 @@ + + + + + + - From f912fd043945850ac87d396833cdc9c94d62f56c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:34:32 -0500 Subject: [PATCH 06/22] doc that api doesn't handls supportsDelete yet --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 18f28569d7d..10aadde57b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2067,6 +2067,7 @@ public Response submitDatasetVersionToArchive(@Context ContainerRequestContext c if(dv==null) { return error(Status.BAD_REQUEST, "Requested version not found."); } + //ToDo - allow forcing with a non-success status for archivers that supportsDelete() if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); // Note - the user is being sent via the createDataverseRequest(au) call to the @@ -2132,7 +2133,7 @@ public Response archiveAllUnarchivedDatasetVersions(@Context ContainerRequestCon try { 
AuthenticatedUser au = getRequestAuthenticatedUserOrDie(crc); - + //ToDo - allow forcing with a non-success status for archivers that supportsDelete() List dsl = datasetversionService.getUnarchivedDatasetVersions(); if (dsl != null) { if (listonly) { From 00f115e23e50f8d70338256fbd34d8270a9900a1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:55:51 -0500 Subject: [PATCH 07/22] support reflective and instance calls re: delete capability --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../impl/AbstractSubmitToArchiveCommand.java | 14 +++++++++++--- .../impl/GoogleCloudSubmitToArchiveCommand.java | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0832560eafb..09669fb789e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6119,7 +6119,7 @@ public void archiveVersion(Long id, boolean force) { if (cmd != null) { try { String status = dv.getArchivalCopyLocationStatus(); - if(status == null || (force && cmd.supportsDelete())){ + if(status == null || (force && cmd.canDelete())){ // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index f7716534b7f..aaeef193ff4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -185,7 +185,15 @@ public static boolean isSingleVersion(SettingsServiceBean settingsService) { return false; } - public static boolean supportsDelete() { - return false; - } + /** Whether the archiver can delete existing archival files (and thus can retry when the existing files are incomplete/obsolete) + * A static version supports calls via reflection while the instance method supports inheritance for use on actual command instances (see DatasetPage for both use cases). 
+ * @return + */ + public static boolean supportsDelete() { + return false; + } + + public boolean canDelete() { + return supportsDelete(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 97ca104f01c..61a38cffc99 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -44,10 +44,13 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi super(aRequest, version); } - @Override public static boolean supportsDelete() { return true; } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { From bc403703ab672d1ac30ba16d928a3eaa1de87214 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 10 Dec 2025 16:14:30 -0500 Subject: [PATCH 08/22] use query to update status, async everywhere --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 14 +++++--------- .../edu/harvard/iq/dataverse/api/Datasets.java | 10 +++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 09669fb789e..db9e9caa671 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3014,21 +3014,18 @@ public String updateCurrentVersion() { * pulled this out as a separate submit(). */ try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); - } + commandEngine.submitAsync(archiveCommand); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); } catch (CommandException ex) { errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); logger.severe(ex.getMessage()); } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + //Not automatically replacing the old archival copy as creating it is expensive JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); - updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + datasetVersionService.setArchivalCopyLocation(updateVersion, JsonUtil.prettyPrint(job.build())); datasetVersionService.merge(updateVersion); } } @@ -6122,8 +6119,7 @@ public void archiveVersion(Long id, boolean force) { if(status == null || (force && cmd.canDelete())){ // Set initial pending status - dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); - datasetVersionService.persistArchivalCopyLocation(dv); + datasetVersionService.setArchivalCopyLocation(dv, DatasetVersion.ARCHIVAL_STATUS_PENDING); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c8e66115575..bf0f7c6668a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1279,7 +1279,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion); if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); + datasetVersionSvc.setArchivalCopyLocation(updateVersion, null); datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this @@ -1291,12 +1291,8 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( * pulled this out as a separate submit(). */ try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); - } + commandEngine.submitAsync(archiveCommand); + successMsg = BundleUtil.getStringFromBundle("datasetversion.archive.inprogress"); } catch (CommandException ex) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); logger.severe(ex.getMessage()); From df9b5cec3c83ec066dc274d35edea9ee9f9e98a6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 Dec 2025 18:23:56 -0500 Subject: [PATCH 09/22] fixes for dataset page re: archiving --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 1f33675bd3d..89a8162c135 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -170,14 +170,14 @@ - + - From a64e1f749c2f44c14b4386e1c22195e1c65d8ea8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 16 Jan 2026 13:33:19 -0500 Subject: [PATCH 10/22] merge issues --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index db9e9caa671..4b559af3878 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,10 +6101,7 @@ public void refreshPaginator() { /** * This method can be called from *.xhtml files to allow archiving of a dataset - * version from the user interface. It is not currently (11/18) used in the IQSS/develop - * branch, but is used by QDR and is kept here in anticipation of including a - * GUI option to archive (already published) versions after other dataset page - * changes have been completed. + * version from the user interface. * * @param id - the id of the datasetversion to archive. 
*/ From c55230ee81481b465323b16800e98679fe5fa36c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jan 2026 17:38:37 -0500 Subject: [PATCH 11/22] merge fix of persistArchivalCopy method refactors --- .../edu/harvard/iq/dataverse/DatasetPage.java | 30 +++++++++---------- .../harvard/iq/dataverse/DatasetVersion.java | 24 ++++++++++----- .../harvard/iq/dataverse/api/Datasets.java | 2 +- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 4b559af3878..fe17a137361 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3022,10 +3022,8 @@ public String updateCurrentVersion() { } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { //Not automatically replacing the old archival copy as creating it is expensive - JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); - JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); - job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); - datasetVersionService.setArchivalCopyLocation(updateVersion, JsonUtil.prettyPrint(job.build())); + updateVersion.setArchivalStatus(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + datasetVersionService.persistArchivalCopyLocation(updateVersion); datasetVersionService.merge(updateVersion); } } @@ -6113,18 +6111,18 @@ public void archiveVersion(Long id, boolean force) { if (cmd != null) { try { String status = dv.getArchivalCopyLocationStatus(); - if(status == null || (force && cmd.canDelete())){ - - // Set initial pending status - datasetVersionService.setArchivalCopyLocation(dv, DatasetVersion.ARCHIVAL_STATUS_PENDING); - - commandEngine.submitAsync(cmd); - - logger.info( - "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); - setVersionTabList(resetVersionTabList()); - this.setVersionTabListForPostLoad(getVersionTabList()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); + if (status == null || (force && cmd.canDelete())) { + + // Set initial pending status + dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + datasetVersionService.persistArchivalCopyLocation(dv); + commandEngine.submitAsync(cmd); + + logger.info( + "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + setVersionTabList(resetVersionTabList()); + this.setVersionTabListForPostLoad(getVersionTabList()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); } } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 0de0dedc860..1248a8266ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -232,8 +232,9 @@ public enum VersionState { @Transient private DatasetVersionDifference dvd; + //The Json version of the archivalCopyLocation string @Transient - private JsonObject archivalStatus; + private JsonObject archivalCopyLocationJson; public Long getId() { return this.id; @@ -384,24 +385,24 @@ public String getArchivalCopyLocation() { public String 
getArchivalCopyLocationStatus() { populateArchivalStatus(false); - if(archivalStatus!=null) { - return archivalStatus.getString(ARCHIVAL_STATUS); + if(archivalCopyLocationJson!=null) { + return archivalCopyLocationJson.getString(ARCHIVAL_STATUS); } return null; } public String getArchivalCopyLocationMessage() { populateArchivalStatus(false); - if(archivalStatus!=null) { - return archivalStatus.getString(ARCHIVAL_STATUS_MESSAGE); + if(archivalCopyLocationJson!=null) { + return archivalCopyLocationJson.getString(ARCHIVAL_STATUS_MESSAGE); } return null; } private void populateArchivalStatus(boolean force) { - if(archivalStatus ==null || force) { + if(archivalCopyLocationJson ==null || force) { if(archivalCopyLocation!=null) { try { - archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); + archivalCopyLocationJson = JsonUtil.getJsonObject(archivalCopyLocation); } catch(Exception e) { logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage()); logger.fine(archivalCopyLocation); @@ -415,6 +416,15 @@ public void setArchivalCopyLocation(String location) { populateArchivalStatus(true); } + // COnvenience method to set only the status + public void setArchivalStatus(String status) { + populateArchivalStatus(false); + JsonObjectBuilder job = Json.createObjectBuilder(archivalCopyLocationJson); + job.add(DatasetVersion.ARCHIVAL_STATUS, status); + archivalCopyLocationJson = job.build(); + archivalCopyLocation = JsonUtil.prettyPrint(archivalCopyLocationJson); + } + public String getDeaccessionLink() { return deaccessionLink; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index bf0f7c6668a..dba4b36d4da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1279,7 +1279,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion); if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect - datasetVersionSvc.setArchivalCopyLocation(updateVersion, null); + updateVersion.setArchivalCopyLocation(null); datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this From 905570a81563b8428042398ac1778fd4d380b61d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 22 Jan 2026 12:57:38 -0500 Subject: [PATCH 12/22] add flag, docs --- doc/sphinx-guides/source/installation/config.rst | 10 ++++++++++ .../java/edu/harvard/iq/dataverse/DatasetPage.java | 3 ++- .../harvard/iq/dataverse/settings/FeatureFlags.java | 13 +++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a9d5c7c0041..68982881d77 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2263,6 +2263,9 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). 
+Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version. +If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. + .. _Duracloud Configuration: Duracloud Configuration @@ -4031,6 +4034,13 @@ dataverse.feature.only-update-datacite-when-needed Only contact DataCite to update a DOI after checking to see if DataCite has outdated information (for efficiency, lighter load on DataCite, especially when using file DOIs). +.. _dataverse.feature.archive-on-version-update: + +dataverse.feature.archive-on-version-update ++++++++++++++++++++++++++++++++++++++++++++ + +Indicates whether archival bag creation should be triggered (if configured) when a version is updated and was already successfully archived, +i.e via the Update-Current-Version publication option. Setting the flag true only works if the archiver being used supports deleting existing archival bags. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index fe17a137361..a091005b392 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -164,6 +164,7 @@ import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.search.SearchUtil; import edu.harvard.iq.dataverse.search.SolrClientService; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SignpostingResources; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -3001,7 +3002,7 @@ public String updateCurrentVersion() { // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare // If a failure or null, rerun archiving now. If a failure is due to an exiting copy in the repo, we'll fail again String status = updateVersion.getArchivalCopyLocationStatus(); - if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){ + if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && archiveCommand.canDelete())){ // Delete the record of any existing copy since it is now out of date/incorrect updateVersion.setArchivalCopyLocation(null); /* diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 2e86fae610e..fdbdb257dbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -249,6 +249,19 @@ public enum FeatureFlags { * @since Dataverse 6.9 */ ONLY_UPDATE_DATACITE_WHEN_NEEDED("only-update-datacite-when-needed"), + /** + * Indicates whether archival bag creation should be triggered (if configured) when a version + * is updated and was already successfully archived, i.e via the Update-Current-Version publication option. 
+ * Since archiving can be resource intensive, it may not be worthwhile to automatically re-archive for the + * types of minor changes "Update-Current-Version" is intended for. Note that this flag is only effective + * for archivers that support deletion of existing files. When the flag is false, or the archiver cannot + * delete, the existing archival status will be changed to "Obsolete". + * + * * @apiNote Raise flag by setting "dataverse.feature.archive-on-version-update" + * + * @since Dataverse 6.10 + */ + ARCHIVE_ON_VERSION_UPDATE("archive-on-version-update"), ; From 521fbf68f2d6ba72b06343c32cf6154b027c899f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 22 Jan 2026 15:01:50 -0500 Subject: [PATCH 13/22] add delete to local and S3 --- .../impl/LocalSubmitToArchiveCommand.java | 49 +++++++++- .../impl/S3SubmitToArchiveCommand.java | 94 ++++++++++++++++--- 2 files changed, 129 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 462879f2ec9..34fadbed703 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -34,6 +34,14 @@ public class LocalSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand public LocalSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } + + public static boolean supportsDelete() { + return true; + } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, @@ -57,15 +65,52 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); + // Define file paths + String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); + + File existingDatacite = new File(dataciteFileName); + if (existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + if (existingDatacite.delete()) { + logger.fine("Deleted existing datacite.xml"); + } else { + logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName); + } + } + + File existingBag = new File(zipName); + if (existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + zipName); + if (existingBag.delete()) { + logger.fine("Deleted existing bag file"); + } else { + logger.warning("Failed to delete existing bag file: " + zipName); + } + } + + // Also check for and delete the .partial file if it exists + File existingPartial = new File(zipName + ".partial"); + if (existingPartial.exists()) { + logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial"); + if (existingPartial.delete()) { + logger.fine("Deleted existing partial bag file"); + } else { + logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial"); + } + } + String dataciteXml = getDataCiteXml(dv); FileUtils.writeStringToFile( - new File(localPath + 
"/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"), + new File(dataciteFileName), dataciteXml, StandardCharsets.UTF_8); BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); - zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; //ToDo: generateBag(File f, true) seems to do the same thing (with a .tmp extension) - since we don't have to use a stream here, could probably just reuse the existing code? bagger.generateBag(new FileOutputStream(zipName + ".partial")); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 65531d775c8..768d5d03e1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -14,9 +14,7 @@ import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.io.ByteArrayInputStream; import java.io.File; -import java.io.FileInputStream; import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.concurrent.CompletableFuture; @@ -38,18 +36,15 @@ import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.core.async.AsyncRequestBody; -import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.GetObjectAttributesRequest; -import software.amazon.awssdk.services.s3.model.GetObjectAttributesResponse; -import software.amazon.awssdk.services.s3.model.ObjectAttributes; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.S3ClientBuilder; -import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.http.async.SdkAsyncHttpClient; import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.utils.StringUtils; @@ -76,6 +71,14 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } + + public static boolean supportsDelete() { + return true; + } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, @@ -105,10 +108,78 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (dataset.getLockFor(Reason.finalizePublication) == null) { spaceName = getSpaceName(dataset); - String dataciteXml = getDataCiteXml(dv); - // Add 
datacite.xml file + + // Define keys for datacite.xml and bag file String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; + String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip"; + + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); + + try { + HeadObjectRequest headDcRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + s3.headObject(headDcRequest).join(); + + // If we get here, the object exists, so delete it + logger.fine("Found existing datacite.xml, deleting: " + dcKey); + DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + CompletableFuture deleteDcFuture = s3.deleteObject(deleteDcRequest); + DeleteObjectResponse deleteDcResponse = deleteDcFuture.join(); + + if (deleteDcResponse.sdkHttpResponse().isSuccessful()) { + logger.fine("Deleted existing datacite.xml"); + } else { + logger.warning("Failed to delete existing datacite.xml: " + dcKey); + } + } catch (Exception e) { + if (e.getCause() instanceof NoSuchKeyException) { + logger.fine("No existing datacite.xml found"); + } else { + logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage()); + } + } + try { + HeadObjectRequest headBagRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(bagKey) + .build(); + + s3.headObject(headBagRequest).join(); + + // If we get here, the object exists, so delete it + logger.fine("Found existing bag file, deleting: " + bagKey); + DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder() + .bucket(bucketName) + .key(bagKey) + .build(); + + CompletableFuture deleteBagFuture = s3.deleteObject(deleteBagRequest); + DeleteObjectResponse deleteBagResponse = deleteBagFuture.join(); + + if (deleteBagResponse.sdkHttpResponse().isSuccessful()) { + logger.fine("Deleted existing bag file"); + } else { + logger.warning("Failed to delete existing bag file: " + bagKey); + } + } catch (Exception e) { + if (e.getCause() instanceof NoSuchKeyException) { + logger.fine("No existing bag file found"); + } else { + logger.warning("Error checking/deleting existing bag file: " + e.getMessage()); + } + } + + String dataciteXml = getDataCiteXml(dv); + // Add datacite.xml file PutObjectRequest putRequest = PutObjectRequest.builder() .bucket(bucketName) .key(dcKey) @@ -128,7 +199,6 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Store BagIt file String fileName = getFileName(spaceName, dv); - String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the // transfer From ba04ba2455529ed7f8f5bba5cf5818fc255f364e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 27 Jan 2026 16:50:42 -0500 Subject: [PATCH 14/22] fix doc ref --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 68982881d77..d0b4eac6ab2 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with 
more detail available to superusers). Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version. -If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. +If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. .. _Duracloud Configuration: From 7a186693a02683b752f898b18eb425d3ea84134d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 27 Jan 2026 17:11:32 -0500 Subject: [PATCH 15/22] remove errant : char --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index d0b4eac6ab2..d6cea5b16e3 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version. -If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. +If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. .. 
_Duracloud Configuration: From ae91b78dbbec09899c9040730567e978698d406c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 23 Jan 2026 15:05:21 -0500 Subject: [PATCH 16/22] no transaction time limit during bagging from command (not workflow) --- .../impl/AbstractSubmitToArchiveCommand.java | 63 +++- .../impl/DRSSubmitToArchiveCommand.java | 78 ++++- .../impl/DuraCloudSubmitToArchiveCommand.java | 295 +++++++++--------- .../GoogleCloudSubmitToArchiveCommand.java | 215 +++++++------ .../impl/LocalSubmitToArchiveCommand.java | 129 ++++---- .../impl/S3SubmitToArchiveCommand.java | 219 ++++++------- .../iq/dataverse/util/bagit/BagGenerator.java | 32 +- .../workflow/WorkflowServiceBean.java | 2 +- .../ArchivalSubmissionWorkflowStep.java | 40 ++- 9 files changed, 600 insertions(+), 473 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index aaeef193ff4..ffa79456902 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -2,8 +2,9 @@ import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -17,7 +18,11 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; +import jakarta.json.JsonObject; import java.io.IOException; import java.io.PipedInputStream; @@ -30,8 +35,8 @@ @RequiredPermissions(Permission.PublishDataset) public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand { - private final DatasetVersion version; - private final Map requestedSettings = new HashMap(); + protected final DatasetVersion version; + protected final Map requestedSettings = new HashMap(); protected boolean success=false; private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); private static final int MAX_ZIP_WAIT = 20000; @@ -43,8 +48,16 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override + @TransactionAttribute(TransactionAttributeType.REQUIRED) public DatasetVersion execute(CommandContext ctxt) throws CommandException { + // Check for locks while we're still in a transaction + Dataset dataset = version.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) != null + || dataset.getLockFor(Reason.FileValidationFailed) != null) { + throw new CommandException("Dataset is locked and cannot be archived", this); + } + String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); String[] settingsArray = settings.split(","); for (String setting : settingsArray) { @@ -62,11 +75,40 @@ public DatasetVersion execute(CommandContext ctxt) throws 
CommandException { //No un-expired token token = ctxt.authentication().generateApiTokenForUser(user); } - performArchiveSubmission(version, token, requestedSettings); - ctxt.datasetVersion().persistArchivalCopyLocation(version); + String dataCiteXml = getDataCiteXml(version); + OREMap oreMap = new OREMap(version, false); + JsonObject ore = oreMap.getOREMap(); + Map terms = getJsonLDTerms(oreMap); + performArchivingAndPersist(ctxt, version, dataCiteXml, ore, terms, token, requestedSettings); return version; } + // While we have a transaction context, get the terms needed to create the baginfo file + public Map getJsonLDTerms(OREMap oreMap) { + Map terms = new HashMap(); + terms.put(DatasetFieldConstant.datasetContact, oreMap.getContactTerm()); + terms.put(DatasetFieldConstant.datasetContactName, oreMap.getContactNameTerm()); + terms.put(DatasetFieldConstant.datasetContactEmail, oreMap.getContactEmailTerm()); + terms.put(DatasetFieldConstant.description, oreMap.getDescriptionTerm()); + terms.put(DatasetFieldConstant.descriptionText, oreMap.getDescriptionTextTerm()); + + return terms; + } + + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) + public WorkflowStepResult performArchivingAndPersist(CommandContext ctxt, DatasetVersion version, String dataCiteXml, JsonObject ore, Map terms, ApiToken token, Map requestedSetttings) { + // This runs OUTSIDE any transaction + BagGenerator.setNumConnections(getNumberOfBagGeneratorThreads()); + WorkflowStepResult wfsr = performArchiveSubmission(version, dataCiteXml, ore, terms, token, requestedSettings); + persistResult(ctxt, version); + return wfsr; + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + private void persistResult(CommandContext ctxt, DatasetVersion versionWithStatus) { + // New transaction just for this quick operation + ctxt.datasetVersion().persistArchivalCopyLocation(versionWithStatus); + } /** * This method is the only one that should be overwritten by other classes. Note * that this method may be called from the execute method above OR from a @@ -75,10 +117,13 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { * constructor and could be dropped from the parameter list.) * * @param version - the DatasetVersion to archive + * @param ore + * @param dataCiteXml + * @param terms * @param token - an API Token for the user performing this action * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
*/ - abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, Map requestedSetttings); + abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, String dataCiteXml, JsonObject ore, Map terms, ApiToken token, Map requestedSetttings); protected int getNumberOfBagGeneratorThreads() { if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { @@ -98,7 +143,7 @@ public String describe() { + version.getFriendlyVersionNumber()+")]"; } - String getDataCiteXml(DatasetVersion dv) { + public String getDataCiteXml(DatasetVersion dv) { DataCitation dc = new DataCitation(dv); Map metadata = dc.getDataCiteMetadata(); return DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), metadata, @@ -106,13 +151,13 @@ String getDataCiteXml(DatasetVersion dv) { } public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInputStream digestInputStream2, - String dataciteXml, ApiToken token) throws IOException, InterruptedException { + String dataciteXml, JsonObject ore, Map terms, ApiToken token) throws IOException, InterruptedException { Thread bagThread = new Thread(new Runnable() { public void run() { try (PipedOutputStream out = new PipedOutputStream(in)) { // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); bagger.setNumConnections(getNumberOfBagGeneratorThreads()); + BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); success = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 78e8454255b..81bcbc25dda 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -4,13 +4,19 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.SettingsWrapper; +import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -34,6 +40,8 @@ import java.util.Set; import java.util.logging.Logger; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.json.Json; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; @@ -77,13 +85,73 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final String 
TRUST_CERT = "trust_cert"; private static final String TIMEOUT = "timeout"; + private String archivableAncestorAlias; + public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + @TransactionAttribute(TransactionAttributeType.REQUIRED) + public DatasetVersion execute(CommandContext ctxt) throws CommandException { + + + // Check for locks while we're still in a transaction + Dataset dataset = version.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) != null + || dataset.getLockFor(Reason.FileValidationFailed) != null) { + throw new CommandException("Dataset is locked and cannot be archived", this); + } + + String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); + String[] settingsArray = settings.split(","); + for (String setting : settingsArray) { + setting = setting.trim(); + if (!setting.startsWith(":")) { + logger.warning("Invalid Archiver Setting: " + setting); + } else { + requestedSettings.put(setting, ctxt.settings().get(setting)); + } + } + + // Compute archivable ancestor while we're in a transaction and entities are managed + JsonObject drsConfigObject = null; + try { + drsConfigObject = JsonUtil.getJsonObject(requestedSettings.get(DRS_CONFIG)); + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + + if (drsConfigObject != null) { + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + if (adminMetadata != null) { + JsonObject collectionsObj = adminMetadata.getJsonObject(COLLECTIONS); + if (collectionsObj != null) { + Set collections = collectionsObj.keySet(); + Dataverse ancestor = dataset.getOwner(); + // Compute this while entities are still managed + archivableAncestorAlias = getArchivableAncestor(ancestor, collections); + } + } + } + + AuthenticatedUser user = getRequest().getAuthenticatedUser(); + ApiToken token = ctxt.authentication().findApiTokenByUser(user); + if (token == null) { + //No un-expired token + token = ctxt.authentication().generateApiTokenForUser(user); + } + String dataCiteXml = getDataCiteXml(version); + OREMap oreMap = new OREMap(version, false); + JsonObject ore = oreMap.getOREMap(); + Map terms = getJsonLDTerms(oreMap); + performArchivingAndPersist(ctxt, version, dataCiteXml, ore, terms, token, requestedSettings); + return version; + } + + @Override + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { logger.fine("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; @@ -97,7 +165,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Set collections = adminMetadata.getJsonObject(COLLECTIONS).keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); - String alias = getArchivableAncestor(ancestor, collections); + String alias = archivableAncestorAlias; // Use the pre-computed alias instead of calling getArchivableAncestor again String spaceName = getSpaceName(dataset); String packageId = getFileName(spaceName, dv); @@ -113,7 +181,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); - WorkflowStepResult s3Result = 
super.performArchiveSubmission(dv, token, requestedSettings); + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, dataciteXml, ore, terms, token, requestedSettings); JsonObjectBuilder statusObject = Json.createObjectBuilder(); statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); @@ -242,7 +310,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.severe("DRS Ingest Failed for: " + packageId + " - response does not include status and message"); return new Failure( - "DRS Archiver fail in Ingest call \" - response does not include status and message"); + "DRS Archiver fail in Ingest call - response does not include status and message"); } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index fe4a25091d7..b1fa777478b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -10,6 +9,8 @@ import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudContext; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudHost; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudPort; + +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -49,8 +50,8 @@ public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, jakarta.json.JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { String port = requestedSettings.get(DURACLOUD_PORT) != null ? 
requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT; @@ -64,173 +65,165 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // This will make the archivalCopyLocation non-null after a failure which should // stop retries - if (dataset.getLockFor(Reason.finalizePublication) == null - && dataset.getLockFor(Reason.FileValidationFailed) == null) { - // Use Duracloud client classes to login - ContentStoreManager storeManager = new ContentStoreManagerImpl(host, port, dpnContext); - Credential credential = new Credential(System.getProperty("duracloud.username"), - System.getProperty("duracloud.password")); - storeManager.login(credential); + // Use Duracloud client classes to login + ContentStoreManager storeManager = new ContentStoreManagerImpl(host, port, dpnContext); + Credential credential = new Credential(System.getProperty("duracloud.username"), + System.getProperty("duracloud.password")); + storeManager.login(credential); + /* + * Aliases can contain upper case characters which are not allowed in space + * names. Similarly, aliases can contain '_' which isn't allowed in a space + * name. The line below replaces any upper case chars with lowercase and + * replaces any '_' with '.-' . The '-' after the dot assures we don't break the + * rule that + * "The last period in a aspace may not immediately be followed by a number". + * (Although we could check, it seems better to just add '.-' all the time.As + * written the replaceAll will also change any chars not valid in a spaceName to + * '.' which would avoid code breaking if the alias constraints change. That + * said, this line may map more than one alias to the same spaceName, e.g. + * "test" and "Test" aliases both map to the "test" space name. This does not + * break anything but does potentially put bags from more than one collection in + * the same space. + */ + String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); + String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber(); + + ContentStore store; + //Set a failure status that will be updated if we succeed + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); + + try { /* - * Aliases can contain upper case characters which are not allowed in space - * names. Similarly, aliases can contain '_' which isn't allowed in a space - * name. The line below replaces any upper case chars with lowercase and - * replaces any '_' with '.-' . The '-' after the dot assures we don't break the - * rule that - * "The last period in a aspace may not immediately be followed by a number". - * (Although we could check, it seems better to just add '.-' all the time.As - * written the replaceAll will also change any chars not valid in a spaceName to - * '.' which would avoid code breaking if the alias constraints change. That - * said, this line may map more than one alias to the same spaceName, e.g. - * "test" and "Test" aliases both map to the "test" space name. This does not - * break anything but does potentially put bags from more than one collection in - * the same space. 
+ * If there is a failure in creating a space, it is likely that a prior version + * has not been fully processed (snapshot created, archiving completed and files + * and space deleted - currently manual operations done at the project's + * duracloud website) */ - String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); - String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber(); - - ContentStore store; - //Set a failure status that will be updated if we succeed - JsonObjectBuilder statusObject = Json.createObjectBuilder(); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - - try { - /* - * If there is a failure in creating a space, it is likely that a prior version - * has not been fully processed (snapshot created, archiving completed and files - * and space deleted - currently manual operations done at the project's - * duracloud website) - */ - store = storeManager.getPrimaryContentStore(); - // Create space to copy archival files to - if (!store.spaceExists(spaceName)) { - store.createSpace(spaceName); - } - String dataciteXml = getDataCiteXml(dv); - - MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); - DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { - // Add datacite.xml file - - Thread dcThread = new Thread(new Runnable() { - public void run() { - try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { - - dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); - dataciteOut.close(); - success=true; - } catch (Exception e) { - logger.severe("Error creating datacite.xml: " + e.getMessage()); - // TODO Auto-generated catch block - e.printStackTrace(); - } + store = storeManager.getPrimaryContentStore(); + // Create space to copy archival files to + if (!store.spaceExists(spaceName)) { + store.createSpace(spaceName); + } + + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + try (PipedInputStream dataciteIn = new PipedInputStream(); + DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + // Add datacite.xml file + + Thread dcThread = new Thread(new Runnable() { + public void run() { + try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { + + dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); + dataciteOut.close(); + success=true; + } catch (Exception e) { + logger.severe("Error creating datacite.xml: " + e.getMessage()); + // TODO Auto-generated catch block + e.printStackTrace(); } - }); - dcThread.start(); - // Have seen Pipe Closed errors for other archivers when used as a workflow - // without this delay loop - int i = 0; - while (digestInputStream.available() <= 0 && i < 100) { - Thread.sleep(10); - i++; } - String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream, - -1l, null, null, null); - logger.fine("Content: datacite.xml added with checksum: " + checksum); - dcThread.join(); - String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + }); + dcThread.start(); + // Have seen Pipe Closed errors for other archivers when used as a workflow + // without this delay loop + int i = 0; + while 
(digestInputStream.available() <= 0 && i < 100) { + Thread.sleep(10); + i++; + } + String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream, + -1l, null, null, null); + logger.fine("Content: datacite.xml added with checksum: " + checksum); + dcThread.join(); + String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + baseFileName); + logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); + try { + store.deleteContent(spaceName, baseFileName + "_datacite.xml"); + } catch (ContentStoreException cse) { + logger.warning(cse.getMessage()); + } + return new Failure("Error in transferring DataCite.xml file to DuraCloud", + "DuraCloud Submission Failure: incomplete metadata transfer"); + } + + // Store BagIt file + success = false; + String fileName = baseFileName + ".zip"; + + // Add BagIt ZIP file + // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the + // transfer + + messageDigest = MessageDigest.getInstance("MD5"); + try (PipedInputStream in = new PipedInputStream(100000); + DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, ore, terms, token); + checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null); + bagThread.join(); + if (success) { + logger.fine("Content: " + fileName + " added with checksum: " + checksum); + localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); + } if (!success || !checksum.equals(localchecksum)) { - logger.severe("Failure on " + baseFileName); + logger.severe("Failure on " + fileName); logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); try { + store.deleteContent(spaceName, fileName); store.deleteContent(spaceName, baseFileName + "_datacite.xml"); } catch (ContentStoreException cse) { logger.warning(cse.getMessage()); } - return new Failure("Error in transferring DataCite.xml file to DuraCloud", - "DuraCloud Submission Failure: incomplete metadata transfer"); - } - - // Store BagIt file - success = false; - String fileName = baseFileName + ".zip"; - - // Add BagIt ZIP file - // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the - // transfer - - messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(100000); - DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { - Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); - checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null); - bagThread.join(); - if (success) { - logger.fine("Content: " + fileName + " added with checksum: " + checksum); - localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - } - if (!success || !checksum.equals(localchecksum)) { - logger.severe("Failure on " + fileName); - logger.severe(success ? 
checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); - try { - store.deleteContent(spaceName, fileName); - store.deleteContent(spaceName, baseFileName + "_datacite.xml"); - } catch (ContentStoreException cse) { - logger.warning(cse.getMessage()); - } - return new Failure("Error in transferring Zip file to DuraCloud", - "DuraCloud Submission Failure: incomplete archive transfer"); - } + return new Failure("Error in transferring Zip file to DuraCloud", + "DuraCloud Submission Failure: incomplete archive transfer"); } + } - logger.fine("DuraCloud Submission step: Content Transferred"); + logger.fine("DuraCloud Submission step: Content Transferred"); - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) - StringBuffer sb = new StringBuffer("https://"); - sb.append(host); - if (!port.equals("443")) { - sb.append(":" + port); - } - sb.append("/duradmin/spaces/sm/"); - sb.append(store.getStoreId()); - sb.append("/" + spaceName + "/" + fileName); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); - - logger.fine("DuraCloud Submission step complete: " + sb.toString()); - } catch (ContentStoreException | IOException e) { - // TODO Auto-generated catch block - logger.warning(e.getMessage()); - e.printStackTrace(); - return new Failure("Error in transferring file to DuraCloud", - "DuraCloud Submission Failure: archive file not transferred"); - } catch (InterruptedException e) { - logger.warning(e.getLocalizedMessage()); - e.printStackTrace(); + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + StringBuffer sb = new StringBuffer("https://"); + sb.append(host); + if (!port.equals("443")) { + sb.append(":" + port); } - } catch (ContentStoreException e) { + sb.append("/duradmin/spaces/sm/"); + sb.append(store.getStoreId()); + sb.append("/" + spaceName + "/" + fileName); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); + + logger.fine("DuraCloud Submission step complete: " + sb.toString()); + } catch (ContentStoreException | IOException e) { + // TODO Auto-generated catch block logger.warning(e.getMessage()); e.printStackTrace(); - String mesg = "DuraCloud Submission Failure"; - if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) { - mesg = mesg + ": Prior Version archiving not yet complete?"; - } - return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg); - } catch (NoSuchAlgorithmException e) { - logger.severe("MD5 MessageDigest not available!"); + return new Failure("Error in transferring file to DuraCloud", + "DuraCloud Submission Failure: archive file not transferred"); + } catch (InterruptedException e) { + logger.warning(e.getLocalizedMessage()); + e.printStackTrace(); } - finally { - dv.setArchivalCopyLocation(statusObject.build().toString()); + } catch (ContentStoreException e) { + logger.warning(e.getMessage()); + e.printStackTrace(); + String mesg = "DuraCloud Submission Failure"; + if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) { + mesg = mesg + ": Prior Version archiving not yet complete?"; } - } else { - logger.warning( - "DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation 
failed"); - return new Failure("Dataset locked"); + return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg); + } catch (NoSuchAlgorithmException e) { + logger.severe("MD5 MessageDigest not available!"); + } + finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); } return WorkflowStepResult.OK; } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 61a38cffc99..f662de36792 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -7,7 +7,6 @@ import com.google.cloud.storage.StorageException; import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -16,11 +15,15 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.GoogleCloudBucket; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.GoogleCloudProject; +import edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + import org.apache.commons.codec.binary.Hex; import jakarta.json.Json; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import java.io.File; import java.io.FileInputStream; @@ -53,7 +56,7 @@ public boolean canDelete() { } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, Map terms, ApiToken token, Map requestedSettings) { logger.fine("In GoogleCloudSubmitToArchiveCommand..."); String bucketName = requestedSettings.get(GOOGLECLOUD_BUCKET); String projectName = requestedSettings.get(GOOGLECLOUD_PROJECT); @@ -76,135 +79,127 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Bucket bucket = storage.get(bucketName); Dataset dataset = dv.getDataset(); - if (dataset.getLockFor(Reason.finalizePublication) == null) { - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); - // Check for and delete existing files for this version - String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; - String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + // Check for and delete existing files for this version + String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - logger.fine("Checking for existing files in archive..."); + logger.fine("Checking for existing files in archive..."); - try { - Blob existingDatacite = 
bucket.get(dataciteFileName); - if (existingDatacite != null && existingDatacite.exists()) { - logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); - existingDatacite.delete(); - logger.fine("Deleted existing datacite.xml"); - } - } catch (StorageException se) { - logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage()); + try { + Blob existingDatacite = bucket.get(dataciteFileName); + if (existingDatacite != null && existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + existingDatacite.delete(); + logger.fine("Deleted existing datacite.xml"); } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage()); + } - try { - Blob existingBag = bucket.get(bagFileName); - if (existingBag != null && existingBag.exists()) { - logger.fine("Found existing bag file, deleting: " + bagFileName); - existingBag.delete(); - logger.fine("Deleted existing bag file"); - } - } catch (StorageException se) { - logger.warning("Error checking/deleting existing bag file: " + se.getMessage()); + try { + Blob existingBag = bucket.get(bagFileName); + if (existingBag != null && existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + bagFileName); + existingBag.delete(); + logger.fine("Deleted existing bag file"); } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing bag file: " + se.getMessage()); + } - String dataciteXml = getDataCiteXml(dv); - MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); - DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { - // Add datacite.xml file - - Thread dcThread = new Thread(new Runnable() { - public void run() { - try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { - - dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); - dataciteOut.close(); - success = true; - } catch (Exception e) { - logger.severe("Error creating datacite.xml: " + e.getMessage()); - // TODO Auto-generated catch block - e.printStackTrace(); - // throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); - } + // Upload datacite.xml + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + try (PipedInputStream dataciteIn = new PipedInputStream(); + DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + // Add datacite.xml file + + Thread dcThread = new Thread(new Runnable() { + public void run() { + try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { + + dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); + dataciteOut.close(); + success = true; + } catch (Exception e) { + logger.severe("Error creating datacite.xml: " + e.getMessage()); + e.printStackTrace(); } - }); - dcThread.start(); - // Have seen Pipe Closed errors for other archivers when used as a workflow - // without this delay loop - int i = 0; - while (digestInputStream.available() <= 0 && i < 100) { - Thread.sleep(10); - i++; } - Blob dcXml = bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + }); + dcThread.start(); + // Have seen Pipe Closed errors for other archivers when used as a workflow + // without this delay loop + int i = 0; + while (digestInputStream.available() <= 0 && i < 100) { + Thread.sleep(10); + i++; + } + Blob dcXml = 
bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + + dcThread.join(); + String checksum = dcXml.getMd5ToHexString(); + logger.fine("Content: datacite.xml added with checksum: " + checksum); + String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + spaceName); + logger.severe(success ? checksum + " not equal to " + localchecksum : "datacite.xml transfer did not succeed"); + try { + dcXml.delete(Blob.BlobSourceOption.generationMatch()); + } catch (StorageException se) { + logger.warning(se.getMessage()); + } + return new Failure("Error in transferring DataCite.xml file to GoogleCloud", + "GoogleCloud Submission Failure: incomplete metadata transfer"); + } - dcThread.join(); - String checksum = dcXml.getMd5ToHexString(); - logger.fine("Content: datacite.xml added with checksum: " + checksum); - String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + // Store BagIt file + success = false; + + // Add BagIt ZIP file + // Google uses MD5 as one way to verify the + // transfer + messageDigest = MessageDigest.getInstance("MD5"); + try (PipedInputStream in = new PipedInputStream(100000); + DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, ore, terms, token); + Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", + Bucket.BlobWriteOption.doesNotExist()); + if (bag.getSize() == 0) { + throw new IOException("Empty Bag"); + } + bagThread.join(); + + checksum = bag.getMd5ToHexString(); + logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); + localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); if (!success || !checksum.equals(localchecksum)) { - logger.severe("Failure on " + spaceName); - logger.severe(success ? checksum + " not equal to " + localchecksum : "datacite.xml transfer did not succeed"); + logger.severe(success ? 
checksum + " not equal to " + localchecksum + : "bag transfer did not succeed"); try { - dcXml.delete(Blob.BlobSourceOption.generationMatch()); + bag.delete(Blob.BlobSourceOption.generationMatch()); } catch (StorageException se) { logger.warning(se.getMessage()); } - return new Failure("Error in transferring DataCite.xml file to GoogleCloud", - "GoogleCloud Submission Failure: incomplete metadata transfer"); + return new Failure("Error in transferring Zip file to GoogleCloud", + "GoogleCloud Submission Failure: incomplete archive transfer"); } + } - // Store BagIt file - success = false; - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - - // Add BagIt ZIP file - // Google uses MD5 as one way to verify the - // transfer - messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(100000); - DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { - Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); - Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", - Bucket.BlobWriteOption.doesNotExist()); - if (bag.getSize() == 0) { - throw new IOException("Empty Bag"); - } - bagThread.join(); - - checksum = bag.getMd5ToHexString(); - logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); - localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - if (!success || !checksum.equals(localchecksum)) { - logger.severe(success ? checksum + " not equal to " + localchecksum - : "bag transfer did not succeed"); - try { - bag.delete(Blob.BlobSourceOption.generationMatch()); - } catch (StorageException se) { - logger.warning(se.getMessage()); - } - return new Failure("Error in transferring Zip file to GoogleCloud", - "GoogleCloud Submission Failure: incomplete archive transfer"); - } - } + logger.fine("GoogleCloud Submission step: Content Transferred"); - logger.fine("GoogleCloud Submission step: Content Transferred"); + // Document the location of dataset archival copy location (actually the URL + // where you can view it as an admin) + // Changed to point at bucket where the zip and datacite.xml are visible - // Document the location of dataset archival copy location (actually the URL - // where you can view it as an admin) - // Changed to point at bucket where the zip and datacite.xml are visible + StringBuffer sb = new StringBuffer("https://console.cloud.google.com/storage/browser/"); + sb.append(bucketName + "/" + spaceName); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); - StringBuffer sb = new StringBuffer("https://console.cloud.google.com/storage/browser/"); - sb.append(bucketName + "/" + spaceName); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); - - } - } else { - logger.warning("GoogleCloud Submision Workflow aborted: Dataset locked for pidRegister"); - return new Failure("Dataset locked"); } } catch (Exception e) { logger.warning(e.getLocalizedMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 34fadbed703..38951c8a218 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; @@ -10,7 +9,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagItLocalPath; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; -import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -19,6 +18,7 @@ import java.util.logging.Logger; import jakarta.json.Json; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import java.io.File; @@ -44,94 +44,91 @@ public boolean canDelete() { } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { - logger.fine("In LocalCloudSubmitToArchive..."); + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { + logger.fine("In LocalSubmitToArchive..."); String localPath = requestedSettings.get(BagItLocalPath.toString()); String zipName = null; - //Set a failure status that will be updated if we succeed + // Set a failure status that will be updated if we succeed JsonObjectBuilder statusObject = Json.createObjectBuilder(); statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); try { - Dataset dataset = dv.getDataset(); - if (dataset.getLockFor(Reason.finalizePublication) == null - && dataset.getLockFor(Reason.FileValidationFailed) == null) { - - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); - // Define file paths - String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; - zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + // Define file paths + String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; - // Check for and delete existing files for this version - logger.fine("Checking for existing files in archive..."); - - File existingDatacite = new File(dataciteFileName); - if (existingDatacite.exists()) { - logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); - if (existingDatacite.delete()) { - logger.fine("Deleted existing datacite.xml"); - } else { - logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName); - } - } + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); - File existingBag = new File(zipName); - if 
(existingBag.exists()) { - logger.fine("Found existing bag file, deleting: " + zipName); - if (existingBag.delete()) { - logger.fine("Deleted existing bag file"); - } else { - logger.warning("Failed to delete existing bag file: " + zipName); - } + File existingDatacite = new File(dataciteFileName); + if (existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + if (existingDatacite.delete()) { + logger.fine("Deleted existing datacite.xml"); + } else { + logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName); } + } - // Also check for and delete the .partial file if it exists - File existingPartial = new File(zipName + ".partial"); - if (existingPartial.exists()) { - logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial"); - if (existingPartial.delete()) { - logger.fine("Deleted existing partial bag file"); - } else { - logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial"); - } + File existingBag = new File(zipName); + if (existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + zipName); + if (existingBag.delete()) { + logger.fine("Deleted existing bag file"); + } else { + logger.warning("Failed to delete existing bag file: " + zipName); } + } - String dataciteXml = getDataCiteXml(dv); - - FileUtils.writeStringToFile( - new File(dataciteFileName), - dataciteXml, StandardCharsets.UTF_8); - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setNumConnections(getNumberOfBagGeneratorThreads()); - bagger.setAuthenticationKey(token.getTokenString()); - //ToDo: generateBag(File f, true) seems to do the same thing (with a .tmp extension) - since we don't have to use a stream here, could probably just reuse the existing code? 
- bagger.generateBag(new FileOutputStream(zipName + ".partial")); - - File srcFile = new File(zipName + ".partial"); - File destFile = new File(zipName); - - if (srcFile.renameTo(destFile)) { - logger.fine("Localhost Submission step: Content Transferred"); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "file://" + zipName); + // Also check for and delete the .partial file if it exists + File existingPartial = new File(zipName + ".partial"); + if (existingPartial.exists()) { + logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial"); + if (existingPartial.delete()) { + logger.fine("Deleted existing partial bag file"); } else { - logger.warning("Unable to move " + zipName + ".partial to " + zipName); + logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial"); } + } + + // Write datacite.xml file + FileUtils.writeStringToFile(new File(dataciteFileName), dataciteXml, StandardCharsets.UTF_8); + logger.fine("Datacite XML written to: " + dataciteFileName); + + // Generate bag + BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms); + bagger.setAuthenticationKey(token.getTokenString()); + + boolean bagSuccess = bagger.generateBag(new FileOutputStream(zipName + ".partial")); + + if (!bagSuccess) { + logger.severe("Bag generation failed for " + zipName); + return new Failure("Local Submission Failure", "Bag generation failed"); + } + + File srcFile = new File(zipName + ".partial"); + File destFile = new File(zipName); + + if (srcFile.renameTo(destFile)) { + logger.fine("Localhost Submission step: Content Transferred to " + zipName); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "file://" + zipName); } else { - logger.warning( - "Localhost Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed"); - return new Failure("Dataset locked"); + logger.severe("Unable to move " + zipName + ".partial to " + zipName); + return new Failure("Local Submission Failure", "Unable to rename partial file to final file"); } } catch (Exception e) { logger.warning("Failed to archive " + zipName + " : " + e.getLocalizedMessage()); e.printStackTrace(); + return new Failure("Local Submission Failure", e.getLocalizedMessage() + ": check log for details"); } finally { dv.setArchivalCopyLocation(statusObject.build().toString()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 768d5d03e1d..4198cb19fe9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -81,8 +82,8 @@ public boolean canDelete() { } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult 
performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { logger.fine("In S3SubmitToArchiveCommand..."); JsonObject configObject = null; @@ -105,139 +106,127 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t try { Dataset dataset = dv.getDataset(); - if (dataset.getLockFor(Reason.finalizePublication) == null) { - - spaceName = getSpaceName(dataset); - - // Define keys for datacite.xml and bag file - String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; - String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip"; - - // Check for and delete existing files for this version - logger.fine("Checking for existing files in archive..."); - - try { - HeadObjectRequest headDcRequest = HeadObjectRequest.builder() - .bucket(bucketName) - .key(dcKey) - .build(); - - s3.headObject(headDcRequest).join(); - - // If we get here, the object exists, so delete it - logger.fine("Found existing datacite.xml, deleting: " + dcKey); - DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder() - .bucket(bucketName) - .key(dcKey) - .build(); - - CompletableFuture deleteDcFuture = s3.deleteObject(deleteDcRequest); - DeleteObjectResponse deleteDcResponse = deleteDcFuture.join(); - - if (deleteDcResponse.sdkHttpResponse().isSuccessful()) { - logger.fine("Deleted existing datacite.xml"); - } else { - logger.warning("Failed to delete existing datacite.xml: " + dcKey); - } - } catch (Exception e) { - if (e.getCause() instanceof NoSuchKeyException) { - logger.fine("No existing datacite.xml found"); - } else { - logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage()); - } - } + spaceName = getSpaceName(dataset); - try { - HeadObjectRequest headBagRequest = HeadObjectRequest.builder() - .bucket(bucketName) - .key(bagKey) - .build(); - - s3.headObject(headBagRequest).join(); - - // If we get here, the object exists, so delete it - logger.fine("Found existing bag file, deleting: " + bagKey); - DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder() - .bucket(bucketName) - .key(bagKey) - .build(); - - CompletableFuture deleteBagFuture = s3.deleteObject(deleteBagRequest); - DeleteObjectResponse deleteBagResponse = deleteBagFuture.join(); - - if (deleteBagResponse.sdkHttpResponse().isSuccessful()) { - logger.fine("Deleted existing bag file"); - } else { - logger.warning("Failed to delete existing bag file: " + bagKey); - } - } catch (Exception e) { - if (e.getCause() instanceof NoSuchKeyException) { - logger.fine("No existing bag file found"); - } else { - logger.warning("Error checking/deleting existing bag file: " + e.getMessage()); - } - } + // Define keys for datacite.xml and bag file + String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; + String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip"; + + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); + + try { + HeadObjectRequest headDcRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + s3.headObject(headDcRequest).join(); - String dataciteXml = getDataCiteXml(dv); - // Add datacite.xml file - PutObjectRequest putRequest = PutObjectRequest.builder() + // If we get here, the object exists, so delete it + logger.fine("Found existing datacite.xml, deleting: " + dcKey); + DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder() 
.bucket(bucketName) .key(dcKey) .build(); - CompletableFuture putFuture = s3.putObject(putRequest, - AsyncRequestBody.fromString(dataciteXml, StandardCharsets.UTF_8)); + CompletableFuture deleteDcFuture = s3.deleteObject(deleteDcRequest); + DeleteObjectResponse deleteDcResponse = deleteDcFuture.join(); + + if (deleteDcResponse.sdkHttpResponse().isSuccessful()) { + logger.fine("Deleted existing datacite.xml"); + } else { + logger.warning("Failed to delete existing datacite.xml: " + dcKey); + } + } catch (Exception e) { + if (e.getCause() instanceof NoSuchKeyException) { + logger.fine("No existing datacite.xml found"); + } else { + logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage()); + } + } + + try { + HeadObjectRequest headBagRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(bagKey) + .build(); + + s3.headObject(headBagRequest).join(); + + // If we get here, the object exists, so delete it + logger.fine("Found existing bag file, deleting: " + bagKey); + DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder() + .bucket(bucketName) + .key(bagKey) + .build(); - // Wait for the put operation to complete - PutObjectResponse putResponse = putFuture.join(); + CompletableFuture deleteBagFuture = s3.deleteObject(deleteBagRequest); + DeleteObjectResponse deleteBagResponse = deleteBagFuture.join(); - if (!putResponse.sdkHttpResponse().isSuccessful()) { - logger.warning("Could not write datacite xml to S3"); - return new Failure("S3 Archiver failed writing datacite xml file"); + if (deleteBagResponse.sdkHttpResponse().isSuccessful()) { + logger.fine("Deleted existing bag file"); + } else { + logger.warning("Failed to delete existing bag file: " + bagKey); + } + } catch (Exception e) { + if (e.getCause() instanceof NoSuchKeyException) { + logger.fine("No existing bag file found"); + } else { + logger.warning("Error checking/deleting existing bag file: " + e.getMessage()); } + } + + // Add datacite.xml file + PutObjectRequest putRequest = PutObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + CompletableFuture putFuture = s3.putObject(putRequest, + AsyncRequestBody.fromString(dataciteXml, StandardCharsets.UTF_8)); + + // Wait for the put operation to complete + PutObjectResponse putResponse = putFuture.join(); - // Store BagIt file - String fileName = getFileName(spaceName, dv); + if (!putResponse.sdkHttpResponse().isSuccessful()) { + logger.warning("Could not write datacite xml to S3"); + return new Failure("S3 Archiver failed writing datacite xml file"); + } - // Add BagIt ZIP file - // Google uses MD5 as one way to verify the - // transfer + // Store BagIt file + String fileName = getFileName(spaceName, dv); - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setAuthenticationKey(token.getTokenString()); - if (bagger.generateBag(fileName, false)) { - File bagFile = bagger.getBagFile(fileName); + // Generate bag + BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms); + bagger.setAuthenticationKey(token.getTokenString()); + if (bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); - UploadFileRequest uploadFileRequest = UploadFileRequest.builder() - .putObjectRequest(req -> req.bucket(bucketName).key(bagKey)).source(bagFile.toPath()) - .build(); + UploadFileRequest uploadFileRequest = UploadFileRequest.builder() + .putObjectRequest(req -> req.bucket(bucketName).key(bagKey)).source(bagFile.toPath()) + .build(); - FileUpload 
fileUpload = tm.uploadFile(uploadFileRequest); + FileUpload fileUpload = tm.uploadFile(uploadFileRequest); - CompletedFileUpload uploadResult = fileUpload.completionFuture().join(); + CompletedFileUpload uploadResult = fileUpload.completionFuture().join(); - if (uploadResult.response().sdkHttpResponse().isSuccessful()) { - logger.fine("S3 Submission step: Content Transferred"); + if (uploadResult.response().sdkHttpResponse().isSuccessful()) { + logger.fine("S3 Submission step: Content Transferred"); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, - String.format("https://%s.s3.amazonaws.com/%s", bucketName, bagKey)); - } else { - logger.severe("Error sending file to S3: " + fileName); - return new Failure("Error in transferring Bag file to S3", - "S3 Submission Failure: incomplete transfer"); - } + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, + String.format("https://%s.s3.amazonaws.com/%s", bucketName, bagKey)); } else { - logger.warning("Could not write local Bag file " + fileName); - return new Failure("S3 Archiver fail writing temp local bag"); + logger.severe("Error sending file to S3: " + fileName); + return new Failure("Error in transferring Bag file to S3", + "S3 Submission Failure: incomplete transfer"); } - } else { - logger.warning( - "S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); - return new Failure("Dataset locked"); + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver fail writing temp local bag"); } + } catch (Exception e) { logger.warning(e.getLocalizedMessage()); e.printStackTrace(); @@ -253,7 +242,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t return WorkflowStepResult.OK; } else { return new Failure( - "S3 Submission not configured - no \":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config."); + "S3 Submission not configured - no \":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config."); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index f24ebdb8655..12501d170d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.Map.Entry; @@ -72,6 +73,7 @@ import com.google.gson.JsonSyntaxException; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; @@ -120,10 +122,10 @@ public class BagGenerator { private boolean usetemp = false; - private int numConnections = 8; - public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString(); + private Map terms; - private OREMap oremap; + private static int numConnections = 8; + public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString(); static PrintWriter pw = null; @@ -139,15 +141,15 @@ public class BagGenerator 
{ * and zipping are done in parallel, using a connection pool. The required space * on disk is ~ n+1/n of the final bag size, e.g. 125% of the bag size for a * 4-way parallel zip operation. + * @param terms * @throws Exception * @throws JsonSyntaxException */ - public BagGenerator(OREMap oreMap, String dataciteXml) throws JsonSyntaxException, Exception { - this.oremap = oreMap; - this.oremapObject = oreMap.getOREMap(); - //(JsonObject) new JsonParser().parse(oreMap.getOREMap().toString()); + public BagGenerator(jakarta.json.JsonObject oremapObject, String dataciteXml, Map terms) throws JsonSyntaxException, Exception { + this.oremapObject = oremapObject; this.dataciteXml = dataciteXml; + this.terms = terms; try { // Using Dataverse, all the URLs to be retrieved should be on the current server, so allowing self-signed certs and not verifying hostnames are useful in testing and @@ -768,12 +770,12 @@ private String generateInfoFile() { /* Contact, and it's subfields, are terms from citation.tsv whose mapping to a formal vocabulary and label in the oremap may change * so we need to find the labels used. */ - JsonLDTerm contactTerm = oremap.getContactTerm(); + JsonLDTerm contactTerm = terms.get(DatasetFieldConstant.datasetContact); if ((contactTerm != null) && aggregation.has(contactTerm.getLabel())) { JsonElement contacts = aggregation.get(contactTerm.getLabel()); - JsonLDTerm contactNameTerm = oremap.getContactNameTerm(); - JsonLDTerm contactEmailTerm = oremap.getContactEmailTerm(); + JsonLDTerm contactNameTerm = terms.get(DatasetFieldConstant.datasetContactName); + JsonLDTerm contactEmailTerm = terms.get(DatasetFieldConstant.datasetContactEmail); if (contacts.isJsonArray()) { for (int i = 0; i < contactsArray.size(); i++) { @@ -841,8 +843,8 @@ private String generateInfoFile() { /* Description, and it's subfields, are terms from citation.tsv whose mapping to a formal vocabulary and label in the oremap may change * so we need to find the labels used. 
*/ - JsonLDTerm descriptionTerm = oremap.getDescriptionTerm(); - JsonLDTerm descriptionTextTerm = oremap.getDescriptionTextTerm(); + JsonLDTerm descriptionTerm = terms.get(DatasetFieldConstant.description); + JsonLDTerm descriptionTextTerm = terms.get(DatasetFieldConstant.descriptionText); if (descriptionTerm == null) { logger.warning("No description available for BagIt Info file"); } else { @@ -1124,9 +1126,9 @@ public void setAuthenticationKey(String tokenString) { apiKey = tokenString; } - public void setNumConnections(int numConnections) { - this.numConnections = numConnections; - logger.fine("BagGenerator will use " + numConnections + " threads"); + public static void setNumConnections(int numConnections) { + BagGenerator.numConnections = numConnections; + logger.fine("All BagGenerators will now use " + numConnections + " threads"); } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index ae1175f0e1d..d7fc3f96b02 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -290,7 +290,7 @@ private void executeSteps(Workflow wf, WorkflowContext ctxt, int initialStepIdx try { if (res == WorkflowStepResult.OK) { logger.log(Level.INFO, "Workflow {0} step {1}: OK", new Object[]{ctxt.getInvocationId(), stepIdx}); - em.merge(ctxt.getDataset()); + // The dataset is merged in refresh(ctxt) ctxt = refresh(ctxt); } else if (res instanceof Failure) { logger.log(Level.WARNING, "Workflow {0} failed: {1}", new Object[]{ctxt.getInvocationId(), ((Failure) res).getReason()}); diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java index b0567bff107..c6a5c8626ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java @@ -1,9 +1,14 @@ package edu.harvard.iq.dataverse.workflow.internalspi; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.ArchiverUtil; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.WorkflowContext; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStep; @@ -14,6 +19,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import jakarta.json.JsonObject; import jakarta.servlet.http.HttpServletRequest; /** @@ -45,11 +51,43 @@ public WorkflowStepResult run(WorkflowContext context) { } } + Dataset d = context.getDataset(); + if (d.isLockedFor(Reason.FileValidationFailed)) { + logger.severe("Dataset locked for file validation failure - will not archive"); + return new Failure("File Validation Lock", "Dataset has file validation problem - will not archive"); + } DataverseRequest dvr = new 
DataverseRequest(context.getRequest().getAuthenticatedUser(), (HttpServletRequest) null); String className = requestedSettings.get(SettingsServiceBean.Key.ArchiverClassName.toString()); AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvr, context.getDataset().getReleasedVersion()); if (archiveCommand != null) { - return (archiveCommand.performArchiveSubmission(context.getDataset().getReleasedVersion(), context.getApiToken(), requestedSettings)); + // Generate the required components for archiving + DatasetVersion version = context.getDataset().getReleasedVersion(); + + // Generate DataCite XML + String dataCiteXml = archiveCommand.getDataCiteXml(version); + + // Generate OREMap + OREMap oreMap = new OREMap(version, false); + JsonObject ore = oreMap.getOREMap(); + + // Get JSON-LD terms + Map terms = archiveCommand.getJsonLDTerms(oreMap); + + // Call the updated method with all required parameters + /* + * Note: because this must complete before the workflow can complete and update the version status in the db a long-running archive submission via workflow could hit a transaction timeout and fail. + * The commands themselves have been updated to run archive submission outside of any transaction and update the status in a separate transaction, so archiving a given version that way could succeed + * where this workflow failed. + */ + return archiveCommand.performArchiveSubmission( + version, + dataCiteXml, + ore, + terms, + context.getApiToken(), + requestedSettings + ); + } else { logger.severe("No Archiver instance could be created for name: " + className); return new Failure("No Archiver", "Could not create instance of class: " + className); From d2a25c392c4434d960871f13a6ed8f86458fc3f0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 24 Jan 2026 09:58:33 -0500 Subject: [PATCH 17/22] use new transaction to start --- .../dataverse/DatasetVersionServiceBean.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index b5e964e5673..9c04acd6c5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -28,6 +28,8 @@ import jakarta.ejb.EJB; import jakarta.ejb.EJBException; import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.json.Json; import jakarta.json.JsonObjectBuilder; @@ -1337,25 +1339,21 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer /** - * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred so this method will check - * for OptimisticLockExceptions and retry the update with the latest version. + * Update the archival copy location for a specific version of a dataset. + * Archiving can be long-running and other parallel updates to the datasetversion have likely occurred + * so this method will just re-find the version rather than risking an + * OptimisticLockException and then having to retry in yert another transaction (since the OLE rolls this one back). * * @param dv * The dataset version whose archival copy location we want to update. Must not be {@code null}. 
      */
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
     public void persistArchivalCopyLocation(DatasetVersion dv) {
-        try {
-            em.merge(dv);
-            em.flush(); // Force the update and version check immediately
-        } catch (OptimisticLockException ole) {
-            logger.log(Level.INFO, "OptimisticLockException while persisting archival copy location for DatasetVersion id={0}. Retrying on latest version.", dv.getId());
-            DatasetVersion currentVersion = find(dv.getId());
-            if (currentVersion != null) {
-                currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation());
-                em.merge(currentVersion);
-            } else {
-                logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} to retry persisting archival copy location after OptimisticLockException.", dv.getId());
+        DatasetVersion currentVersion = find(dv.getId());
+        if (currentVersion != null) {
+            currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation());
+        } else {
+            logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} when persisting archival copy location.", dv.getId());
         }
     }
 }

From a45b76b2cc4ad1d6e6ea324eeb51cb3cfcc37189 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Sat, 24 Jan 2026 09:59:00 -0500
Subject: [PATCH 18/22] typo

---
 .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
index 9c04acd6c5e..a5dd724104f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -1342,7 +1342,7 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer
      * Update the archival copy location for a specific version of a dataset.
      * Archiving can be long-running and other parallel updates to the datasetversion have likely occurred
      * so this method will just re-find the version rather than risking an
-     * OptimisticLockException and then having to retry in yert another transaction (since the OLE rolls this one back).
+     * OptimisticLockException and then having to retry in yet another transaction (since the OLE rolls this one back).
      *
      * @param dv
      *            The dataset version whose archival copy location we want to update. Must not be {@code null}.
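A usage sketch of the status plumbing introduced above, illustrative only: the wrapper class and method names below are hypothetical, while DatasetVersion, DatasetVersionServiceBean, EjbDataverseEngine, AbstractSubmitToArchiveCommand, the ARCHIVAL_STATUS* constants, and the persistArchivalCopyLocation/submitAsync calls are the ones appearing in these patches.

// Illustrative sketch (hypothetical class/method names): record a "pending"
// archival status before launching the asynchronous submit-to-archive command.
import jakarta.json.Json;
import jakarta.json.JsonObjectBuilder;

public class ArchivalStatusExample {

    static void markPendingAndSubmit(DatasetVersion dv,
                                     DatasetVersionServiceBean datasetVersionService,
                                     EjbDataverseEngine commandEngine,
                                     AbstractSubmitToArchiveCommand cmd) throws CommandException {
        // Build the small JSON status object keyed by the DatasetVersion.ARCHIVAL_STATUS constant
        // (a success record would also carry an ARCHIVAL_STATUS_MESSAGE entry with the bag location).
        JsonObjectBuilder job = Json.createObjectBuilder();
        job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING);
        dv.setArchivalCopyLocation(job.build().toString());

        // persistArchivalCopyLocation runs in REQUIRES_NEW and re-finds the version, so the
        // pending marker is committed immediately and is not lost to optimistic-lock conflicts
        // with parallel updates or to a rollback of the caller's transaction.
        datasetVersionService.persistArchivalCopyLocation(dv);

        // Fire-and-forget: the command itself writes the final success/failure status when done.
        commandEngine.submitAsync(cmd);
    }
}

Keeping the pending write in its own transaction means the UI can show the in-progress state right away, and a failure later in the long-running archiving work cannot roll that marker back.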
From a4c583e1e6ab3db3cb5c8e570ca7ffbd1867d567 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 24 Jan 2026 10:38:13 -0500 Subject: [PATCH 19/22] Use pending, use JSON --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 14 ++++++++++---- .../edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- .../edu/harvard/iq/dataverse/api/Datasets.java | 4 +++- .../ArchivalSubmissionWorkflowStep.java | 12 +++++++++--- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index a091005b392..7e168047f05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3004,7 +3004,11 @@ public String updateCurrentVersion() { String status = updateVersion.getArchivalCopyLocationStatus(); if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && archiveCommand.canDelete())){ // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING); + updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + //Persist to db now + datasetVersionService.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this * command within the CuratePublishedDatasetVersionCommand was causing an error: @@ -3023,9 +3027,8 @@ public String updateCurrentVersion() { } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { //Not automatically replacing the old archival copy as creating it is expensive - updateVersion.setArchivalStatus(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + updateVersion.setArchivalStatusOnly(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); datasetVersionService.persistArchivalCopyLocation(updateVersion); - datasetVersionService.merge(updateVersion); } } } @@ -6115,7 +6118,10 @@ public void archiveVersion(Long id, boolean force) { if (status == null || (force && cmd.canDelete())) { // Set initial pending status - dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING); + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + //Persist now datasetVersionService.persistArchivalCopyLocation(dv); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 1248a8266ab..4ff6ae5e723 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -416,8 +416,8 @@ public void setArchivalCopyLocation(String location) { populateArchivalStatus(true); } - // COnvenience method to set only the status - public void setArchivalStatus(String status) { + // Convenience method to just change the status without changing the location + public void setArchivalStatusOnly(String status) { populateArchivalStatus(false); JsonObjectBuilder job = Json.createObjectBuilder(archivalCopyLocationJson); job.add(DatasetVersion.ARCHIVAL_STATUS, status); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index dba4b36d4da..155522bbb5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1279,7 +1279,9 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion); if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING); + updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java index c6a5c8626ae..9e9b434ba03 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java @@ -75,9 +75,15 @@ public WorkflowStepResult run(WorkflowContext context) { // Call the updated method with all required parameters /* - * Note: because this must complete before the workflow can complete and update the version status in the db a long-running archive submission via workflow could hit a transaction timeout and fail. - * The commands themselves have been updated to run archive submission outside of any transaction and update the status in a separate transaction, so archiving a given version that way could succeed - * where this workflow failed. + * Note: because this must complete before the workflow can complete and update the version status + * in the db a long-running archive submission via workflow could hit a transaction timeout and fail. + * The commands themselves have been updated to run archive submission outside of any transaction + * and update the status in a separate transaction, so archiving a given version that way could + * succeed where this workflow failed. + * + * Another difference when running in a workflow - this step has no way to set the archiving status to + * pending as is done when running archiving from the UI/API. Instead, there is a generic workflow + * lock on the dataset. 
          */
            return archiveCommand.performArchiveSubmission(
                version,
                dataCiteXml,
                ore,
                terms,
                context.getApiToken(),
                requestedSettings
            );
+
        } else {
            logger.severe("No Archiver instance could be created for name: " + className);
            return new Failure("No Archiver", "Could not create instance of class: " + className);

From 305f7e3b73f7ec299bb25a86bddf645f34f23607 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Wed, 21 Jan 2026 17:38:37 -0500
Subject: [PATCH 20/22] merge fix of persistArchivalCopy method refactors

---
 src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
index 4ff6ae5e723..8a4a0cf3f53 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -392,7 +392,7 @@ public String getArchivalCopyLocationStatus() {
     }
     public String getArchivalCopyLocationMessage() {
         populateArchivalStatus(false);
-        if(archivalCopyLocationJson!=null) {
+        if(archivalCopyLocationJson!=null && archivalCopyLocationJson.containsKey(ARCHIVAL_STATUS_MESSAGE)) {
             return archivalCopyLocationJson.getString(ARCHIVAL_STATUS_MESSAGE);
         }
         return null;

From d2282d9d02280f27491f3a87dbb9ac39ee6794ac Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Wed, 28 Jan 2026 15:20:14 -0500
Subject: [PATCH 21/22] combined release note

---
 doc/release-notes/12122-archiving updates.md | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 doc/release-notes/12122-archiving updates.md

diff --git a/doc/release-notes/12122-archiving updates.md b/doc/release-notes/12122-archiving updates.md
new file mode 100644
index 00000000000..2dd4eb6909f
--- /dev/null
+++ b/doc/release-notes/12122-archiving updates.md
@@ -0,0 +1,7 @@
+## Archiving Updates
+
+This release includes multiple updates to the process of creating archival bags, including:
+- performance/scaling improvements for large datasets (multiple changes)
+- bug fixes affecting when superusers see the "Submit" button that launches archiving from the dataset page version table
+- new functionality to optionally suppress the archiving workflow when using Update Current Version and to mark the existing archival copy as out of date
+- new functionality to recreate an archival bag after Update Current Version has been used, available for archivers that can delete existing files
\ No newline at end of file

From 236fca47f9f5e57792c8201fd34fadc992f2c6ec Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Wed, 28 Jan 2026 15:30:21 -0500
Subject: [PATCH 22/22] missed change to static

---
 .../engine/command/impl/AbstractSubmitToArchiveCommand.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index ffa79456902..2b049f1c42a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -156,7 +156,7 @@ public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInput
             public void run() {
                 try (PipedOutputStream out = new PipedOutputStream(in)) {
                     // Generate bag
-                    bagger.setNumConnections(getNumberOfBagGeneratorThreads());
+                    BagGenerator.setNumConnections(getNumberOfBagGeneratorThreads());
                     BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms);
                     bagger.setAuthenticationKey(token.getTokenString());
                     bagger.generateBag(out);