From f8f7739423c1f1af8fa7b1d1092b73523181a285 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 7 Jan 2026 13:35:19 -0500 Subject: [PATCH 1/8] initial impl --- .../impl/AbstractSubmitToArchiveCommand.java | 83 +++++++++++++++++-- .../settings/SettingsServiceBean.java | 6 ++ .../ArchivalSubmissionWorkflowStep.java | 2 +- 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 29c27d0396d..b4400e7b957 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -15,15 +15,21 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import edu.harvard.iq.dataverse.util.ListSplitUtil; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; import java.io.IOException; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.security.DigestInputStream; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.logging.Logger; @@ -45,14 +51,16 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { + + String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); 
- String[] settingsArray = settings.split(","); - for (String setting : settingsArray) { - setting = setting.trim(); - if (!setting.startsWith(":")) { - logger.warning("Invalid Archiver Setting: " + setting); + List settingsList = ListSplitUtil.split(settings); + for (String settingName : settingsList) { + Key setting = Key.parse(settingName); + if (setting == null) { + logger.warning("Invalid Archiver Setting: " + settingName); } else { - requestedSettings.put(setting, ctxt.settings().get(setting)); + requestedSettings.put(settingName, ctxt.settings().getValueForKey(setting)); } } @@ -62,22 +70,81 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //No un-expired token token = ctxt.authentication().generateApiTokenForUser(user); } - performArchiveSubmission(version, token, requestedSettings); + runArchivingProcess(version, token, requestedSettings); return ctxt.em().merge(version); } + /** + * Note that this method may be called from the execute method above OR from a + * workflow in which execute() is never called and therefore in which all + * variables must be sent as method parameters. (Nominally version is set in the + * constructor and could be dropped from the parameter list.) + * @param ctxt + * + * @param version - the DatasetVersion to archive + * @param token - an API Token for the user performing this action + * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
+ */ + public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken token, Map requestedSetttings) { + // Check if earlier versions must be archived first + String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived.toString()); + boolean requireEarlierArchived = Boolean.parseBoolean(requireEarlierArchivedValue); + if (requireEarlierArchived) { + + Dataset dataset = version.getDataset(); + List versions = dataset.getVersions(); + + // Check all earlier versions (those with version numbers less than current) + for (DatasetVersion earlierVersion : versions) { + // Skip the current version and any versions that come after it + if (earlierVersion.getId().equals(version.getId())) { + continue; + } + + // Compare version numbers to ensure we only check earlier versions + if (earlierVersion.getVersionNumber() != null && version.getVersionNumber() != null) { + if (earlierVersion.getVersionNumber() < version.getVersionNumber() + || (earlierVersion.getVersionNumber().equals(version.getVersionNumber()) + && earlierVersion.getMinorVersionNumber() < version.getMinorVersionNumber())) { + + // Check if this earlier version has been successfully archived + String archivalStatus = earlierVersion.getArchivalCopyLocationStatus(); + if (archivalStatus == null || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) +// || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE) + ) { + JsonObjectBuilder statusObjectBuilder = Json.createObjectBuilder(); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, + "Successful archiving of earlier versions is required."); + version.setArchivalCopyLocation(statusObjectBuilder.build().toString()); + return new Failure( + "Earlier versions must be successfully archived first", + "Archival prerequisites not met" + ); + } + } + } + } + } + 
// Delegate to the archiver-specific implementation + return performArchiveSubmission(version, token, requestedSettings); + } + + /** * This method is the only one that should be overwritten by other classes. Note * that this method may be called from the execute method above OR from a * workflow in which execute() is never called and therefore in which all * variables must be sent as method parameters. (Nominally version is set in the * constructor and could be dropped from the parameter list.) + * @param ctxt * * @param version - the DatasetVersion to archive * @param token - an API Token for the user performing this action * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). */ - abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, Map requestedSetttings); + protected abstract WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, + Map requestedSettings); protected int getNumberOfBagGeneratorThreads() { if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 37d26995017..d8495a2dc8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -485,6 +485,12 @@ Whether Harvesting (OAI) service is enabled */ ArchiverClassName, + /* + * Only create an archival Bag for a dataset version if all prior versions have + * been successfully archived + */ + ArchiverOnlyIfEarlierVersionsAreArchived, + /** * Custom settings for each archiver. See list below. 
*/ diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java index b0567bff107..3e3962d0334 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java @@ -49,7 +49,7 @@ public WorkflowStepResult run(WorkflowContext context) { String className = requestedSettings.get(SettingsServiceBean.Key.ArchiverClassName.toString()); AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvr, context.getDataset().getReleasedVersion()); if (archiveCommand != null) { - return (archiveCommand.performArchiveSubmission(context.getDataset().getReleasedVersion(), context.getApiToken(), requestedSettings)); + return (archiveCommand.runArchivingProcess(context.getDataset().getReleasedVersion(), context.getApiToken(), requestedSettings)); } else { logger.severe("No Archiver instance could be created for name: " + className); return new Failure("No Archiver", "Could not create instance of class: " + className); From 5bd6f8d92581ed8ee6e65b4cb394d0f67be804cc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 8 Jan 2026 09:25:41 -0500 Subject: [PATCH 2/8] fix requestedSettings handling --- .../impl/AbstractSubmitToArchiveCommand.java | 15 ++++++--------- .../command/impl/DRSSubmitToArchiveCommand.java | 5 ++--- .../impl/DuraCloudSubmitToArchiveCommand.java | 3 +-- .../impl/GoogleCloudSubmitToArchiveCommand.java | 2 +- .../command/impl/LocalSubmitToArchiveCommand.java | 3 +-- .../command/impl/S3SubmitToArchiveCommand.java | 3 +-- .../dataverse/workflow/WorkflowServiceBean.java | 4 ++-- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index b4400e7b957..bcb8f37dede 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -37,7 +36,7 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand { private final DatasetVersion version; - private final Map requestedSettings = new HashMap(); + protected final Map requestedSettings = new HashMap(); protected boolean success=false; private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); private static final int MAX_ZIP_WAIT = 20000; @@ -50,8 +49,6 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - - String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); List settingsList = ListSplitUtil.split(settings); @@ -85,7 +82,9 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { * @param token - an API Token for the user performing this action * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
*/ - public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken token, Map requestedSetttings) { + public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken token, Map requestedSettings) { + // this.requestedSettings won't be set yet in the workflow case, so set it now (used in getNumberOfBagGeneratorThreads) + this.requestedSettings.putAll(requestedSettings); // Check if earlier versions must be archived first String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived.toString()); boolean requireEarlierArchived = Boolean.parseBoolean(requireEarlierArchivedValue); @@ -127,7 +126,7 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t } } // Delegate to the archiver-specific implementation - return performArchiveSubmission(version, token, requestedSettings); + return performArchiveSubmission(version, token); } @@ -141,10 +140,8 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t * * @param version - the DatasetVersion to archive * @param token - an API Token for the user performing this action - * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
*/ - protected abstract WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, - Map requestedSettings); + protected abstract WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token); protected int getNumberOfBagGeneratorThreads() { if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 78e8454255b..01b9b4621e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -82,8 +82,7 @@ public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versi } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; @@ -113,7 +112,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); - WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token); JsonObjectBuilder statusObject = Json.createObjectBuilder(); statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index fe4a25091d7..71855abd927 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -49,8 +49,7 @@ public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 7dfb9f07e19..5d27e71583b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -45,7 +45,7 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In GoogleCloudSubmitToArchiveCommand..."); String bucketName = requestedSettings.get(GOOGLECLOUD_BUCKET); String projectName = requestedSettings.get(GOOGLECLOUD_PROJECT); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 462879f2ec9..d590e605985 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -36,8 +36,7 @@ public LocalSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion ver } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In LocalCloudSubmitToArchive..."); String localPath = requestedSettings.get(BagItLocalPath.toString()); String zipName = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 65531d775c8..e3d5a0d8ae0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -78,8 +78,7 @@ public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versio } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In S3SubmitToArchiveCommand..."); JsonObject configObject = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index ae1175f0e1d..fce13d1c181 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -180,12 +180,12 @@ private Map retrieveRequestedSettings(Map requir break; } case "boolean": { - retrievedSettings.put(setting, settings.isTrue(settingType, false)); + retrievedSettings.put(setting, settings.isTrue(setting, false)); break; } case "long": { 
retrievedSettings.put(setting, - settings.getValueForKeyAsLong(SettingsServiceBean.Key.valueOf(setting))); + settings.getValueForKeyAsLong(SettingsServiceBean.Key.parse(setting))); break; } } From 4aaf6ca3ceff1f772dad5821e3f7a8b76342060d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 8 Jan 2026 09:26:00 -0500 Subject: [PATCH 3/8] efficiency improvement --- .../iq/dataverse/settings/SettingsServiceBean.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index d8495a2dc8a..1c67cb85060 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -802,16 +802,13 @@ public static SettingsServiceBean.Key parse(String key) { // Cut off the ":" we verified is present before String normalizedKey = key.substring(1); - // Iterate through all the known keys and return on match (case sensitive!) // We are case sensitive here because Dataverse implicitely uses case sensitive keys everywhere! 
- for (SettingsServiceBean.Key k : SettingsServiceBean.Key.values()) { - if (k.name().equals(normalizedKey)) { - return k; - } + try { + return SettingsServiceBean.Key.valueOf(normalizedKey); + } catch (IllegalArgumentException e) { + // Fall through on no match - return null for invalid keys + return null; } - - // Fall through on no match - return null; } } From 7cdef818079a6c3aa253063e1c68b41a54c4c0ed Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 8 Jan 2026 09:40:53 -0500 Subject: [PATCH 4/8] QDR fixes transx timeout, ignored bag thread setting, add deletable --- .../impl/AbstractSubmitToArchiveCommand.java | 18 +++++++++++++++++- .../iq/dataverse/util/bagit/BagGenerator.java | 8 ++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index bcb8f37dede..98e9dfb68e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -20,6 +20,9 @@ import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.json.Json; import jakarta.json.JsonObjectBuilder; @@ -48,6 +51,7 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) public DatasetVersion execute(CommandContext ctxt) throws CommandException { String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); @@ -174,8 +178,8 @@ public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInput public 
void run() { try (PipedOutputStream out = new PipedOutputStream(in)) { // Generate bag + BagGenerator.setNumConnections(getNumberOfBagGeneratorThreads()); BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); success = true; @@ -247,4 +251,16 @@ public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { public static boolean isSingleVersion(SettingsServiceBean settingsService) { return false; } + + /** Whether the archiver can delete existing archival files (and thus can retry when the existing files are incomplete/obsolete) + * A static version supports calls via reflection while the instance method supports inheritance for use on actual command instances (see DatasetPage for both use cases). + * @return + */ + public static boolean supportsDelete() { + return false; + } + + public boolean canDelete() { + return supportsDelete(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index f24ebdb8655..3035694ae3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -120,7 +120,7 @@ public class BagGenerator { private boolean usetemp = false; - private int numConnections = 8; + private static int numConnections = 2; public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString(); private OREMap oremap; @@ -1124,9 +1124,9 @@ public void setAuthenticationKey(String tokenString) { apiKey = tokenString; } - public void setNumConnections(int numConnections) { - this.numConnections = numConnections; - logger.fine("BagGenerator will use " + numConnections + " threads"); + public static void setNumConnections(int numConnections) { + BagGenerator.numConnections = numConnections; 
+ logger.fine("All BagGenerators will use " + numConnections + " threads"); } } \ No newline at end of file From 67e01e050d933005f69b4bae93678c865671130a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 16 Dec 2025 10:35:40 -0500 Subject: [PATCH 5/8] archival submit fix - per version cache --- .../edu/harvard/iq/dataverse/DatasetPage.java | 28 +++++++++++++------ src/main/webapp/dataset-versions.xhtml | 4 +-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 20617160a1c..8eba6cbeab9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -387,7 +387,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private boolean showIngestSuccess; private Boolean archivable = null; - private Boolean versionArchivable = null; + private HashMap versionArchivable = new HashMap<>(); private Boolean someVersionArchived = null; public boolean isShowIngestSuccess() { @@ -6147,10 +6147,11 @@ public boolean isArchivable() { return archivable; } - public boolean isVersionArchivable() { - if (versionArchivable == null) { + public boolean isVersionArchivable(Long id) { + Boolean thisVersionArchivable = versionArchivable.get(id); + if (thisVersionArchivable == null) { // If this dataset isn't in an archivable collection return false - versionArchivable = false; + thisVersionArchivable = false; if (isArchivable()) { boolean checkForArchivalCopy = false; // Otherwise, we need to know if the archiver is single-version-only @@ -6167,11 +6168,19 @@ public boolean isVersionArchivable() { if (checkForArchivalCopy) { // If we have to check (single version archiving), we can't allow archiving if // one version is already archived (or attempted - any non-null status) - versionArchivable = !isSomeVersionArchived(); + thisVersionArchivable = !isSomeVersionArchived(); 
} else { - // If we allow multiple versions or didn't find one that has had archiving run - // on it, we can archive, so return true - versionArchivable = true; + // If we didn't find one that has had archiving run + // on it, or archiving per version is supported and either + // the status is null or the archiver can delete prior runs and status isn't success, + // we can archive, so return true + // Find the specific version by id + DatasetVersion targetVersion = dataset.getVersions().stream() + .filter(v -> v.getId().equals(id)) + .findFirst() + .orElse(null); + String status = targetVersion.getArchivalCopyLocationStatus(); + thisVersionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete)); } } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { @@ -6180,8 +6189,9 @@ public boolean isVersionArchivable() { } } } + versionArchivable.put(id, thisVersionArchivable); } - return versionArchivable; + return thisVersionArchivable; } public boolean isSomeVersionArchived() { diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 9e5f0a9b24d..ee726bb5eee 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -171,11 +171,11 @@ - - + From 50e8c61a8e1a878a7056f0da980e1a9e5271f957 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 29 Jan 2026 17:20:31 -0500 Subject: [PATCH 6/8] Add check to display submit button only if prior versions are archvd --- .../edu/harvard/iq/dataverse/DatasetPage.java | 34 +++++++++++++------ .../dataverse/FileMetadataVersionsHelper.java | 14 ++------ .../iq/dataverse/dataset/DatasetUtil.java | 17 ++++++++++ .../iq/dataverse/util/ArchiverUtil.java | 11 ++++++ 4 files changed, 53 insertions(+), 23 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 8eba6cbeab9..375489484c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -387,6 +387,8 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private boolean showIngestSuccess; private Boolean archivable = null; + private Boolean checkForArchivalCopy; + private Boolean supportsDelete; private HashMap versionArchivable = new HashMap<>(); private Boolean someVersionArchived = null; @@ -6152,19 +6154,33 @@ public boolean isVersionArchivable(Long id) { if (thisVersionArchivable == null) { // If this dataset isn't in an archivable collection return false thisVersionArchivable = false; + boolean requiresEarlierVersionsToBeArchived = settingsWrapper.isTrueForKey(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived, false); if (isArchivable()) { - boolean checkForArchivalCopy = false; // Otherwise, we need to know if the archiver is single-version-only // If it is, we have to check for an existing archived version to answer the // question String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); if (className != null) { try { - Class clazz = Class.forName(className); - Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); - Object[] params = { settingsWrapper }; - checkForArchivalCopy = (Boolean) m.invoke(null, params); - + DatasetVersion targetVersion = dataset.getVersions().stream() + .filter(v -> v.getId().equals(id)).findFirst().orElse(null); + if (requiresEarlierVersionsToBeArchived) {// Find the specific version by id + DatasetVersion priorVersion = DatasetUtil.getPriorVersion(targetVersion); + + if (priorVersion== null || (isVersionArchivable(priorVersion.getId()) + && ArchiverUtil.isVersionArchived(priorVersion))) { + thisVersionArchivable = true; + } + } + if 
(checkForArchivalCopy == null) { + //Only check once + Class clazz = Class.forName(className); + Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Method m2 = clazz.getMethod("supportsDelete"); + Object[] params = { settingsWrapper }; + checkForArchivalCopy = (Boolean) m.invoke(null, params); + supportsDelete = (Boolean) m2.invoke(null); + } if (checkForArchivalCopy) { // If we have to check (single version archiving), we can't allow archiving if // one version is already archived (or attempted - any non-null status) @@ -6175,16 +6191,12 @@ public boolean isVersionArchivable(Long id) { // the status is null or the archiver can delete prior runs and status isn't success, // we can archive, so return true // Find the specific version by id - DatasetVersion targetVersion = dataset.getVersions().stream() - .filter(v -> v.getId().equals(id)) - .findFirst() - .orElse(null); String status = targetVersion.getArchivalCopyLocationStatus(); thisVersionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete)); } } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { - logger.warning("Failed to call isSingleVersion on configured archiver class: " + className); + logger.warning("Failed to call methods on configured archiver class: " + className); e.printStackTrace(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java index 4d408a72c8c..cc632054642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import 
edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -95,18 +96,7 @@ private FileMetadata getPreviousFileMetadata(FileMetadata fileMetadata, FileMeta //TODO: this could use some refactoring to cut down on the number of for loops! private FileMetadata getPreviousFileMetadata(FileMetadata fileMetadata, DatasetVersion currentversion) { List allfiles = allRelatedFiles(fileMetadata); - boolean foundCurrent = false; - DatasetVersion priorVersion = null; - for (DatasetVersion versionLoop : fileMetadata.getDatasetVersion().getDataset().getVersions()) { - if (foundCurrent) { - priorVersion = versionLoop; - break; - } - if (versionLoop.equals(currentversion)) { - foundCurrent = true; - } - - } + DatasetVersion priorVersion = DatasetUtil.getPriorVersion(fileMetadata.getDatasetVersion()); if (priorVersion != null && priorVersion.getFileMetadatasSorted() != null) { for (FileMetadata fmdTest : priorVersion.getFileMetadatasSorted()) { for (DataFile fileTest : allfiles) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 2ce5471a523..79451a61a84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -740,4 +740,21 @@ public static String getLocaleCurationStatusLabelFromString(String label) { } return localizedName; } + + // Find the prior version - relies on version sorting by major/minor numbers + public static DatasetVersion getPriorVersion(DatasetVersion version) { + boolean foundCurrent = false; + DatasetVersion priorVersion = null; + for (DatasetVersion versionLoop : version.getDataset().getVersions()) { + if (foundCurrent) { + priorVersion = versionLoop; + break; + } + if (versionLoop.equals(version)) { + foundCurrent = true; + } + + } + return priorVersion; + } } diff 
--git a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java index 18ec6243d5a..7d03004f3f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java @@ -71,5 +71,16 @@ public static boolean isSomeVersionArchived(Dataset dataset) { return someVersionArchived; } + + /** + * Checks if a version has been successfully archived. + * + * @param version the version to check + * @return true if the version has been successfully archived, false otherwise + */ + public static boolean isVersionArchived(DatasetVersion version) { + String status = version.getArchivalCopyLocationStatus(); + return status != null && status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + } } \ No newline at end of file From 06428970ac2a7644f7ca7b5d6c929ad639c20c31 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 29 Jan 2026 18:06:43 -0500 Subject: [PATCH 7/8] setting name tweak, add docs, release note --- doc/release-notes/12122-archiving in sequence.md | 3 +++ doc/sphinx-guides/source/installation/config.rst | 12 ++++++++++++ .../java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../command/impl/AbstractSubmitToArchiveCommand.java | 2 +- .../iq/dataverse/settings/SettingsServiceBean.java | 2 +- 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 doc/release-notes/12122-archiving in sequence.md diff --git a/doc/release-notes/12122-archiving in sequence.md b/doc/release-notes/12122-archiving in sequence.md new file mode 100644 index 00000000000..6f4373a1e31 --- /dev/null +++ b/doc/release-notes/12122-archiving in sequence.md @@ -0,0 +1,3 @@ +This release introduces an additional setting related to archival bag creation, ArchiveOnlyIfEarlierVersionsAreArchived (default false). +If it is true, dataset versions must be archived in order. 
That is, all prior versions of a dataset must be archived before the latest version can be archived. +This is intended to support use cases where deduplication of files between dataset versions will be done (e.g. by a third-party service running at the archival copy location) and is a step towards supporting the Oxford Common File Layout (OCFL) as an archival format. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a9d5c7c0041..e0dffd10ac9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2263,6 +2263,13 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). +Two settings that can be used with all current Archivers are: + +- \:BagGeneratorThreads - the number of threads to use when adding data files to the zipped bag. The default is 2. Values of 4 or more may increase performance on larger machines but may cause problems if file access is throttled. +- \:ArchiveOnlyIfEarlierVersionsAreArchived - when true, requires dataset versions to be archived in order by confirming that all prior versions have been successfully archived before allowing a new version to be archived. Default is false + +These must be included in the \:ArchiverSettings for the Archiver to work. + .. _Duracloud Configuration: Duracloud Configuration @@ -5333,6 +5340,11 @@ This setting specifies which storage system to use by identifying the particular For examples, see the specific configuration above in :ref:`BagIt Export`. 
+:ArchiveOnlyIfEarlierVersionsAreArchived +++++++++++++++++++++++++++++++++++++++++ + +This setting, if true, only allows creation of an archival Bag for a dataset version if all prior versions have been successfully archived. The default is false (any version can be archived independently as long as other settings allow it) + :ArchiverSettings +++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 375489484c0..5b267007887 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6154,7 +6154,7 @@ public boolean isVersionArchivable(Long id) { if (thisVersionArchivable == null) { // If this dataset isn't in an archivable collection return false thisVersionArchivable = false; - boolean requiresEarlierVersionsToBeArchived = settingsWrapper.isTrueForKey(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived, false); + boolean requiresEarlierVersionsToBeArchived = settingsWrapper.isTrueForKey(SettingsServiceBean.Key.ArchiveOnlyIfEarlierVersionsAreArchived, false); if (isArchivable()) { // Otherwise, we need to know if the archiver is single-version-only // If it is, we have to check for an existing archived version to answer the diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 98e9dfb68e1..72f45ab5d2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -90,7 +90,7 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t // this.requestedSettings won't be set yet in the workflow case, so set it now (used in getNumberOfBagGeneratorThreads) 
this.requestedSettings.putAll(requestedSettings); // Check if earlier versions must be archived first - String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived.toString()); + String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiveOnlyIfEarlierVersionsAreArchived.toString()); boolean requireEarlierArchived = Boolean.parseBoolean(requireEarlierArchivedValue); if (requireEarlierArchived) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index fac136042ce..1cc9fda7645 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -489,7 +489,7 @@ Whether Harvesting (OAI) service is enabled * Only create an archival Bag for a dataset version if all prior versions have * been successfully archived */ - ArchiverOnlyIfEarlierVersionsAreArchived, + ArchiveOnlyIfEarlierVersionsAreArchived, /** * Custom settings for each archiver. See list below. 
From ca0af05c7f80fc28114ebde71b460917199d2a60 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 29 Jan 2026 18:15:55 -0500 Subject: [PATCH 8/8] simplify --- .../impl/AbstractSubmitToArchiveCommand.java | 47 +++++++++---------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 72f45ab5d2b..8949f346567 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -97,36 +97,31 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t Dataset dataset = version.getDataset(); List versions = dataset.getVersions(); - // Check all earlier versions (those with version numbers less than current) - for (DatasetVersion earlierVersion : versions) { - // Skip the current version and any versions that come after it - if (earlierVersion.getId().equals(version.getId())) { - continue; - } - - // Compare version numbers to ensure we only check earlier versions - if (earlierVersion.getVersionNumber() != null && version.getVersionNumber() != null) { - if (earlierVersion.getVersionNumber() < version.getVersionNumber() - || (earlierVersion.getVersionNumber().equals(version.getVersionNumber()) - && earlierVersion.getMinorVersionNumber() < version.getMinorVersionNumber())) { + boolean foundCurrent = false; - // Check if this earlier version has been successfully archived - String archivalStatus = earlierVersion.getArchivalCopyLocationStatus(); - if (archivalStatus == null || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) + // versions are ordered, all versions after the current one have lower + // major/minor version numbers + for (DatasetVersion versionInLoop : versions) { + if (foundCurrent) 
{ + // Once foundCurrent is true, we are looking at prior versions + // Check if this earlier version has been successfully archived + String archivalStatus = versionInLoop.getArchivalCopyLocationStatus(); + if (archivalStatus == null || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) // || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE) - ) { - JsonObjectBuilder statusObjectBuilder = Json.createObjectBuilder(); - statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); - statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, - "Successful archiving of earlier versions is required."); - version.setArchivalCopyLocation(statusObjectBuilder.build().toString()); - return new Failure( - "Earlier versions must be successfully archived first", - "Archival prerequisites not met" - ); - } + ) { + JsonObjectBuilder statusObjectBuilder = Json.createObjectBuilder(); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, + "Successful archiving of earlier versions is required."); + version.setArchivalCopyLocation(statusObjectBuilder.build().toString()); + return new Failure("Earlier versions must be successfully archived first", + "Archival prerequisites not met"); } } + if (versionInLoop.equals(version)) { + foundCurrent = true; + } + } } // Delegate to the archiver-specific implementation