From f8f7739423c1f1af8fa7b1d1092b73523181a285 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 7 Jan 2026 13:35:19 -0500 Subject: [PATCH 01/14] initial impl --- .../impl/AbstractSubmitToArchiveCommand.java | 83 +++++++++++++++++-- .../settings/SettingsServiceBean.java | 6 ++ .../ArchivalSubmissionWorkflowStep.java | 2 +- 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 29c27d0396d..b4400e7b957 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -15,15 +15,21 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import edu.harvard.iq.dataverse.util.ListSplitUtil; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; import java.io.IOException; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.security.DigestInputStream; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.logging.Logger; @@ -45,14 +51,16 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { + + String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); - String[] settingsArray = settings.split(","); - for (String setting : settingsArray) { - setting = setting.trim(); - if (!setting.startsWith(":")) { - logger.warning("Invalid Archiver Setting: " + setting); + List settingsList = ListSplitUtil.split(settings); + for (String settingName : settingsList) { + Key setting = Key.parse(settingName); + if (setting == null) { + logger.warning("Invalid Archiver Setting: " + settingName); } else { - requestedSettings.put(setting, ctxt.settings().get(setting)); + requestedSettings.put(settingName, ctxt.settings().getValueForKey(setting)); } } @@ -62,22 +70,81 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //No un-expired token token = ctxt.authentication().generateApiTokenForUser(user); } - performArchiveSubmission(version, token, requestedSettings); + runArchivingProcess(version, token, requestedSettings); return ctxt.em().merge(version); } + /** + * Note that this method may be called from the execute method above OR from a + * workflow in which execute() is never called and therefore in which all + * variables must be sent as method parameters. (Nominally version is set in the + * constructor and could be dropped from the parameter list.) 
+ * @param ctxt + * + * @param version - the DatasetVersion to archive + * @param token - an API Token for the user performing this action + * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). + */ + public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken token, Map requestedSetttings) { + // Check if earlier versions must be archived first + String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived.toString()); + boolean requireEarlierArchived = Boolean.parseBoolean(requireEarlierArchivedValue); + if (requireEarlierArchived) { + + Dataset dataset = version.getDataset(); + List versions = dataset.getVersions(); + + // Check all earlier versions (those with version numbers less than current) + for (DatasetVersion earlierVersion : versions) { + // Skip the current version and any versions that come after it + if (earlierVersion.getId().equals(version.getId())) { + continue; + } + + // Compare version numbers to ensure we only check earlier versions + if (earlierVersion.getVersionNumber() != null && version.getVersionNumber() != null) { + if (earlierVersion.getVersionNumber() < version.getVersionNumber() + || (earlierVersion.getVersionNumber().equals(version.getVersionNumber()) + && earlierVersion.getMinorVersionNumber() < version.getMinorVersionNumber())) { + + // Check if this earlier version has been successfully archived + String archivalStatus = earlierVersion.getArchivalCopyLocationStatus(); + if (archivalStatus == null || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) +// || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE) + ) { + JsonObjectBuilder statusObjectBuilder = Json.createObjectBuilder(); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, + "Successful archiving of earlier versions is required."); + version.setArchivalCopyLocation(statusObjectBuilder.build().toString()); + return new Failure( + "Earlier versions must be successfully archived first", + "Archival prerequisites not met" + ); + } + } + } + } + } + // Delegate to the archiver-specific implementation + return performArchiveSubmission(version, token, requestedSettings); + } + + /** * This method is the only one that should be overwritten by other classes. Note * that this method may be called from the execute method above OR from a * workflow in which execute() is never called and therefore in which all * variables must be sent as method parameters. (Nominally version is set in the * constructor and could be dropped from the parameter list.) + * @param ctxt * * @param version - the DatasetVersion to archive * @param token - an API Token for the user performing this action * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
*/ - abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, Map requestedSetttings); + protected abstract WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, + Map requestedSettings); protected int getNumberOfBagGeneratorThreads() { if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 37d26995017..d8495a2dc8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -485,6 +485,12 @@ Whether Harvesting (OAI) service is enabled */ ArchiverClassName, + /* + * Only create an archival Bag for a dataset version if all prior versions have + * been successfully archived + */ + ArchiverOnlyIfEarlierVersionsAreArchived, + /** * Custom settings for each archiver. See list below. */ diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java index b0567bff107..3e3962d0334 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java @@ -49,7 +49,7 @@ public WorkflowStepResult run(WorkflowContext context) { String className = requestedSettings.get(SettingsServiceBean.Key.ArchiverClassName.toString()); AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvr, context.getDataset().getReleasedVersion()); if (archiveCommand != null) { - return (archiveCommand.performArchiveSubmission(context.getDataset().getReleasedVersion(), context.getApiToken(), requestedSettings)); + return (archiveCommand.runArchivingProcess(context.getDataset().getReleasedVersion(), context.getApiToken(), requestedSettings)); } else { logger.severe("No Archiver instance could be created for name: " + className); return new Failure("No Archiver", "Could not create instance of class: " + className); From 5bd6f8d92581ed8ee6e65b4cb394d0f67be804cc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 8 Jan 2026 09:25:41 -0500 Subject: [PATCH 02/14] fix requestedSettings handling --- .../impl/AbstractSubmitToArchiveCommand.java | 15 ++++++--------- .../command/impl/DRSSubmitToArchiveCommand.java | 5 ++--- .../impl/DuraCloudSubmitToArchiveCommand.java | 3 +-- .../impl/GoogleCloudSubmitToArchiveCommand.java | 2 +- .../command/impl/LocalSubmitToArchiveCommand.java | 3 +-- .../command/impl/S3SubmitToArchiveCommand.java | 3 +-- .../dataverse/workflow/WorkflowServiceBean.java | 4 ++-- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index b4400e7b957..bcb8f37dede 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import 
edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -37,7 +36,7 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand { private final DatasetVersion version; - private final Map requestedSettings = new HashMap(); + protected final Map requestedSettings = new HashMap(); protected boolean success=false; private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); private static final int MAX_ZIP_WAIT = 20000; @@ -50,8 +49,6 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - - String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); List settingsList = ListSplitUtil.split(settings); @@ -85,7 +82,9 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { * @param token - an API Token for the user performing this action * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). */ - public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken token, Map requestedSetttings) { + public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken token, Map requestedSettings) { + // this.requestedSettings won't be set yet in the workflow case, so set it now (used in getNumberOfBagGeneratorThreads) + this.requestedSettings.putAll(requestedSettings); // Check if earlier versions must be archived first String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived.toString()); boolean requireEarlierArchived = Boolean.parseBoolean(requireEarlierArchivedValue); @@ -127,7 +126,7 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t } } // Delegate to the archiver-specific implementation - return performArchiveSubmission(version, token, requestedSettings); + return performArchiveSubmission(version, token); } @@ -141,10 +140,8 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t * * @param version - the DatasetVersion to archive * @param token - an API Token for the user performing this action - * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
*/ - protected abstract WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, - Map requestedSettings); + protected abstract WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token); protected int getNumberOfBagGeneratorThreads() { if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 78e8454255b..01b9b4621e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -82,8 +82,7 @@ public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versi } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; @@ -113,7 +112,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); - WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token); JsonObjectBuilder statusObject = Json.createObjectBuilder(); statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index fe4a25091d7..71855abd927 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -49,8 +49,7 @@ public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { String port = requestedSettings.get(DURACLOUD_PORT) != null ? 
requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 7dfb9f07e19..5d27e71583b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -45,7 +45,7 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In GoogleCloudSubmitToArchiveCommand..."); String bucketName = requestedSettings.get(GOOGLECLOUD_BUCKET); String projectName = requestedSettings.get(GOOGLECLOUD_PROJECT); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 462879f2ec9..d590e605985 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -36,8 +36,7 @@ public LocalSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion ver } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In LocalCloudSubmitToArchive..."); String localPath = requestedSettings.get(BagItLocalPath.toString()); String zipName = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 65531d775c8..e3d5a0d8ae0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -78,8 +78,7 @@ public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versio } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) { logger.fine("In S3SubmitToArchiveCommand..."); JsonObject configObject = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index ae1175f0e1d..fce13d1c181 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -180,12 +180,12 @@ private Map retrieveRequestedSettings(Map requir break; } case "boolean": { - retrievedSettings.put(setting, settings.isTrue(settingType, false)); + retrievedSettings.put(setting, settings.isTrue(setting, false)); break; } case "long": { retrievedSettings.put(setting, - settings.getValueForKeyAsLong(SettingsServiceBean.Key.valueOf(setting))); + settings.getValueForKeyAsLong(SettingsServiceBean.Key.parse(setting))); break; } } From 
4aaf6ca3ceff1f772dad5821e3f7a8b76342060d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 8 Jan 2026 09:26:00 -0500 Subject: [PATCH 03/14] efficiency improvement --- .../iq/dataverse/settings/SettingsServiceBean.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index d8495a2dc8a..1c67cb85060 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -802,16 +802,13 @@ public static SettingsServiceBean.Key parse(String key) { // Cut off the ":" we verified is present before String normalizedKey = key.substring(1); - // Iterate through all the known keys and return on match (case sensitive!) // We are case sensitive here because Dataverse implicitely uses case sensitive keys everywhere! - for (SettingsServiceBean.Key k : SettingsServiceBean.Key.values()) { - if (k.name().equals(normalizedKey)) { - return k; - } + try { + return SettingsServiceBean.Key.valueOf(normalizedKey); + } catch (IllegalArgumentException e) { + // Fall through on no match - return null for invalid keys + return null; } - - // Fall through on no match - return null; } } From 7cdef818079a6c3aa253063e1c68b41a54c4c0ed Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 8 Jan 2026 09:40:53 -0500 Subject: [PATCH 04/14] QDR fixes transx timeout, ignored bag thread setting, add deletable --- .../impl/AbstractSubmitToArchiveCommand.java | 18 +++++++++++++++++- .../iq/dataverse/util/bagit/BagGenerator.java | 8 ++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index bcb8f37dede..98e9dfb68e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -20,6 +20,9 @@ import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.json.Json; import jakarta.json.JsonObjectBuilder; @@ -48,6 +51,7 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) public DatasetVersion execute(CommandContext ctxt) throws CommandException { String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); @@ -174,8 +178,8 @@ public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInput public void run() { try (PipedOutputStream out = new PipedOutputStream(in)) { // Generate bag + BagGenerator.setNumConnections(getNumberOfBagGeneratorThreads()); BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); success = true; @@ -247,4 +251,16 @@ public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { public static boolean isSingleVersion(SettingsServiceBean settingsService) { return 
false; } + + /** Whether the archiver can delete existing archival files (and thus can retry when the existing files are incomplete/obsolete) + * A static version supports calls via reflection while the instance method supports inheritance for use on actual command instances (see DatasetPage for both use cases). + * @return + */ + public static boolean supportsDelete() { + return false; + } + + public boolean canDelete() { + return supportsDelete(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index f24ebdb8655..3035694ae3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -120,7 +120,7 @@ public class BagGenerator { private boolean usetemp = false; - private int numConnections = 8; + private static int numConnections = 2; public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString(); private OREMap oremap; @@ -1124,9 +1124,9 @@ public void setAuthenticationKey(String tokenString) { apiKey = tokenString; } - public void setNumConnections(int numConnections) { - this.numConnections = numConnections; - logger.fine("BagGenerator will use " + numConnections + " threads"); + public static void setNumConnections(int numConnections) { + BagGenerator.numConnections = numConnections; + logger.fine("All BagGenerators will use " + numConnections + " threads"); } } \ No newline at end of file From 67e01e050d933005f69b4bae93678c865671130a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 16 Dec 2025 10:35:40 -0500 Subject: [PATCH 05/14] archival submit fix - per version cache --- .../edu/harvard/iq/dataverse/DatasetPage.java | 28 +++++++++++++------ src/main/webapp/dataset-versions.xhtml | 4 +-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 20617160a1c..8eba6cbeab9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -387,7 +387,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private boolean showIngestSuccess; private Boolean archivable = null; - private Boolean versionArchivable = null; + private HashMap versionArchivable = new HashMap<>(); private Boolean someVersionArchived = null; public boolean isShowIngestSuccess() { @@ -6147,10 +6147,11 @@ public boolean isArchivable() { return archivable; } - public boolean isVersionArchivable() { - if (versionArchivable == null) { + public boolean isVersionArchivable(Long id) { + Boolean thisVersionArchivable = versionArchivable.get(id); + if (thisVersionArchivable == null) { // If this dataset isn't in an archivable collection return false - versionArchivable = false; + thisVersionArchivable = false; if (isArchivable()) { boolean checkForArchivalCopy = false; // Otherwise, we need to know if the archiver is single-version-only @@ -6167,11 +6168,19 @@ public boolean isVersionArchivable() { if (checkForArchivalCopy) { // If we have to check (single version archiving), we can't allow archiving if // one version is already archived (or attempted - any non-null status) - versionArchivable = !isSomeVersionArchived(); + thisVersionArchivable = !isSomeVersionArchived(); } else { - // If we allow multiple versions or didn't find one that has had archiving run - // on it, we can archive, 
so return true - versionArchivable = true; + // If we didn't find one that has had archiving run + // on it, or archiving per version is supported and either + // the status is null or the archiver can delete prior runs and status isn't success, + // we can archive, so return true + // Find the specific version by id + DatasetVersion targetVersion = dataset.getVersions().stream() + .filter(v -> v.getId().equals(id)) + .findFirst() + .orElse(null); + String status = targetVersion.getArchivalCopyLocationStatus(); + thisVersionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete)); } } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { @@ -6180,8 +6189,9 @@ public boolean isVersionArchivable() { } } } + versionArchivable.put(id, thisVersionArchivable); } - return versionArchivable; + return thisVersionArchivable; } public boolean isSomeVersionArchived() { diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 9e5f0a9b24d..ee726bb5eee 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -171,11 +171,11 @@ - - + From 50e8c61a8e1a878a7056f0da980e1a9e5271f957 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 29 Jan 2026 17:20:31 -0500 Subject: [PATCH 06/14] Add check to display submit button only if prior versions are archvd --- .../edu/harvard/iq/dataverse/DatasetPage.java | 34 +++++++++++++------ .../dataverse/FileMetadataVersionsHelper.java | 14 ++------ .../iq/dataverse/dataset/DatasetUtil.java | 17 ++++++++++ .../iq/dataverse/util/ArchiverUtil.java | 11 ++++++ 4 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 8eba6cbeab9..375489484c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -387,6 +387,8 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { private boolean showIngestSuccess; private Boolean archivable = null; + private Boolean checkForArchivalCopy; + private Boolean supportsDelete; private HashMap versionArchivable = new HashMap<>(); private Boolean someVersionArchived = null; @@ -6152,19 +6154,33 @@ public boolean isVersionArchivable(Long id) { if (thisVersionArchivable == null) { // If this dataset isn't in an archivable collection return false thisVersionArchivable = false; + boolean requiresEarlierVersionsToBeArchived = settingsWrapper.isTrueForKey(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived, false); if (isArchivable()) { - boolean checkForArchivalCopy = false; // Otherwise, we need to know if the archiver is single-version-only // If it is, we have to check for an existing archived version to answer the // question String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); if (className != null) { try { - Class clazz = Class.forName(className); - Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); - Object[] params = { settingsWrapper }; - checkForArchivalCopy = (Boolean) m.invoke(null, params); - + DatasetVersion targetVersion = dataset.getVersions().stream() + .filter(v -> v.getId().equals(id)).findFirst().orElse(null); + if 
(requiresEarlierVersionsToBeArchived) {// Find the specific version by id + DatasetVersion priorVersion = DatasetUtil.getPriorVersion(targetVersion); + + if (priorVersion== null || (isVersionArchivable(priorVersion.getId()) + && ArchiverUtil.isVersionArchived(priorVersion))) { + thisVersionArchivable = true; + } + } + if (checkForArchivalCopy == null) { + //Only check once + Class clazz = Class.forName(className); + Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Method m2 = clazz.getMethod("supportsDelete"); + Object[] params = { settingsWrapper }; + checkForArchivalCopy = (Boolean) m.invoke(null, params); + supportsDelete = (Boolean) m2.invoke(null); + } if (checkForArchivalCopy) { // If we have to check (single version archiving), we can't allow archiving if // one version is already archived (or attempted - any non-null status) @@ -6175,16 +6191,12 @@ public boolean isVersionArchivable(Long id) { // the status is null or the archiver can delete prior runs and status isn't success, // we can archive, so return true // Find the specific version by id - DatasetVersion targetVersion = dataset.getVersions().stream() - .filter(v -> v.getId().equals(id)) - .findFirst() - .orElse(null); String status = targetVersion.getArchivalCopyLocationStatus(); thisVersionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete)); } } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { - logger.warning("Failed to call isSingleVersion on configured archiver class: " + className); + logger.warning("Failed to call methods on configured archiver class: " + className); e.printStackTrace(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java index 4d408a72c8c..cc632054642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadataVersionsHelper.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -95,18 +96,7 @@ private FileMetadata getPreviousFileMetadata(FileMetadata fileMetadata, FileMeta //TODO: this could use some refactoring to cut down on the number of for loops! 
    private FileMetadata getPreviousFileMetadata(FileMetadata fileMetadata, DatasetVersion currentversion) {
        List allfiles = allRelatedFiles(fileMetadata);
-        boolean foundCurrent = false;
-        DatasetVersion priorVersion = null;
-        for (DatasetVersion versionLoop : fileMetadata.getDatasetVersion().getDataset().getVersions()) {
-            if (foundCurrent) {
-                priorVersion = versionLoop;
-                break;
-            }
-            if (versionLoop.equals(currentversion)) {
-                foundCurrent = true;
-            }
-
-        }
+        DatasetVersion priorVersion = DatasetUtil.getPriorVersion(fileMetadata.getDatasetVersion());
        if (priorVersion != null && priorVersion.getFileMetadatasSorted() != null) {
            for (FileMetadata fmdTest : priorVersion.getFileMetadatasSorted()) {
                for (DataFile fileTest : allfiles) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java
index 2ce5471a523..79451a61a84 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java
@@ -740,4 +740,21 @@ public static String getLocaleCurationStatusLabelFromString(String label) {
        }
        return localizedName;
    }
+
+    // Find the prior version - relies on version sorting by major/minor numbers
+    public static DatasetVersion getPriorVersion(DatasetVersion version) {
+        boolean foundCurrent = false;
+        DatasetVersion priorVersion = null;
+        for (DatasetVersion versionLoop : version.getDataset().getVersions()) {
+            if (foundCurrent) {
+                priorVersion = versionLoop;
+                break;
+            }
+            if (versionLoop.equals(version)) {
+                foundCurrent = true;
+            }
+
+        }
+        return priorVersion;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java
index 18ec6243d5a..7d03004f3f7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java
@@ -71,5 +71,16 @@ public static boolean isSomeVersionArchived(Dataset dataset) {
        return someVersionArchived;
    }
+
+    /**
+     * Checks if a version has been successfully archived.
+     *
+     * @param version the version to check
+     * @return true if the version has been successfully archived, false otherwise
+     */
+    public static boolean isVersionArchived(DatasetVersion version) {
+        String status = version.getArchivalCopyLocationStatus();
+        return status != null && status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
+    }
 }
\ No newline at end of file

From 06428970ac2a7644f7ca7b5d6c929ad639c20c31 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 29 Jan 2026 18:06:43 -0500
Subject: [PATCH 07/14] setting name tweak, add docs, release note

---
 doc/release-notes/12122-archiving in sequence.md     |  3 +++
 doc/sphinx-guides/source/installation/config.rst     | 12 ++++++++++++
 .../java/edu/harvard/iq/dataverse/DatasetPage.java   |  2 +-
 .../command/impl/AbstractSubmitToArchiveCommand.java |  2 +-
 .../iq/dataverse/settings/SettingsServiceBean.java   |  2 +-
 5 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 doc/release-notes/12122-archiving in sequence.md

diff --git a/doc/release-notes/12122-archiving in sequence.md b/doc/release-notes/12122-archiving in sequence.md
new file mode 100644
index 00000000000..6f4373a1e31
--- /dev/null
+++ b/doc/release-notes/12122-archiving in sequence.md
@@ -0,0 +1,3 @@
+This release introduces an additional setting related to archival bag creation, ArchiveOnlyIfEarlierVersionsAreArchived (default false).
+If it is true, dataset versions must be archived in order. That is, all prior versions of a dataset must be archived before the latest version can be archived. +This is intended to support use cases where deduplication of files between dataset versions will be done (i.e. by a third-party service running at the archival copy location) and is a step towards supporting the Oxford Common File Layout (OCFL) as an archival format. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a9d5c7c0041..e0dffd10ac9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2263,6 +2263,13 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). +Two settings that can be used with all current Archivers are: + +- \:BagGeneratorThreads - the number of threads to use when adding data files to the zipped bag. The default is 2. Values of 4 or more may increase performance on larger machines but may cause problems if file access is throttled +- \:ArchiveOnlyIfEarlierVersionsAreArchived - when true, requires dataset versions to be archived in order by confirming that all prior versions have been successfully archived before allowing a new version to be archived. Default is false + +These must be included in the \:ArchiverSettings for the Archiver to work + .. _Duracloud Configuration: Duracloud Configuration @@ -5333,6 +5340,11 @@ This setting specifies which storage system to use by identifying the particular For examples, see the specific configuration above in :ref:`BagIt Export`. +:ArchiveOnlyIfEarlierVersionsAreArchived +++++++++++++++++++++++++++++++++++++++++ + +This setting, if true, only allows creation of an archival Bag for a dataset version if all prior versions have been successfully archived. 
The default is false (any version can be archived independently as long as other settings allow it).
+
 :ArchiverSettings
 +++++++++++++++++
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index 375489484c0..5b267007887 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -6154,7 +6154,7 @@ public boolean isVersionArchivable(Long id) {
        if (thisVersionArchivable == null) {
            // If this dataset isn't in an archivable collection return false
            thisVersionArchivable = false;
-            boolean requiresEarlierVersionsToBeArchived = settingsWrapper.isTrueForKey(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived, false);
+            boolean requiresEarlierVersionsToBeArchived = settingsWrapper.isTrueForKey(SettingsServiceBean.Key.ArchiveOnlyIfEarlierVersionsAreArchived, false);
            if (isArchivable()) {
                // Otherwise, we need to know if the archiver is single-version-only
                // If it is, we have to check for an existing archived version to answer the
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index 98e9dfb68e1..72f45ab5d2b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -90,7 +90,7 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t
        // this.requestedSettings won't be set yet in the workflow case, so set it now (used in getNumberOfBagGeneratorThreads)
        this.requestedSettings.putAll(requestedSettings);
        // Check if earlier versions must be archived first
-        String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiverOnlyIfEarlierVersionsAreArchived.toString());
+        String requireEarlierArchivedValue = requestedSettings.get(SettingsServiceBean.Key.ArchiveOnlyIfEarlierVersionsAreArchived.toString());
        boolean requireEarlierArchived = Boolean.parseBoolean(requireEarlierArchivedValue);
        if (requireEarlierArchived) {

diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
index fac136042ce..1cc9fda7645 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java
@@ -489,7 +489,7 @@ Whether Harvesting (OAI) service is enabled
         * Only create an archival Bag for a dataset version if all prior versions have
         * been successfully archived
         */
-        ArchiverOnlyIfEarlierVersionsAreArchived,
+        ArchiveOnlyIfEarlierVersionsAreArchived,

        /**
         * Custom settings for each archiver. See list below.
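Taken together, patches 01, 02 and 07 above change the contract for archiver implementations: the shared prerequisite check (the new :ArchiveOnlyIfEarlierVersionsAreArchived ordering rule) now runs in runArchivingProcess(), settings arrive through the inherited requestedSettings map, and performArchiveSubmission() no longer takes a settings parameter. Below is a minimal sketch of a custom archiver under that contract; the class name, the :ExampleArchiverTarget setting, and the success path returning the usual WorkflowStepResult.OK are illustrative assumptions rather than part of this patch set.

package edu.harvard.iq.dataverse.engine.command.impl;

import java.util.logging.Logger;

import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.workflow.step.Failure;
import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
import jakarta.json.Json;
import jakarta.json.JsonObjectBuilder;

// Hypothetical archiver; the @RequiredPermissions annotation carried by the real
// archiver commands is omitted here for brevity.
public class ExampleSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand {

    private static final Logger logger = Logger.getLogger(ExampleSubmitToArchiveCommand.class.getName());

    // A made-up custom setting; it would have to be listed in :ArchiverSettings.
    private static final String EXAMPLE_TARGET = ":ExampleArchiverTarget";

    public ExampleSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
        super(aRequest, version);
    }

    @Override
    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token) {
        // requestedSettings is populated by execute() or by runArchivingProcess(), which
        // also performs the earlier-versions-archived check before this method is reached.
        String target = requestedSettings.get(EXAMPLE_TARGET);
        if (target == null) {
            return new Failure("Example archiver not configured",
                    EXAMPLE_TARGET + " must be defined and listed in :ArchiverSettings");
        }
        logger.fine("Would transfer the bag for version " + dv.getVersionNumber() + "."
                + dv.getMinorVersionNumber() + " to " + target);
        // ... generate the bag (e.g. via startBagThread) and copy it to the target ...

        // Record success using the same archival status JSON structure the
        // prerequisite check uses to record failure.
        JsonObjectBuilder statusObject = Json.createObjectBuilder();
        statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
        statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, target);
        dv.setArchivalCopyLocation(statusObject.build().toString());
        return WorkflowStepResult.OK;
    }
}

Such a class would be enabled like the existing archivers: point :ArchiverClassName at it and list :ExampleArchiverTarget (plus any shared settings such as :BagGeneratorThreads or :ArchiveOnlyIfEarlierVersionsAreArchived) in :ArchiverSettings.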
From ca0af05c7f80fc28114ebde71b460917199d2a60 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 29 Jan 2026 18:15:55 -0500 Subject: [PATCH 08/14] simplify --- .../impl/AbstractSubmitToArchiveCommand.java | 47 +++++++++---------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 72f45ab5d2b..8949f346567 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -97,36 +97,31 @@ public WorkflowStepResult runArchivingProcess(DatasetVersion version, ApiToken t Dataset dataset = version.getDataset(); List versions = dataset.getVersions(); - // Check all earlier versions (those with version numbers less than current) - for (DatasetVersion earlierVersion : versions) { - // Skip the current version and any versions that come after it - if (earlierVersion.getId().equals(version.getId())) { - continue; - } - - // Compare version numbers to ensure we only check earlier versions - if (earlierVersion.getVersionNumber() != null && version.getVersionNumber() != null) { - if (earlierVersion.getVersionNumber() < version.getVersionNumber() - || (earlierVersion.getVersionNumber().equals(version.getVersionNumber()) - && earlierVersion.getMinorVersionNumber() < version.getMinorVersionNumber())) { + boolean foundCurrent = false; - // Check if this earlier version has been successfully archived - String archivalStatus = earlierVersion.getArchivalCopyLocationStatus(); - if (archivalStatus == null || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) + // versions are ordered, all versions after the current one have lower + // major/minor version numbers + for (DatasetVersion versionInLoop : versions) { + if (foundCurrent) { + // Once foundCurrent is true, we are looking at prior versions + // Check if this earlier version has been successfully archived + String archivalStatus = versionInLoop.getArchivalCopyLocationStatus(); + if (archivalStatus == null || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) // || !archivalStatus.equals(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE) - ) { - JsonObjectBuilder statusObjectBuilder = Json.createObjectBuilder(); - statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); - statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, - "Successful archiving of earlier versions is required."); - version.setArchivalCopyLocation(statusObjectBuilder.build().toString()); - return new Failure( - "Earlier versions must be successfully archived first", - "Archival prerequisites not met" - ); - } + ) { + JsonObjectBuilder statusObjectBuilder = Json.createObjectBuilder(); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObjectBuilder.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, + "Successful archiving of earlier versions is required."); + version.setArchivalCopyLocation(statusObjectBuilder.build().toString()); + return new Failure("Earlier versions must be successfully archived first", + "Archival prerequisites not met"); } } + if (versionInLoop.equals(version)) { + foundCurrent = true; + } + } } // Delegate to the archiver-specific implementation From 1808d2db3fb2ab55c08e25dd6e7f03dc5ed414ed Mon Sep 17 
00:00:00 2001 From: qqmyers Date: Fri, 30 Jan 2026 11:17:15 -0500 Subject: [PATCH 09/14] basic fetch --- .../iq/dataverse/settings/JvmSettings.java | 4 + .../iq/dataverse/util/bagit/BagGenerator.java | 93 ++++++++++++++++--- 2 files changed, 86 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 05390ba8a8c..b32b7a8d77d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -276,6 +276,10 @@ public enum JvmSettings { BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + SCOPE_BAGIT_HOLEY(SCOPE_BAGIT, "holey"), + BAGIT_HOLEY_MAX_FILE_SIZE(SCOPE_BAGIT_HOLEY, "max-file-size"), + BAGIT_HOLEY_MAX_DATA_SIZE(SCOPE_BAGIT_HOLEY, "max-data-size"), + // STORAGE USE SETTINGS SCOPE_STORAGEUSE(PREFIX, "storageuse"), diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 3035694ae3d..6d096704a58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -126,6 +126,13 @@ public class BagGenerator { private OREMap oremap; static PrintWriter pw = null; + + //Holey Bags + private long maxDataFileSize = Long.MAX_VALUE; + private long maxTotalDataSize = Long.MAX_VALUE; + private long currentBagDataSize = 0; + private StringBuilder fetchFileContent = new StringBuilder(); + private boolean usingFetchFile = false; /** * This BagGenerator creates a BagIt version 1.0 @@ -176,6 +183,13 @@ public BagGenerator(OREMap oreMap, String dataciteXml) throws JsonSyntaxExceptio logger.warning("Aint gonna work"); e.printStackTrace(); } + initializeHoleyBagLimits(); + } + + private void initializeHoleyBagLimits() { + this.maxDataFileSize = JvmSettings.BAGIT_HOLEY_MAX_FILE_SIZE.lookupOptional(Long.class).orElse(Long.MAX_VALUE); + this.maxTotalDataSize = JvmSettings.BAGIT_HOLEY_MAX_DATA_SIZE.lookupOptional(Long.class).orElse(Long.MAX_VALUE); + logger.fine("BagGenerator size limits - maxDataFileSize: " + maxDataFileSize + ", maxTotalDataSize: " + maxTotalDataSize); } public void setIgnoreHashes(boolean val) { @@ -312,6 +326,8 @@ public boolean generateBag(OutputStream outputStream) throws Exception { logger.fine("Creating bag: " + bagName); + writeFetchFile(); + ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream); /* @@ -479,7 +495,8 @@ public static String getValidName(String bagName) { return bagName.replaceAll("\\W", "-"); } - private void processContainer(JsonObject item, String currentPath) throws IOException { + private void processContainer(JsonObject item, String currentPath) + throws IOException, ExecutionException, InterruptedException { JsonArray children = getChildren(item); HashSet titles = new HashSet(); String title = null; @@ -529,7 +546,6 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce } else { resourceUsed[index] = true; // add item - // ToDo String dataUrl = child.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString(); logger.fine("File url: " + dataUrl); String childTitle = child.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); @@ -545,13 +561,22 @@ private void 
processContainer(JsonObject item, String currentPath) throws IOExce childPath=currentPath + directoryLabel.getAsString() + "/" + childTitle; } + // Get file size + Long fileSize = null; + if (child.has(JsonLDTerm.filesize.getLabel())) { + fileSize = child.get(JsonLDTerm.filesize.getLabel()).getAsLong(); + } + if(fileSize == null) { + logger.severe("File size missing for " + childPath); + throw new IOException("Unable to create bag due to missing file size"); + } String childHash = null; if (child.has(JsonLDTerm.checksum.getLabel())) { ChecksumType childHashType = ChecksumType.fromString( child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@type").getAsString()); if (hashtype == null) { - //If one wasn't set as a default, pick up what the first child with one uses + //If one wasn't set as a default, pick up what the first child with one uses hashtype = childHashType; } if (hashtype != null && !hashtype.equals(childHashType)) { @@ -574,7 +599,7 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce } try { if ((childHash == null) | ignorehashes) { - // Generate missing hashInputStream inputStream = null; + // Generate missing hash InputStream inputStream = null; try { inputStream = getInputStreamSupplier(dataUrl).get(); @@ -608,17 +633,30 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce logger.warning("Unable to calculate a " + hashtype + " for " + dataUrl); } } - logger.fine("Requesting: " + childPath + " from " + dataUrl); - createFileFromURL(childPath, dataUrl); + + // Add file to bag or fetch file + if (shouldAddToFetchFile(fileSize)) { + // Add to fetch file instead of including in bag + logger.fine("Adding to fetch file: " + childPath + " from " + dataUrl); + addToFetchFile(dataUrl, fileSize, childPath); + usingFetchFile = true; + } else { + // Add file to bag as before + logger.fine("Requesting: " + childPath + " from " + dataUrl); + createFileFromURL(childPath, dataUrl); + if (fileSize != null) { + currentBagDataSize += fileSize; + } + } + dataCount++; if (dataCount % 1000 == 0) { logger.info("Retrieval in progress: " + dataCount + " files retrieved"); } - if (child.has(JsonLDTerm.filesize.getLabel())) { - Long size = child.get(JsonLDTerm.filesize.getLabel()).getAsLong(); - totalDataSize += size; - if (size > maxFileSize) { - maxFileSize = size; + if (fileSize != null) { + totalDataSize += fileSize; + if (fileSize > maxFileSize) { + maxFileSize = fileSize; } } if (child.has(JsonLDTerm.schemaOrg("fileFormat").getLabel())) { @@ -638,6 +676,39 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce } } + // Helper method to determine if file should go to fetch file + private boolean shouldAddToFetchFile(long fileSize) { + + // Check individual file size limit + if (fileSize > maxDataFileSize) { + logger.fine("File size " + fileSize + " exceeds max data file size " + maxDataFileSize); + return true; + } + + // Check total bag size limit + if (currentBagDataSize + fileSize > maxTotalDataSize) { + logger.fine("Adding file would exceed max total data size. 
Current: " + currentBagDataSize + + ", File: " + fileSize + ", Max: " + maxTotalDataSize); + return true; + } + + return false; + } + + // Method to append to fetch file content + private void addToFetchFile(String url, long size, String filename) { + // Format: URL size filename + fetchFileContent.append(url).append(" ").append(Long.toString(size)).append(" ").append(filename).append("\n"); + } + + // Method to write fetch file to bag (call this before finalizing the bag) + private void writeFetchFile() throws IOException, ExecutionException, InterruptedException { + if (usingFetchFile && fetchFileContent.length() > 0) { + logger.info("Creating fetch.txt file for holey bag"); + createFileFromString("fetch.txt", fetchFileContent.toString()); + } + } + private int getUnusedIndexOf(String childId) { int index = resourceIndex.indexOf(childId); if (resourceUsed[index] != null) { From 366eccd486d9d4fd20a33effe8741f84e87cef4a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Jan 2026 13:52:18 -0500 Subject: [PATCH 10/14] order by file size --- .../iq/dataverse/util/bagit/BagGenerator.java | 299 ++++++++++-------- 1 file changed, 162 insertions(+), 137 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 6d096704a58..a168f1ea5d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -20,10 +20,11 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; +import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Set; import java.util.TreeSet; import java.util.Map.Entry; @@ -35,7 +36,6 @@ import java.util.logging.Logger; import java.util.zip.ZipEntry; -import edu.harvard.iq.dataverse.util.BundleUtil; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator; import org.apache.commons.compress.archivers.zip.ScatterZipOutputStream; @@ -77,7 +77,6 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagGeneratorThreads; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; -import java.util.Optional; public class BagGenerator { @@ -254,7 +253,15 @@ public boolean generateBag(OutputStream outputStream) throws Exception { resourceUsed = new Boolean[aggregates.size() + 1]; // Process current container (the aggregation itself) and its // children - processContainer(aggregation, currentPath); + // Recursively collect all files from the entire tree, start with an empty set of processedContainers + List allFiles = new ArrayList<>(); + collectAllFiles(aggregation, currentPath, allFiles); + + // Sort files by size (smallest first) + Collections.sort(allFiles); + + // Process all files in sorted order + processAllFiles(allFiles); } // Create manifest files // pid-mapping.txt - a DataOne recommendation to connect ids and @@ -495,27 +502,29 @@ public static String getValidName(String bagName) { return bagName.replaceAll("\\W", "-"); } - private void processContainer(JsonObject item, String currentPath) + // Collect all files recursively and process containers to create dirs in the zip + private void collectAllFiles(JsonObject item, String currentPath, List allFiles) throws IOException, 
ExecutionException, InterruptedException { JsonArray children = getChildren(item); - HashSet titles = new HashSet(); String title = null; if (item.has(JsonLDTerm.dcTerms("Title").getLabel())) { title = item.get("Title").getAsString(); } else if (item.has(JsonLDTerm.schemaOrg("name").getLabel())) { title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); } - logger.fine("Adding " + title + "/ to path " + currentPath); + logger.fine("Collecting files from " + title + "/ at path " + currentPath); currentPath = currentPath + title + "/"; + + // Mark this container as processed + String containerId = item.get("@id").getAsString(); + + // Create directory and update tracking for this container int containerIndex = -1; try { createDir(currentPath); - // Add containers to pid map and mark as 'used', but no sha1 hash - // value - containerIndex = getUnusedIndexOf(item.get("@id").getAsString()); + containerIndex = getUnusedIndexOf(containerId); resourceUsed[containerIndex] = true; - pidMap.put(item.get("@id").getAsString(), currentPath); - + pidMap.put(containerId, currentPath); } catch (InterruptedException | IOException | ExecutionException e) { e.printStackTrace(); logger.severe(e.getMessage()); @@ -523,159 +532,156 @@ private void processContainer(JsonObject item, String currentPath) resourceUsed[containerIndex] = false; } throw new IOException("Unable to create bag"); - } - for (int i = 0; i < children.size(); i++) { - // Find the ith child in the overall array of aggregated - // resources + for (int i = 0; i < children.size(); i++) { String childId = children.get(i).getAsString(); - logger.fine("Processing: " + childId); + logger.fine("Examining: " + childId); int index = getUnusedIndexOf(childId); - if (resourceUsed[index] != null) { - System.out.println("Warning: reusing resource " + index); - } - // Aggregation is at index 0, so need to shift by 1 for aggregates - // entries JsonObject child = aggregates.get(index - 1).getAsJsonObject(); if (childIsContainer(child)) { - // create dir and process children - // processContainer will mark this item as used - processContainer(child, currentPath); + // Recursively collect files from this container + collectAllFiles(child, currentPath, allFiles); } else { - resourceUsed[index] = true; - // add item - String dataUrl = child.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString(); - logger.fine("File url: " + dataUrl); - String childTitle = child.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); - if (titles.contains(childTitle)) { - logger.warning("**** Multiple items with the same title in: " + currentPath); - logger.warning("**** Will cause failure in hash and size validation in: " + bagID); - } else { - titles.add(childTitle); - } - String childPath = currentPath + childTitle; - JsonElement directoryLabel = child.get(JsonLDTerm.DVCore("directoryLabel").getLabel()); - if(directoryLabel!=null) { - childPath=currentPath + directoryLabel.getAsString() + "/" + childTitle; - } - // Get file size Long fileSize = null; if (child.has(JsonLDTerm.filesize.getLabel())) { fileSize = child.get(JsonLDTerm.filesize.getLabel()).getAsLong(); } - if(fileSize == null) { - logger.severe("File size missing for " + childPath); + if (fileSize == null) { + logger.severe("File size missing for child: " + childId); throw new IOException("Unable to create bag due to missing file size"); } - String childHash = null; - if (child.has(JsonLDTerm.checksum.getLabel())) { - ChecksumType childHashType = ChecksumType.fromString( - 
child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@type").getAsString()); - if (hashtype == null) { - //If one wasn't set as a default, pick up what the first child with one uses - hashtype = childHashType; - } - if (hashtype != null && !hashtype.equals(childHashType)) { - logger.warning("Multiple hash values in use - will calculate " + hashtype.toString() - + " hashes for " + childTitle); - } else { - childHash = child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@value").getAsString(); - if (checksumMap.containsValue(childHash)) { - // Something else has this hash - logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: " - + childHash + " in: " + bagID); - } - logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap"); - checksumMap.put(childPath, childHash); - } + // Store minimal info for sorting - JsonObject is just a reference + allFiles.add(new FileEntry(fileSize, child, currentPath, index)); + } + } + } + + + // Process all files in sorted order + private void processAllFiles(List sortedFiles) + throws IOException, ExecutionException, InterruptedException { + + if ((hashtype == null) | ignorehashes) { + hashtype = DataFile.ChecksumType.SHA512; + } + + for (FileEntry entry : sortedFiles) { + // Extract all needed information from the JsonObject reference + JsonObject child = entry.jsonObject; + String dataUrl = child.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString(); + String childTitle = child.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); + + // Build full path using stored currentPath + String childPath = entry.currentPath + childTitle; + JsonElement directoryLabel = child.get(JsonLDTerm.DVCore("directoryLabel").getLabel()); + if (directoryLabel != null) { + childPath = entry.currentPath + directoryLabel.getAsString() + "/" + childTitle; + } + + // Get hash if exists + String childHash = null; + if (child.has(JsonLDTerm.checksum.getLabel())) { + ChecksumType childHashType = ChecksumType.fromString( + child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@type").getAsString()); + if (hashtype == null) { + hashtype = childHashType; } - if ((hashtype == null) | ignorehashes) { - // Pick sha512 when ignoring hashes or none exist - hashtype = DataFile.ChecksumType.SHA512; + if (hashtype != null && !hashtype.equals(childHashType)) { + logger.warning("Multiple hash values in use - will calculate " + hashtype.toString() + + " hashes for " + childTitle); + } else { + childHash = child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@value").getAsString(); } - try { - if ((childHash == null) | ignorehashes) { - // Generate missing hash - InputStream inputStream = null; - try { - inputStream = getInputStreamSupplier(dataUrl).get(); - - if (hashtype != null) { - if (hashtype.equals(DataFile.ChecksumType.SHA1)) { - childHash = DigestUtils.sha1Hex(inputStream); - } else if (hashtype.equals(DataFile.ChecksumType.SHA256)) { - childHash = DigestUtils.sha256Hex(inputStream); - } else if (hashtype.equals(DataFile.ChecksumType.SHA512)) { - childHash = DigestUtils.sha512Hex(inputStream); - } else if (hashtype.equals(DataFile.ChecksumType.MD5)) { - childHash = DigestUtils.md5Hex(inputStream); - } + } + + resourceUsed[entry.resourceIndex] = true; + + try { + if ((childHash == null) | ignorehashes) { + // Generate missing hash + InputStream inputStream = null; + try { + inputStream = getInputStreamSupplier(dataUrl).get(); + + if (hashtype != null) { + if 
(hashtype.equals(DataFile.ChecksumType.SHA1)) { + childHash = DigestUtils.sha1Hex(inputStream); + } else if (hashtype.equals(DataFile.ChecksumType.SHA256)) { + childHash = DigestUtils.sha256Hex(inputStream); + } else if (hashtype.equals(DataFile.ChecksumType.SHA512)) { + childHash = DigestUtils.sha512Hex(inputStream); + } else if (hashtype.equals(DataFile.ChecksumType.MD5)) { + childHash = DigestUtils.md5Hex(inputStream); } - - } catch (IOException e) { - logger.severe("Failed to read " + childPath); - throw e; - } finally { - IOUtils.closeQuietly(inputStream); - } - if (childHash != null) { - JsonObject childHashObject = new JsonObject(); - childHashObject.addProperty("@type", hashtype.toString()); - childHashObject.addProperty("@value", childHash); - child.add(JsonLDTerm.checksum.getLabel(), (JsonElement) childHashObject); - - checksumMap.put(childPath, childHash); - } else { - logger.warning("Unable to calculate a " + hashtype + " for " + dataUrl); } + + } catch (IOException e) { + logger.severe("Failed to read " + childPath); + throw e; + } finally { + IOUtils.closeQuietly(inputStream); } - - // Add file to bag or fetch file - if (shouldAddToFetchFile(fileSize)) { - // Add to fetch file instead of including in bag - logger.fine("Adding to fetch file: " + childPath + " from " + dataUrl); - addToFetchFile(dataUrl, fileSize, childPath); - usingFetchFile = true; + if (childHash != null) { + JsonObject childHashObject = new JsonObject(); + childHashObject.addProperty("@type", hashtype.toString()); + childHashObject.addProperty("@value", childHash); + child.add(JsonLDTerm.checksum.getLabel(), (JsonElement) childHashObject); + + checksumMap.put(childPath, childHash); } else { - // Add file to bag as before - logger.fine("Requesting: " + childPath + " from " + dataUrl); - createFileFromURL(childPath, dataUrl); - if (fileSize != null) { - currentBagDataSize += fileSize; - } + logger.warning("Unable to calculate a " + hashtype + " for " + dataUrl); } - - dataCount++; - if (dataCount % 1000 == 0) { - logger.info("Retrieval in progress: " + dataCount + " files retrieved"); - } - if (fileSize != null) { - totalDataSize += fileSize; - if (fileSize > maxFileSize) { - maxFileSize = fileSize; - } - } - if (child.has(JsonLDTerm.schemaOrg("fileFormat").getLabel())) { - mimetypes.add(child.get(JsonLDTerm.schemaOrg("fileFormat").getLabel()).getAsString()); + } else { + // Hash already exists, add to checksumMap + if (checksumMap.containsValue(childHash)) { + logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + + " has hash: " + childHash + " in: " + bagID); } - - } catch (Exception e) { - resourceUsed[index] = false; - e.printStackTrace(); - throw new IOException("Unable to create bag"); + logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap"); + checksumMap.put(childPath, childHash); + } + + // Add file to bag or fetch file + if (shouldAddToFetchFile(entry.size)) { + logger.fine("Adding to fetch file: " + childPath + " from " + dataUrl + + " (size: " + entry.size + " bytes)"); + addToFetchFile(dataUrl, entry.size, childPath); + usingFetchFile = true; + } else { + logger.fine("Requesting: " + childPath + " from " + dataUrl + + " (size: " + entry.size + " bytes)"); + createFileFromURL(childPath, dataUrl); + currentBagDataSize += entry.size; + } + + dataCount++; + if (dataCount % 1000 == 0) { + logger.info("Retrieval in progress: " + dataCount + " files retrieved"); + } + + totalDataSize += entry.size; + if (entry.size > maxFileSize) { + maxFileSize = 
entry.size; + } + + if (child.has(JsonLDTerm.schemaOrg("fileFormat").getLabel())) { + mimetypes.add(child.get(JsonLDTerm.schemaOrg("fileFormat").getLabel()).getAsString()); } - // Check for nulls! - pidMap.put(child.get("@id").getAsString(), childPath); - + } catch (Exception e) { + resourceUsed[entry.resourceIndex] = false; + e.printStackTrace(); + throw new IOException("Unable to create bag"); } + + pidMap.put(child.get("@id").getAsString(), childPath); } } - + // Helper method to determine if file should go to fetch file private boolean shouldAddToFetchFile(long fileSize) { @@ -1199,5 +1205,24 @@ public static void setNumConnections(int numConnections) { BagGenerator.numConnections = numConnections; logger.fine("All BagGenerators will use " + numConnections + " threads"); } - + + // Inner class to hold file information before processing + private static class FileEntry implements Comparable { + final long size; + final JsonObject jsonObject; // Direct reference, not a copy + final String currentPath; // Parent directory path + final int resourceIndex; // Still need this for resourceUsed tracking + + FileEntry(long size, JsonObject jsonObject, String currentPath, int resourceIndex) { + this.size = size; + this.jsonObject = jsonObject; + this.currentPath = currentPath; + this.resourceIndex = resourceIndex; + } + + @Override + public int compareTo(FileEntry other) { + return Long.compare(this.size, other.size); + } + } } \ No newline at end of file From eec333b42a204bf1cf953a556665a9638700ed95 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Jan 2026 15:10:28 -0500 Subject: [PATCH 11/14] only add subcollection folders (if they exist) --- .../iq/dataverse/util/bagit/BagGenerator.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index a168f1ea5d9..0c8c477918e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -255,7 +255,7 @@ public boolean generateBag(OutputStream outputStream) throws Exception { // children // Recursively collect all files from the entire tree, start with an empty set of processedContainers List allFiles = new ArrayList<>(); - collectAllFiles(aggregation, currentPath, allFiles); + collectAllFiles(aggregation, currentPath, allFiles, false); // Sort files by size (smallest first) Collections.sort(allFiles); @@ -503,18 +503,19 @@ public static String getValidName(String bagName) { } // Collect all files recursively and process containers to create dirs in the zip - private void collectAllFiles(JsonObject item, String currentPath, List allFiles) + private void collectAllFiles(JsonObject item, String currentPath, List allFiles, boolean addTitle) throws IOException, ExecutionException, InterruptedException { JsonArray children = getChildren(item); - String title = null; - if (item.has(JsonLDTerm.dcTerms("Title").getLabel())) { - title = item.get("Title").getAsString(); - } else if (item.has(JsonLDTerm.schemaOrg("name").getLabel())) { - title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); + if (addTitle) { + String title = null; + if (item.has(JsonLDTerm.dcTerms("Title").getLabel())) { + title = item.get("Title").getAsString(); + } else if (item.has(JsonLDTerm.schemaOrg("name").getLabel())) { + title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); + } + 
logger.fine("Collecting files from " + title + "/ at path " + currentPath); + currentPath = currentPath + title + "/"; } - logger.fine("Collecting files from " + title + "/ at path " + currentPath); - currentPath = currentPath + title + "/"; - // Mark this container as processed String containerId = item.get("@id").getAsString(); @@ -540,9 +541,10 @@ private void collectAllFiles(JsonObject item, String currentPath, List Date: Fri, 30 Jan 2026 15:22:28 -0500 Subject: [PATCH 12/14] replace deprecated constructs --- .../iq/dataverse/util/bagit/BagGenerator.java | 110 ++++++++---------- 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 0c8c477918e..f122346e2fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -43,6 +43,7 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.commons.compress.archivers.zip.ZipFile.Builder; import org.apache.commons.compress.parallel.InputStreamSupplier; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; @@ -381,7 +382,6 @@ public boolean generateBag(OutputStream outputStream) throws Exception { public boolean generateBag(String bagName, boolean temp) { usetemp = temp; - FileOutputStream bagFileOS = null; try { File origBagFile = getBagFile(bagName); File bagFile = origBagFile; @@ -390,82 +390,78 @@ public boolean generateBag(String bagName, boolean temp) { logger.fine("Writing to: " + bagFile.getAbsolutePath()); } // Create an output stream backed by the file - bagFileOS = new FileOutputStream(bagFile); - if (generateBag(bagFileOS)) { - //The generateBag call sets this.bagName to the correct value - validateBagFile(bagFile); - if (usetemp) { - logger.fine("Moving tmp zip"); - origBagFile.delete(); - bagFile.renameTo(origBagFile); + try (FileOutputStream bagFileOS = new FileOutputStream(bagFile)) { + if (generateBag(bagFileOS)) { + // The generateBag call sets this.bagName to the correct value + validateBagFile(bagFile); + if (usetemp) { + logger.fine("Moving tmp zip"); + origBagFile.delete(); + bagFile.renameTo(origBagFile); + } + return true; + } else { + return false; } - return true; - } else { - return false; } } catch (Exception e) { logger.log(Level.SEVERE,"Bag Exception: ", e); e.printStackTrace(); logger.warning("Failure: Processing failure during Bagit file creation"); return false; - } finally { - IOUtils.closeQuietly(bagFileOS); } } public void validateBag(String bagId) { logger.info("Validating Bag"); - ZipFile zf = null; - InputStream is = null; try { File bagFile = getBagFile(bagId); - zf = new ZipFile(bagFile); - ZipArchiveEntry entry = zf.getEntry(getValidName(bagId) + "/manifest-sha1.txt"); - if (entry != null) { - logger.info("SHA1 hashes used"); - hashtype = DataFile.ChecksumType.SHA1; - } else { - entry = zf.getEntry(getValidName(bagId) + "/manifest-sha512.txt"); + try (ZipFile zf = ZipFile.builder().setFile(bagFile).get()) { + ZipArchiveEntry entry = zf.getEntry(getValidName(bagId) + "/manifest-sha1.txt"); if (entry != null) { - logger.info("SHA512 hashes used"); - hashtype = DataFile.ChecksumType.SHA512; + logger.info("SHA1 hashes used"); + hashtype = 
DataFile.ChecksumType.SHA1; } else { - entry = zf.getEntry(getValidName(bagId) + "/manifest-sha256.txt"); + entry = zf.getEntry(getValidName(bagId) + "/manifest-sha512.txt"); if (entry != null) { - logger.info("SHA256 hashes used"); - hashtype = DataFile.ChecksumType.SHA256; + logger.info("SHA512 hashes used"); + hashtype = DataFile.ChecksumType.SHA512; } else { - entry = zf.getEntry(getValidName(bagId) + "/manifest-md5.txt"); + entry = zf.getEntry(getValidName(bagId) + "/manifest-sha256.txt"); if (entry != null) { - logger.info("MD5 hashes used"); - hashtype = DataFile.ChecksumType.MD5; + logger.info("SHA256 hashes used"); + hashtype = DataFile.ChecksumType.SHA256; + } else { + entry = zf.getEntry(getValidName(bagId) + "/manifest-md5.txt"); + if (entry != null) { + logger.info("MD5 hashes used"); + hashtype = DataFile.ChecksumType.MD5; + } } } } + if (entry == null) + throw new IOException("No manifest file found"); + try (InputStream is = zf.getInputStream(entry)) { + BufferedReader br = new BufferedReader(new InputStreamReader(is)); + String line = br.readLine(); + while (line != null) { + logger.fine("Hash entry: " + line); + int breakIndex = line.indexOf(' '); + String hash = line.substring(0, breakIndex); + String path = line.substring(breakIndex + 1); + logger.fine("Adding: " + path + " with hash: " + hash); + checksumMap.put(path, hash); + line = br.readLine(); + } + } } - if (entry == null) - throw new IOException("No manifest file found"); - is = zf.getInputStream(entry); - BufferedReader br = new BufferedReader(new InputStreamReader(is)); - String line = br.readLine(); - while (line != null) { - logger.fine("Hash entry: " + line); - int breakIndex = line.indexOf(' '); - String hash = line.substring(0, breakIndex); - String path = line.substring(breakIndex + 1); - logger.fine("Adding: " + path + " with hash: " + hash); - checksumMap.put(path, hash); - line = br.readLine(); - } - IOUtils.closeQuietly(is); logger.info("HashMap Map contains: " + checksumMap.size() + " entries"); checkFiles(checksumMap, bagFile); } catch (IOException io) { logger.log(Level.SEVERE,"Could not validate Hashes", io); } catch (Exception e) { logger.log(Level.SEVERE,"Could not validate Hashes", e); - } finally { - IOUtils.closeQuietly(zf); } return; } @@ -605,10 +601,8 @@ private void processAllFiles(List sortedFiles) try { if ((childHash == null) | ignorehashes) { // Generate missing hash - InputStream inputStream = null; - try { - inputStream = getInputStreamSupplier(dataUrl).get(); - + + try (InputStream inputStream = getInputStreamSupplier(dataUrl).get()){ if (hashtype != null) { if (hashtype.equals(DataFile.ChecksumType.SHA1)) { childHash = DigestUtils.sha1Hex(inputStream); @@ -624,8 +618,6 @@ private void processAllFiles(List sortedFiles) } catch (IOException e) { logger.severe("Failed to read " + childPath); throw e; - } finally { - IOUtils.closeQuietly(inputStream); } if (childHash != null) { JsonObject childHashObject = new JsonObject(); @@ -782,11 +774,13 @@ private void createFileFromURL(final String relPath, final String uri) addEntry(archiveEntry, supp); } + @SuppressWarnings("deprecation") private void checkFiles(HashMap shaMap, File bagFile) { ExecutorService executor = Executors.newFixedThreadPool(numConnections); - ZipFile zf = null; - try { - zf = new ZipFile(bagFile); + + try (ZipFile zf = ZipFile.builder() + .setFile(bagFile) + .get() ){ BagValidationJob.setZipFile(zf); BagValidationJob.setBagGenerator(this); @@ -813,8 +807,6 @@ private void checkFiles(HashMap shaMap, File bagFile) { } 
catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); - } finally { - IOUtils.closeQuietly(zf); } logger.fine("Hash Validations Completed"); From b746d5db75c85c9c71ce9bd440d237df6a3456be Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Jan 2026 16:35:36 -0500 Subject: [PATCH 13/14] restore name collision check --- .../iq/dataverse/util/bagit/BagGenerator.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index f122346e2fd..a488499b8fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -22,6 +22,7 @@ import java.util.Calendar; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -133,7 +134,7 @@ public class BagGenerator { private long currentBagDataSize = 0; private StringBuilder fetchFileContent = new StringBuilder(); private boolean usingFetchFile = false; - + /** * This BagGenerator creates a BagIt version 1.0 * (https://tools.ietf.org/html/draft-kunze-bagit-16) compliant bag that is also @@ -563,6 +564,9 @@ private void collectAllFiles(JsonObject item, String currentPath, List sortedFiles) throws IOException, ExecutionException, InterruptedException { + // Track titles to detect duplicates + Set titles = new HashSet<>(); + if ((hashtype == null) | ignorehashes) { hashtype = DataFile.ChecksumType.SHA512; } @@ -573,6 +577,14 @@ private void processAllFiles(List sortedFiles) String dataUrl = child.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString(); String childTitle = child.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); + // Check for duplicate titles + if (titles.contains(childTitle)) { + logger.warning("**** Multiple items with the same title in: " + entry.currentPath); + logger.warning("**** Will cause failure in hash and size validation in: " + bagID); + } else { + titles.add(childTitle); + } + // Build full path using stored currentPath String childPath = entry.currentPath + childTitle; JsonElement directoryLabel = child.get(JsonLDTerm.DVCore("directoryLabel").getLabel()); From 88edc8aefe591bcc593dde8bdda27e8f89d26f6d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Jan 2026 16:35:53 -0500 Subject: [PATCH 14/14] add null check to quiet log/avoid exception --- .../harvard/iq/dataverse/util/bagit/OREMap.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 4cbc2aa7b9a..dd651885d01 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -505,11 +505,16 @@ private static void addCvocValue(String val, JsonArrayBuilder vals, JsonObject c for (String prefix : context.keySet()) { localContext.putIfAbsent(prefix, context.getString(prefix)); } - JsonObjectBuilder job = Json.createObjectBuilder(datasetFieldService.getExternalVocabularyValue(val)); - job.add("@id", val); - JsonObject extVal = job.build(); - logger.fine("Adding: " + extVal); - vals.add(extVal); + JsonObject cachedValue = datasetFieldService.getExternalVocabularyValue(val); + if (cachedValue != null) { + JsonObjectBuilder job = 
Json.createObjectBuilder(cachedValue); + job.add("@id", val); + JsonObject extVal = job.build(); + logger.fine("Adding: " + extVal); + vals.add(extVal); + } else { + vals.add(val); + } } else { vals.add(val); }
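
Editor's note: the standalone sketch below illustrates the size-ordered, "holey bag" behavior introduced in the BagGenerator changes above, where files are collected, sorted smallest-first, and any file that would push the payload past the configured cap is recorded as a BagIt fetch.txt line instead of being stored in the zip. The FileEntry comparison, the maxTotalDataSize cap, and the "URL size filename" fetch.txt format follow the patch; the class itself, its main method, the threshold check, and the sample values are hypothetical simplifications and are not part of the Dataverse code.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Simplified, standalone sketch (not the Dataverse class): demonstrates the
// smallest-first ordering and the fetch.txt overflow rule used by the patched
// BagGenerator. Values and the threshold test are illustrative only.
public class HoleyBagSketch {

    // Minimal stand-in for the FileEntry inner class added in the patch.
    static class FileEntry implements Comparable<FileEntry> {
        final long size;
        final String url;
        final String path;

        FileEntry(long size, String url, String path) {
            this.size = size;
            this.url = url;
            this.path = path;
        }

        @Override
        public int compareTo(FileEntry other) {
            return Long.compare(this.size, other.size);
        }
    }

    public static void main(String[] args) {
        long maxTotalDataSize = 100L;   // hypothetical cap on payload bytes kept inside the bag
        long currentBagDataSize = 0L;

        List<FileEntry> allFiles = new ArrayList<>();
        allFiles.add(new FileEntry(80, "https://example.org/big.bin", "data/big.bin"));
        allFiles.add(new FileEntry(10, "https://example.org/small.txt", "data/small.txt"));
        allFiles.add(new FileEntry(40, "https://example.org/medium.csv", "data/medium.csv"));

        // Sort smallest first so as many files as possible fit under the cap.
        Collections.sort(allFiles);

        StringBuilder fetchFileContent = new StringBuilder();
        for (FileEntry entry : allFiles) {
            if (currentBagDataSize + entry.size > maxTotalDataSize) {
                // Too large to include: record a fetch.txt line ("URL size filename").
                fetchFileContent.append(entry.url).append(" ")
                        .append(entry.size).append(" ").append(entry.path).append("\n");
            } else {
                // Would be written into the zip by createFileFromURL() in the real class.
                currentBagDataSize += entry.size;
                System.out.println("Stored in bag: " + entry.path);
            }
        }
        System.out.print("fetch.txt contents:\n" + fetchFileContent);
    }
}

Sorting smallest-first appears intended to keep as many files as possible physically inside the bag before the cap is reached, deferring only the largest files to fetch.txt.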
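
Editor's note: the "replace deprecated constructs" commit switches BagGenerator from the deprecated ZipFile constructor and IOUtils.closeQuietly() to ZipFile.builder() and try-with-resources. The sketch below shows that pattern in isolation, assuming a commons-compress version that provides ZipFile.Builder (as the patch itself uses); the bag path and manifest entry name are example values only.

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;

// Standalone sketch of the resource-handling pattern adopted in the patch:
// ZipFile.builder() instead of the deprecated constructor, and
// try-with-resources instead of finally { IOUtils.closeQuietly(...) }.
public class ZipReadSketch {
    public static void main(String[] args) throws IOException {
        File bagFile = new File("example-bag.zip");            // hypothetical path
        String entryName = "example-bag/manifest-sha512.txt";  // hypothetical entry

        try (ZipFile zf = ZipFile.builder().setFile(bagFile).get()) {
            ZipArchiveEntry entry = zf.getEntry(entryName);
            if (entry == null) {
                throw new IOException("No manifest file found");
            }
            try (InputStream is = zf.getInputStream(entry);
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(is, StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println("Hash entry: " + line);
                }
            }
        } // the ZipFile and the manifest stream are closed here on all paths
    }
}

Try-with-resources guarantees closure even when an exception is thrown, which is what the removed finally blocks with closeQuietly() were approximating.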