diff --git a/doc/release-notes/12081-reExportAll-improvements.md b/doc/release-notes/12081-reExportAll-improvements.md
new file mode 100644
index 00000000000..bfbc3fe2944
--- /dev/null
+++ b/doc/release-notes/12081-reExportAll-improvements.md
@@ -0,0 +1,5 @@
+This release includes several improvements to the admin/metadata/reExportAll API call:
+
+- Fatal runtime problems, such as a failed database connection (seen at DANS), are now logged to the export log before the job terminates, making it easier to see when a run has failed
+- A new optional `?olderThan=` query parameter allows incremental reExportAll runs: the work can be done in smaller chunks, datasets last exported before some exporter change can be re-exported selectively, and a failed run can be restarted by passing the date of the failed run as `olderThan=`
+- Performance and memory use have been improved
\ No newline at end of file
diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst
index 97baf3e0c8e..5a60e68afc5 100644
--- a/doc/sphinx-guides/source/admin/metadataexport.rst
+++ b/doc/sphinx-guides/source/admin/metadataexport.rst
@@ -22,12 +22,15 @@ In addition to the automated exports, a Dataverse installation admin can start a
 
 ``curl http://localhost:8080/api/admin/metadata/reExportAll``
 
+``curl http://localhost:8080/api/admin/metadata/reExportAll?olderThan=YYYY-MM-DD``
+
 ``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps``
 
 ``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000``
 
 The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet.
-The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.
+The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not.
+With the optional ``olderThan`` query parameter (a date in ``YYYY-MM-DD`` format), the second will instead *force* a re-export of only those published, local datasets that were never exported or were last exported before the given date.
 
 The first two calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``.
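To make the new `olderThan` variant concrete, here is a minimal Java client sketch (hypothetical host, date, and class name; it simply mirrors the curl call documented above and is not part of this patch):

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ReExportAllClient {
    public static void main(String[] args) throws Exception {
        // Hypothetical local admin endpoint; olderThan must be in yyyy-MM-dd format.
        URI uri = URI.create("http://localhost:8080/api/admin/metadata/reExportAll?olderThan=2024-05-01");
        HttpRequest request = HttpRequest.newBuilder(uri).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // The API replies that the async job has been launched, e.g.
        // {"status":"WORKFLOW_IN_PROGRESS"}; progress appears in the export_[time stamp].log file.
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```

Repeating the call with successively later dates is one way to work through a large installation in chunks, per the release note above.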
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
index a58dad4f4c7..cbca48f6988 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -698,6 +698,12 @@ public void reExportAllAsync() {
         exportAllDatasets(true);
     }
 
+    // reExportAll with a date *forces* a re-export of all published datasets that were never exported or were last exported before that date
+    @Asynchronous
+    public void reExportAllAsync(Date reExportDate) {
+        exportAllDatasets(true, reExportDate);
+    }
+
     public void reExportAll() {
         exportAllDatasets(true);
     }
@@ -715,7 +722,13 @@ public void exportAll() {
         exportAllDatasets(false);
     }
 
-    public void exportAllDatasets(boolean forceReExport) {
+    private void exportAllDatasets(boolean forceReExport) {
+        exportAllDatasets(forceReExport, null);
+    }
+
+    // the export loop below is long-running, so it is kept out of a single container transaction; each dataset is exported in its own transaction
+    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
+    private void exportAllDatasets(boolean forceReExport, Date reExportDate) {
         Integer countAll = 0;
         Integer countSuccess = 0;
         Integer countError = 0;
@@ -723,22 +735,15 @@
         Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." + "ExportAll" + logTimestamp);
         String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "export_" + logTimestamp + ".log";
         FileHandler fileHandler;
-        boolean fileHandlerSuceeded;
         try {
             fileHandler = new FileHandler(logFileName);
             exportLogger.setUseParentHandlers(false);
-            fileHandlerSuceeded = true;
         } catch (IOException | SecurityException ex) {
             Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex);
             return;
         }
 
-        if (fileHandlerSuceeded) {
-            exportLogger.addHandler(fileHandler);
-        } else {
-            exportLogger = logger;
-        }
-
+        exportLogger.addHandler(fileHandler);
         exportLogger.info("Starting an export all job");
 
         for (Long datasetId : findAllLocalDatasetIds()) {
@@ -757,9 +761,17 @@
 
             // can't trust dataset.getPublicationDate(), no.
             Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! :)
-            if (forceReExport || (publicationDate != null
-                    && (dataset.getLastExportTime() == null
-                    || dataset.getLastExportTime().before(publicationDate)))) {
+            /*
+             * Three cases:
+             * 1. forceReExport is true, no reExportDate: re-export every dataset.
+             * 2. forceReExport is true, reExportDate given: re-export datasets never exported or last exported before that date.
+             * 3. forceReExport is false (reExportDate ignored): re-export datasets last exported before they were last published.
+             */
+            if ((forceReExport && reExportDate == null)
+                    || (forceReExport && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(reExportDate)))
+                    || (!forceReExport
+                            && (publicationDate != null && (dataset.getLastExportTime() == null
+                                    || dataset.getLastExportTime().before(publicationDate))))) {
                 countAll++;
                 try {
                     recordService.exportAllFormatsInNewTransaction(dataset);
@@ -768,6 +780,14 @@
                 } catch (Exception ex) {
                     exportLogger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex);
                     countError++;
+                } catch (Throwable t) {
+                    // a fatal problem (e.g., a lost database connection) aborts the job: log it and the counts so far, then rethrow
+                    exportLogger.log(Level.SEVERE, "Fatal error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + t.getClass().getName() + ": " + t.getMessage(), t);
+                    exportLogger.info("Datasets processed before fatal error: " + countAll.toString());
+                    exportLogger.info("Datasets exported successfully: " + countSuccess.toString());
+                    exportLogger.info("Datasets failures: " + countError.toString());
+                    fileHandler.close();
+                    throw t;
                 }
             }
         }
@@ -778,10 +797,7 @@
         exportLogger.info("Datasets failures: " + countError.toString());
         exportLogger.info("Finished export-all job.");
 
-        if (fileHandlerSuceeded) {
-            fileHandler.close();
-        }
-
+        fileHandler.close();
     }
 
     @Asynchronous
@@ -1140,4 +1156,5 @@ public void saveStorageQuota(Dataset target, Long allocation) {
         }
         em.flush();
     }
+
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
index bd937878286..8d63d25cfd3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
@@ -8,11 +8,11 @@
 
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetServiceBean;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 import java.util.logging.Logger;
 
 import jakarta.ejb.EJB;
-import jakarta.json.Json;
-import jakarta.json.JsonArrayBuilder;
-import jakarta.json.JsonObjectBuilder;
 import jakarta.ws.rs.*;
 import jakarta.ws.rs.core.Response;
@@ -57,8 +57,18 @@ public Response exportAll() {
     @GET
     @Path("/reExportAll")
     @Produces("application/json")
-    public Response reExportAll() {
-        datasetService.reExportAllAsync();
+    public Response reExportAll(@QueryParam("olderThan") String olderThan) {
+        Date reExportDate = null;
+        if (olderThan != null && !olderThan.isEmpty()) {
+            try {
+                SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+                dateFormat.setLenient(false); // reject dates such as 2024-13-01 instead of rolling them over
+                reExportDate = dateFormat.parse(olderThan);
+            } catch (ParseException e) {
+                return error(Response.Status.BAD_REQUEST, "Invalid date format for olderThan parameter. Expected format: YYYY-MM-DD");
+            }
+        }
+        datasetService.reExportAllAsync(reExportDate);
         return this.accepted();
     }
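Pulling the two patched files together, the following self-contained sketch (illustrative class and method names; it restates, rather than replaces, the logic in Metadata.reExportAll and DatasetServiceBean.exportAllDatasets) shows the strict `yyyy-MM-dd` parsing and the three-way re-export decision:

```java
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class ReExportDecisionSketch {

    // Strict parsing as in the endpoint: setLenient(false) rejects inputs
    // such as 2024-13-01 instead of silently rolling them into the next year.
    static Date parseOlderThan(String olderThan) throws ParseException {
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        dateFormat.setLenient(false);
        return dateFormat.parse(olderThan);
    }

    // The three cases from exportAllDatasets:
    // 1. force, no date: re-export everything.
    // 2. force with a date: re-export if never exported or last exported before that date.
    // 3. no force: re-export if never exported or last exported before the last publication.
    static boolean shouldReExport(boolean force, Date reExportDate,
                                  Date lastExportTime, Date publicationDate) {
        if (force && reExportDate == null) {
            return true;
        }
        if (force) {
            return lastExportTime == null || lastExportTime.before(reExportDate);
        }
        return publicationDate != null
                && (lastExportTime == null || lastExportTime.before(publicationDate));
    }

    public static void main(String[] args) throws ParseException {
        Date cutoff = parseOlderThan("2024-05-01"); // hypothetical cutoff date
        System.out.println(shouldReExport(true, cutoff, null, null));   // true: never exported
        System.out.println(shouldReExport(true, cutoff, cutoff, null)); // false: "before" is strict
    }
}
```

Note the strict `before` comparison: a dataset whose last export falls exactly on the `olderThan` instant is not re-exported.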