From ce2dac8925d708f3d43bf0696a85d2de597eaed8 Mon Sep 17 00:00:00 2001 From: Cole-Greer Date: Thu, 5 Jun 2025 09:30:39 -0700 Subject: [PATCH 1/3] add additional logging to AddCloneTask --- .../neptune/cluster/AddCloneTask.java | 126 ++++++++++++++---- 1 file changed, 98 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java b/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java index 13253326..aee341d4 100644 --- a/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java +++ b/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java @@ -126,22 +126,34 @@ private void createReplicas(NeptuneClusterMetadata sourceClusterMetadata, DBParameterGroup dbParameterGroup, DBCluster targetDbCluster) { + logger.info("Creating {} replica instances with instance type {}", replicaCount, instanceType); ExecutorService taskExecutor = Executors.newFixedThreadPool(replicaCount); for (int i = 0; i < replicaCount; i++) { - - taskExecutor.execute(() -> createInstance("replica", - neptune, - sourceClusterMetadata, - instanceType, - dbParameterGroup, - targetDbCluster)); + final int replicaNumber = i + 1; + logger.debug("Scheduling creation of replica {}/{}", replicaNumber, replicaCount); + + taskExecutor.execute(() -> { + logger.debug("Starting creation of replica instance {}/{}", replicaNumber, replicaCount); + createInstance("replica", + neptune, + sourceClusterMetadata, + instanceType, + dbParameterGroup, + targetDbCluster); + logger.debug("Completed creation of replica instance {}/{}", replicaNumber, replicaCount); + }); } taskExecutor.shutdown(); try { - taskExecutor.awaitTermination(30, TimeUnit.MINUTES); + boolean completed = taskExecutor.awaitTermination(30, TimeUnit.MINUTES); + if (completed) { + logger.debug("Successfully created all {} replica instances", replicaCount); + } else { + logger.warn("Timed out waiting for all replicas to be created after 30 minutes"); + } } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); @@ -168,10 +180,27 @@ private DBCluster createCluster(NeptuneClusterMetadata sourceClusterMetadata, .tags(getTags(sourceClusterMetadata.clusterId())); if (this.enableAuditLogs) { + logger.debug("Enabling audit logs for cloned cluster"); restoreDbClusterToPointInTimeRequestBuilder = restoreDbClusterToPointInTimeRequestBuilder.enableCloudwatchLogsExports("audit"); } - DBCluster targetDbCluster = neptune.restoreDBClusterToPointInTime(restoreDbClusterToPointInTimeRequestBuilder.build()).dbCluster(); + DBCluster targetDbCluster; + RestoreDbClusterToPointInTimeRequest request = restoreDbClusterToPointInTimeRequestBuilder.build(); + + try { + logger.debug("Sending restore DB cluster request: {}", request); + targetDbCluster = neptune.restoreDBClusterToPointInTime(request).dbCluster(); + } catch (NeptuneException e) { + logger.error("Failed to create target cluster: {} (Error code: {}, Message: {})", + targetClusterId, e.awsErrorDetails().errorCode(), e.getMessage()); + + // Log additional details that might help with troubleshooting + logger.debug("Request details: sourceClusterId={}, targetClusterId={}, dbSubnetGroupName={}, vpcSecurityGroupIds={}", + sourceClusterId, targetClusterId, sourceClusterMetadata.dbSubnetGroupName(), + String.join(",", sourceClusterMetadata.vpcSecurityGroupIds())); + + throw new RuntimeException("Failed to create clone cluster: " + e.getMessage(), e); + } String clusterStatus = targetDbCluster.status(); @@ -190,6 +219,13 @@ private DBCluster createCluster(NeptuneClusterMetadata sourceClusterMetadata, .status(); } + logger.debug("Cluster {} is now in {} state", targetClusterId, clusterStatus); + + // Check if the final status indicates success + if (!clusterStatus.equals("available")) { + logger.warn("Cluster {} is in {} state instead of 'available'", targetClusterId, clusterStatus); + } + return targetDbCluster; } @@ -266,16 +302,24 @@ private DBParameterGroup createDbParameterGroup(NeptuneClusterMetadata sourceClu private DBClusterParameterGroup createDbClusterParameterGroup(NeptuneClusterMetadata sourceClusterMetadata, NeptuneClient neptune) { + String paramGroupName = String.format("%s-db-cluster-params", targetClusterId); DBClusterParameterGroup dbClusterParameterGroup; - - dbClusterParameterGroup = neptune.createDBClusterParameterGroup( - CreateDbClusterParameterGroupRequest.builder() - .dbClusterParameterGroupName(String.format("%s-db-cluster-params", targetClusterId)) - .description(String.format("%s DB Cluster Parameter Group", targetClusterId)) - .dbParameterGroupFamily(sourceClusterMetadata.dbParameterGroupFamily()) - .tags(getTags(sourceClusterMetadata.clusterId())) - .build() - ).dbClusterParameterGroup(); + + try { + CreateDbClusterParameterGroupRequest request = CreateDbClusterParameterGroupRequest.builder() + .dbClusterParameterGroupName(paramGroupName) + .description(String.format("%s DB Cluster Parameter Group", targetClusterId)) + .dbParameterGroupFamily(sourceClusterMetadata.dbParameterGroupFamily()) + .tags(getTags(sourceClusterMetadata.clusterId())) + .build(); + + dbClusterParameterGroup = neptune.createDBClusterParameterGroup(request).dbClusterParameterGroup(); + logger.debug("Successfully created DB cluster parameter group: {}", dbClusterParameterGroup.dbClusterParameterGroupName()); + } catch (NeptuneException e) { + logger.error("Failed to create DB cluster parameter group: {} (Error code: {}, Message: {})", + paramGroupName, e.awsErrorDetails().errorCode(), e.getMessage()); + throw new RuntimeException("Failed to create DB cluster parameter group: " + e.getMessage(), e); + } String neptuneStreamsParameterValue = sourceClusterMetadata.isStreamEnabled() ? "1" : "0"; @@ -300,6 +344,7 @@ private DBClusterParameterGroup createDbClusterParameterGroup(NeptuneClusterMeta .build()); if (this.enableAuditLogs) { + logger.debug("Adding neptune_enable_audit_log parameter"); requestBuilder = requestBuilder.parameters(Parameter.builder() .parameterName("neptune_enable_audit_log") .parameterValue("1") @@ -367,16 +412,16 @@ private void createInstance(String name, DBParameterGroup dbParameterGroup, DBCluster targetDbCluster) { + String instanceId = String.format("neptune-export-%s-%s", name, UUID.randomUUID().toString().substring(0, 5)); System.err.println("Creating target " + name + " instance..."); CreateDbInstanceRequest.Builder requestBuilder = CreateDbInstanceRequest.builder() .dbInstanceClass(instanceType.value()) - .dbInstanceIdentifier(String.format("neptune-export-%s-%s", name, UUID.randomUUID().toString().substring(0, 5))) + .dbInstanceIdentifier(instanceId) .dbClusterIdentifier(targetDbCluster.dbClusterIdentifier()) .dbParameterGroupName(dbParameterGroup.dbParameterGroupName()) .engine("neptune") - .tags(getTags(sourceClusterMetadata.clusterId())) - ; + .tags(getTags(sourceClusterMetadata.clusterId())); if (StringUtils.isNotEmpty(engineVersion)) { requestBuilder = requestBuilder.engineVersion(engineVersion); @@ -391,6 +436,7 @@ private void createInstance(String name, // Retry loop with exponential backoff for (int attempt = 0; attempt <= maxRetries; attempt++) { try { + logger.debug("Sending create DB instance request for {}", instanceId); targetDbInstance = neptune.createDBInstance(request).dbInstance(); // If we get here, the request was successful break; @@ -400,8 +446,6 @@ private void createInstance(String name, logger.error("Failed to create {} instance after {} attempts, with error {}", name, maxRetries, e.getMessage()); return; } - - // Calculate backoff time with exponential increase and some jitter long backoffMillis = initialBackoffMillis * (long) Math.pow(2, attempt); @@ -432,11 +476,37 @@ private void createInstance(String name, } catch (InterruptedException e) { e.printStackTrace(); } - instanceStatus = neptune.describeDBInstances(DescribeDbInstancesRequest.builder() - .dbInstanceIdentifier(targetDbInstance.dbInstanceIdentifier()).build()) - .dbInstances() - .get(0) - .dbInstanceStatus(); + + try { + DescribeDbInstancesRequest describeRequest = DescribeDbInstancesRequest.builder() + .dbInstanceIdentifier(targetDbInstance.dbInstanceIdentifier()) + .build(); + + instanceStatus = neptune.describeDBInstances(describeRequest) + .dbInstances() + .get(0) + .dbInstanceStatus(); + + logger.debug("Instance {} status: {}", targetDbInstance.dbInstanceIdentifier(), instanceStatus); + } catch (NeptuneException e) { + logger.error("Error checking instance status: {} (Error code: {}, Message: {})", + targetDbInstance.dbInstanceIdentifier(), e.awsErrorDetails().errorCode(), e.getMessage()); + + if (e.awsErrorDetails().errorCode().equals("DBInstanceNotFound")) { + logger.error("The instance {} was not found. It may have been deleted or failed to create properly.", + targetDbInstance.dbInstanceIdentifier()); + return; + } + } + } + + logger.debug("{} instance {} is now in {} state", + name, targetDbInstance.dbInstanceIdentifier(), instanceStatus); + + // Check if the final status indicates success + if (!instanceStatus.equals("available")) { + logger.warn("{} instance {} is in {} state instead of 'available'", + name, targetDbInstance.dbInstanceIdentifier(), instanceStatus); } } From 07126426cb72b2deece5b81084fc4b145406e502 Mon Sep 17 00:00:00 2001 From: Cole-Greer Date: Tue, 10 Jun 2025 10:22:07 -0700 Subject: [PATCH 2/3] updates from PR feedback --- .../neptune/cluster/AddCloneTask.java | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java b/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java index aee341d4..f96cb094 100644 --- a/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java +++ b/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java @@ -13,19 +13,19 @@ package com.amazonaws.services.neptune.cluster; -import com.amazonaws.services.neptune.io.KinesisConfig; import com.amazonaws.services.neptune.util.Activity; import com.amazonaws.services.neptune.util.Timer; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.SdkRequest; import software.amazon.awssdk.services.neptune.NeptuneClient; import software.amazon.awssdk.services.neptune.model.*; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -148,11 +148,12 @@ private void createReplicas(NeptuneClusterMetadata sourceClusterMetadata, taskExecutor.shutdown(); try { - boolean completed = taskExecutor.awaitTermination(30, TimeUnit.MINUTES); + final int timeoutMin = 30; + boolean completed = taskExecutor.awaitTermination(timeoutMin, TimeUnit.MINUTES); if (completed) { logger.debug("Successfully created all {} replica instances", replicaCount); } else { - logger.warn("Timed out waiting for all replicas to be created after 30 minutes"); + logger.warn("Timed out waiting for all replicas to be created after {} minutes", timeoutMin); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -191,15 +192,11 @@ private DBCluster createCluster(NeptuneClusterMetadata sourceClusterMetadata, logger.debug("Sending restore DB cluster request: {}", request); targetDbCluster = neptune.restoreDBClusterToPointInTime(request).dbCluster(); } catch (NeptuneException e) { - logger.error("Failed to create target cluster: {} (Error code: {}, Message: {})", - targetClusterId, e.awsErrorDetails().errorCode(), e.getMessage()); - - // Log additional details that might help with troubleshooting - logger.debug("Request details: sourceClusterId={}, targetClusterId={}, dbSubnetGroupName={}, vpcSecurityGroupIds={}", - sourceClusterId, targetClusterId, sourceClusterMetadata.dbSubnetGroupName(), - String.join(",", sourceClusterMetadata.vpcSecurityGroupIds())); + logger.error("Failed to create target cluster: {} from source cluster: {} (Error code: {}, Message: {})", + targetClusterId, sourceClusterId, Optional.ofNullable(e.awsErrorDetails()).map(AwsErrorDetails::errorCode).orElse("N/A"), + e.getMessage(), e); - throw new RuntimeException("Failed to create clone cluster: " + e.getMessage(), e); + throw e; } String clusterStatus = targetDbCluster.status(); @@ -317,8 +314,9 @@ private DBClusterParameterGroup createDbClusterParameterGroup(NeptuneClusterMeta logger.debug("Successfully created DB cluster parameter group: {}", dbClusterParameterGroup.dbClusterParameterGroupName()); } catch (NeptuneException e) { logger.error("Failed to create DB cluster parameter group: {} (Error code: {}, Message: {})", - paramGroupName, e.awsErrorDetails().errorCode(), e.getMessage()); - throw new RuntimeException("Failed to create DB cluster parameter group: " + e.getMessage(), e); + paramGroupName, Optional.ofNullable(e.awsErrorDetails()).map(AwsErrorDetails::errorCode).orElse("N/A"), + e.getMessage(), e); + throw e; } String neptuneStreamsParameterValue = sourceClusterMetadata.isStreamEnabled() ? "1" : "0"; @@ -436,7 +434,7 @@ private void createInstance(String name, // Retry loop with exponential backoff for (int attempt = 0; attempt <= maxRetries; attempt++) { try { - logger.debug("Sending create DB instance request for {}", instanceId); + logger.debug("Sending create DB instance request for {} in cluster {}", instanceId, targetDbCluster.dbClusterIdentifier()); targetDbInstance = neptune.createDBInstance(request).dbInstance(); // If we get here, the request was successful break; @@ -495,8 +493,8 @@ private void createInstance(String name, if (e.awsErrorDetails().errorCode().equals("DBInstanceNotFound")) { logger.error("The instance {} was not found. It may have been deleted or failed to create properly.", targetDbInstance.dbInstanceIdentifier()); - return; } + throw e; } } From 031671fd7f88729b16ca870eab941595077b6edf Mon Sep 17 00:00:00 2001 From: Cole-Greer Date: Tue, 10 Jun 2025 16:18:37 -0700 Subject: [PATCH 3/3] extra null handling --- .../com/amazonaws/services/neptune/cluster/AddCloneTask.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java b/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java index f96cb094..e0c447fe 100644 --- a/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java +++ b/src/main/java/com/amazonaws/services/neptune/cluster/AddCloneTask.java @@ -488,9 +488,10 @@ private void createInstance(String name, logger.debug("Instance {} status: {}", targetDbInstance.dbInstanceIdentifier(), instanceStatus); } catch (NeptuneException e) { logger.error("Error checking instance status: {} (Error code: {}, Message: {})", - targetDbInstance.dbInstanceIdentifier(), e.awsErrorDetails().errorCode(), e.getMessage()); + targetDbInstance.dbInstanceIdentifier(), Optional.ofNullable(e.awsErrorDetails()).map(AwsErrorDetails::errorCode).orElse("N/A"), + e.getMessage(), e); - if (e.awsErrorDetails().errorCode().equals("DBInstanceNotFound")) { + if (e.awsErrorDetails() != null && e.awsErrorDetails().errorCode() != null && e.awsErrorDetails().errorCode().equals("DBInstanceNotFound")) { logger.error("The instance {} was not found. It may have been deleted or failed to create properly.", targetDbInstance.dbInstanceIdentifier()); }