diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md index 08cdec15bec6..4c06111e924c 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md @@ -3,6 +3,7 @@ ### 4.43.0-beta.1 (Unreleased) #### Features Added +* Added transactional batch support. See [PR 47478](https://github.com/Azure/azure-sdk-for-java/pull/47478) and [PR 47697](https://github.com/Azure/azure-sdk-for-java/pull/47697) and [47803](https://github.com/Azure/azure-sdk-for-java/pull/47803) #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md index 7400620d0e2f..9a06400585d0 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md @@ -3,6 +3,7 @@ ### 4.43.0-beta.1 (Unreleased) #### Features Added +* Added transactional batch support. See [PR 47478](https://github.com/Azure/azure-sdk-for-java/pull/47478) and [PR 47697](https://github.com/Azure/azure-sdk-for-java/pull/47697) and [47803](https://github.com/Azure/azure-sdk-for-java/pull/47803) #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos-spark_3/docs/configuration-reference.md b/sdk/cosmos/azure-cosmos-spark_3/docs/configuration-reference.md index 5d9907342ba7..d13b5bada3b4 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/docs/configuration-reference.md +++ b/sdk/cosmos/azure-cosmos-spark_3/docs/configuration-reference.md @@ -56,22 +56,24 @@ | `spark.cosmos.disableTcpConnectionEndpointRediscovery` | `false` | Can be used to disable TCP connection endpoint rediscovery. TCP connection endpoint rediscovery should only be disabled when using custom domain names with private endpoints when using a custom Spark environment. When using Azure Databricks or Azure Synapse as Spark runtime it should never be required to disable endpoint rediscovery. | | `spark.cosmos.read.allowInvalidJsonWithDuplicateJsonProperties` | `false` | By default (when set to false) the Cosmos Java SDK and spark connector will raise a hard failure when json documents are read that contain json object with multiple properties of the same name. This config option can be used to override the behavior and silently ignore the invalid json and instead use the last occurrence of the property when parsing the json. NOTE: This is only meant to be used as a temporary workaround. We strongly recommend fixing the invalid json from even being ingested into the data and only use this workaround while cleaning up the documents with invalid json. | | `spark.cosmos.proactiveConnectionInitialization` | None | Can be used to define a list (semicolon separated) of `DB/Container` pairs. Connections for these containers will be proactively warmed-up when using direct mode. The format of the config would be `DB1/Collection1;DB2/Collection2` etc. | -| `spark.cosmos.proactiveConnectionInitializationDurationInSeconds` | `120` | The maximum duration for which the client when being initialized would aggressively try to warm-up collections. After this time perios the warm-up will happen only slowly (on one background thread). | +| `spark.cosmos.proactiveConnectionInitializationDurationInSeconds` | `120` | The maximum duration for which the client when being initialized would aggressively try to warm-up collections. After this time perios the warm-up will happen only slowly (on one background thread). 
| | `spark.cosmos.metadata.feedRange.refreshIntervalInSeconds` | `120` | The time interval in seconds to refresh the internal partition key range cache, valid between `[60, 1800]`. By default it is 120 seconds. | ### Write Config -| Config Property Name | Default | Description | -|:----------------------------------------------------------------|:----------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `spark.cosmos.write.strategy` | `ItemOverwrite` | Cosmos DB Item write Strategy:
- `ItemOverwrite` (using upsert),
- `ItemOverwriteIfNotModified` (if etag property of the row is empty/null it will just do an insert and ignore if the document already exists - same as `ItemAppend`, if an etag value exists it will attempt to replace the document with etag pre-condition. If the document changed - identified by precondition failure - the update is skipped and the document is not updated with the content of the data frame row),
- `ItemAppend` (using create, ignore pre-existing items i.e., Conflicts),
- `ItemDelete` (delete all documents),
- `ItemDeleteIfNotModified` (delete all documents for which the etag hasn't changed),
- `ItemPatch` and `ItemPatchIfExists` (Partial update all documents based on the patch config, `ItemPatch` will fail the spark job when hitting 404/Not Found - while `ItemPatchIfExists` will skip documents that don't exist gracefully.),
- `ItemBulkUpdate` (read item, then patch the item locally, then using create if etag is empty, update/replace with etag pre-condition. In cases of any conflict or precondition failure, SDK will retry the above steps to update the documents properly.) | -| `spark.cosmos.write.maxRetryCount` | `10` | Cosmos DB Write Max Retry Attempts on retriable failures (e.g., connection error, moderakh add more details) | -| `spark.cosmos.write.point.maxConcurrency` | None | Cosmos DB Item Write Max concurrency. If not specified it will be determined based on the Spark executor VM Size | -| `spark.cosmos.write.bulk.maxPendingOperations` | None | Cosmos DB Item Write bulk mode maximum pending operations. Defines a limit of bulk operations being processed concurrently. If not specified it will be determined based on the Spark executor VM Size. If the volume of data is large for the provisioned throughput on the destination container, this setting can be adjusted by following the estimation of `1000 x Cores` | -| `spark.cosmos.write.bulk.enabled` | `true` | Cosmos DB Item Write bulk enabled | -| `spark.cosmos.write.bulk.transactional` | `false` | Enable transactional batch mode for bulk writes. When enabled, all operations for the same partition key are executed atomically (all succeed or all fail). Requires ordering and clustering by partition key columns. Only supports upsert operations. Cannot exceed 100 operations or 2MB per partition key. **Note**: For containers using hierarchical partition keys (HPK), transactional scope applies only to **logical partitions** (complete partition key paths), not partial top-level keys. See [Transactional Batch documentation](https://learn.microsoft.com/azure/cosmos-db/transactional-batch) for details. |\n| `spark.cosmos.write.bulk.targetedPayloadSizeInBytes` | `220201` | When the targeted payload size is reached for buffered documents, the request is sent to the backend. The default value is optimized for small documents <= 10 KB - when documents often exceed 110 KB, it can help to increase this value to up to about `1500000` (should still be smaller than 2 MB). | -| `spark.cosmos.write.bulk.initialBatchSize` | `100` | Cosmos DB initial bulk micro batch size - a micro batch will be flushed to the backend when the number of documents enqueued exceeds this size - or the target payload size is met. The micro batch size is getting automatically tuned based on the throttling rate. By default the initial micro batch size is 100. Reduce this when you want to avoid that the first few requests consume too many RUs. | -| `spark.cosmos.write.bulk.maxBatchSize` | `100` | Cosmos DB max. bulk micro batch size - a micro batch will be flushed to the backend when the number of documents enqueued exceeds this size - or the target payload size is met. The micro batch size is getting automatically tuned based on the throttling rate. By default the max. micro batch size is 100. Use this setting only when migrating Spark 2.4 workloads - for other scenarios relying on the auto-tuning combined with throughput control will result in better experience. | -| `spark.cosmos.write.flush.noProgress.maxIntervalInSeconds` | `180` | The time interval in seconds that write operations will wait when no progress can be made for bulk writes before forcing a retry. The retry will reinitialize the bulk write process - so, any delays on the retry can be sure to be actual service issues. 
The default value of 3 min should be sufficient to prevent false negatives when there is a short service-side write unavailability - like for partition splits or merges. Increase it only if you regularly see these transient errors to exceed a time period of 180 seconds. | -| `spark.cosmos.write.flush.noProgress.maxRetryIntervalInSeconds` | `2700` | The time interval in seconds that write operations will wait when no progress can be made for bulk writes after the initial attempt (and restarting the bulk writer client-side). This time interval is supposed to be large enough to not fail Spark jobs even when there are transient write availability outages in the service. The default value of 45 minutes can be modified when you rather prefer Spark jobs to fail or extended when needed. | +| Config Property Name | Default | Description | +|:-----------------------------------------------------------------|:----------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `spark.cosmos.write.strategy` | `ItemOverwrite` | Cosmos DB Item write Strategy:
- `ItemOverwrite` (using upsert),
- `ItemOverwriteIfNotModified` (if the etag property of the row is empty/null, it will just do an insert and ignore documents that already exist - same as `ItemAppend`; if an etag value exists, it will attempt to replace the document with an etag pre-condition. If the document has changed - identified by a precondition failure - the update is skipped and the document is not updated with the content of the data frame row),
- `ItemAppend` (using create and ignoring pre-existing items, i.e., conflicts),
- `ItemDelete` (delete all documents),
- `ItemDeleteIfNotModified` (delete all documents for which the etag hasn't changed),
- `ItemPatch` and `ItemPatchIfExists` (partially update all documents based on the patch config; `ItemPatch` will fail the Spark job when hitting 404/Not Found, while `ItemPatchIfExists` will gracefully skip documents that don't exist),
- `ItemBulkUpdate` (read item, then patch the item locally, then using create if etag is empty, update/replace with etag pre-condition. In cases of any conflict or precondition failure, SDK will retry the above steps to update the documents properly.) | +| `spark.cosmos.write.maxRetryCount` | `10` | Cosmos DB Write Max Retry Attempts on retriable failures (e.g., connection error, moderakh add more details) | +| `spark.cosmos.write.point.maxConcurrency` | None | Cosmos DB Item Write Max concurrency. If not specified it will be determined based on the Spark executor VM Size | +| `spark.cosmos.write.bulk.maxPendingOperations` | None | Cosmos DB Item Write bulk mode maximum pending operations. Defines a limit of bulk operations being processed concurrently. If not specified it will be determined based on the Spark executor VM Size. If the volume of data is large for the provisioned throughput on the destination container, this setting can be adjusted by following the estimation of `1000 x Cores` | +| `spark.cosmos.write.bulk.enabled` | `true` | Cosmos DB Item Write bulk enabled | +| `spark.cosmos.write.bulk.targetedPayloadSizeInBytes` | `220201` | When the targeted payload size is reached for buffered documents, the request is sent to the backend. The default value is optimized for small documents <= 10 KB - when documents often exceed 110 KB, it can help to increase this value to up to about `1500000` (should still be smaller than 2 MB). | +| `spark.cosmos.write.bulk.initialBatchSize` | `100` | Cosmos DB initial bulk micro batch size - a micro batch will be flushed to the backend when the number of documents enqueued exceeds this size - or the target payload size is met. The micro batch size is getting automatically tuned based on the throttling rate. By default the initial micro batch size is 100. Reduce this when you want to avoid that the first few requests consume too many RUs. | +| `spark.cosmos.write.bulk.maxBatchSize` | `100` | Cosmos DB max. bulk micro batch size - a micro batch will be flushed to the backend when the number of documents enqueued exceeds this size - or the target payload size is met. The micro batch size is getting automatically tuned based on the throttling rate. By default the max. micro batch size is 100. Use this setting only when migrating Spark 2.4 workloads - for other scenarios relying on the auto-tuning combined with throughput control will result in better experience. | +| `spark.cosmos.write.flush.noProgress.maxIntervalInSeconds` | `180` | The time interval in seconds that write operations will wait when no progress can be made for bulk writes before forcing a retry. The retry will reinitialize the bulk write process - so, any delays on the retry can be sure to be actual service issues. The default value of 3 min should be sufficient to prevent false negatives when there is a short service-side write unavailability - like for partition splits or merges. Increase it only if you regularly see these transient errors to exceed a time period of 180 seconds. | +| `spark.cosmos.write.flush.noProgress.maxRetryIntervalInSeconds` | `2700` | The time interval in seconds that write operations will wait when no progress can be made for bulk writes after the initial attempt (and restarting the bulk writer client-side). This time interval is supposed to be large enough to not fail Spark jobs even when there are transient write availability outages in the service. 
The default value of 45 minutes can be modified when you rather prefer Spark jobs to fail or extended when needed. | +| `spark.cosmos.write.bulk.transactional.maxOperationsConcurrency` | `100` | Max number of in-flight operations per Cosmos partition for transactional bulk mode. Higher values increase parallelism (and RU usage) but can cause throttling; default ~100. | | +| `spark.cosmos.write.bulk.transactional.maxBatchesConcurrency` | `5` | Max concurrent transactional batches per Cosmos partition (1..5). Controls batch-level parallelism; default 5. Each batch may contain multiple operations; tune together with 'spark.cosmos.write.bulk.transactional.maxOperationsConcurrency' to balance throughput and throttling. | | #### Patch Config | Config Property Name | Default | Description | diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/BulkWriter.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/BulkWriter.scala index 1ae62d5c2b92..870b636e104f 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/BulkWriter.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/BulkWriter.scala @@ -69,7 +69,9 @@ private class BulkWriter // so multiplying by cpuCount in the default config is too aggressive private val maxPendingOperations = writeConfig.bulkMaxPendingOperations .getOrElse(DefaultMaxPendingOperationPerCore) - private val maxConcurrentPartitions = writeConfig.maxConcurrentCosmosPartitions match { + + private val bulkExecutionConfigs = writeConfig.bulkExecutionConfigs.get.asInstanceOf[CosmosWriteBulkExecutionConfigs] + private val maxConcurrentPartitions = bulkExecutionConfigs.maxConcurrentCosmosPartitions match { // using the provided maximum of concurrent partitions per Spark partition on the input data // multiplied by 2 to leave space for partition splits during ingestion case Some(configuredMaxConcurrentPartitions) => 2 * configuredMaxConcurrentPartitions @@ -146,20 +148,20 @@ private class BulkWriter ThroughputControlHelper.populateThroughputControlGroupName(cosmosBulkExecutionOptions, writeConfig.throughputControlConfig) - writeConfig.maxMicroBatchPayloadSizeInBytes match { + bulkExecutionConfigs.maxMicroBatchPayloadSizeInBytes match { case Some(customMaxMicroBatchPayloadSizeInBytes) => cosmosBulkExecutionOptionsImpl .setMaxMicroBatchPayloadSizeInBytes(customMaxMicroBatchPayloadSizeInBytes) case None => } - writeConfig.initialMicroBatchSize match { + bulkExecutionConfigs.initialMicroBatchSize match { case Some(customInitialMicroBatchSize) => cosmosBulkExecutionOptions.setInitialMicroBatchSize(Math.max(1, customInitialMicroBatchSize)) case None => } - writeConfig.maxMicroBatchSize match { + bulkExecutionConfigs.maxMicroBatchSize match { case Some(customMaxMicroBatchSize) => cosmosBulkExecutionOptions.setMaxMicroBatchSize( Math.max( @@ -267,7 +269,7 @@ private class BulkWriter // We start from using the bulk batch size and interval and concurrency // If in the future, there is a need to separate the configuration, can re-consider - val bulkBatchSize = writeConfig.maxMicroBatchSize match { + val bulkBatchSize = bulkExecutionConfigs.maxMicroBatchSize match { case Some(customMaxMicroBatchSize) => Math.min( BatchRequestResponseConstants.MAX_OPERATIONS_IN_DIRECT_MODE_BATCH_REQUEST, Math.max(1, customMaxMicroBatchSize)) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosConfig.scala 
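To put the new transactional write options documented above into context, here is a minimal usage sketch - the account endpoint/key, database (`SampleDB`), container (`Orders`), and partition key column (`customerId`) are hypothetical placeholders, not part of this change. The repartition/sort step reflects the requirement that rows must be ordered and clustered by the partition key columns, and the write strategy must stay at `ItemOverwrite` (upsert):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("cosmos-transactional-write-sample").getOrCreate()
import spark.implicits._

// Sample rows; `customerId` stands in for the container's partition key column.
val orders = Seq(
  ("order-1", "customer-1", 12.50),
  ("order-2", "customer-1", 99.99),
  ("order-3", "customer-2", 5.00)
).toDF("id", "customerId", "total")

orders
  // Co-locate and order rows per logical partition key so each transactional
  // batch only contains operations for a single partition key value.
  .repartition($"customerId")
  .sortWithinPartitions($"customerId")
  .write
  .format("cosmos.oltp")
  .option("spark.cosmos.accountEndpoint", "https://<account>.documents.azure.com:443/")
  .option("spark.cosmos.accountKey", "<account-key>")
  .option("spark.cosmos.database", "SampleDB")
  .option("spark.cosmos.container", "Orders")
  .option("spark.cosmos.write.strategy", "ItemOverwrite")
  .option("spark.cosmos.write.bulk.enabled", "true")
  .option("spark.cosmos.write.bulk.transactional", "true")
  .option("spark.cosmos.write.bulk.transactional.maxOperationsConcurrency", "100")
  .option("spark.cosmos.write.bulk.transactional.maxBatchesConcurrency", "5")
  .mode("append")
  .save()
```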
b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosConfig.scala index f28e459b2d83..1692d947f74a 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosConfig.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosConfig.scala @@ -120,6 +120,8 @@ private[spark] object CosmosConfigNames { val WriteBulkMaxConcurrentPartitions = "spark.cosmos.write.bulk.maxConcurrentCosmosPartitions" val WriteBulkPayloadSizeInBytes = "spark.cosmos.write.bulk.targetedPayloadSizeInBytes" val WriteBulkInitialBatchSize = "spark.cosmos.write.bulk.initialBatchSize" + val WriteBulkTransactionalMaxOperationsConcurrency = "spark.cosmos.write.bulk.transactional.maxOperationsConcurrency" + val WriteBulkTransactionalMaxBatchesConcurrency = "spark.cosmos.write.bulk.transactional.maxBatchesConcurrency" val WritePointMaxConcurrency = "spark.cosmos.write.point.maxConcurrency" val WritePatchDefaultOperationType = "spark.cosmos.write.patch.defaultOperationType" val WritePatchColumnConfigs = "spark.cosmos.write.patch.columnConfigs" @@ -248,6 +250,8 @@ private[spark] object CosmosConfigNames { WriteBulkMaxConcurrentPartitions, WriteBulkPayloadSizeInBytes, WriteBulkInitialBatchSize, + WriteBulkTransactionalMaxOperationsConcurrency, + WriteBulkTransactionalMaxBatchesConcurrency, WriteBulkMaxBatchSize, WriteBulkMinTargetBatchSize, WritePointMaxConcurrency, @@ -1464,21 +1468,31 @@ private case class CosmosPatchConfigs(columnConfigsMap: TrieMap[String, CosmosPa private case class CosmosWriteConfig(itemWriteStrategy: ItemWriteStrategy, maxRetryCount: Int, bulkEnabled: Boolean, - bulkTransactional: Boolean = false, + bulkTransactional: Boolean, + bulkExecutionConfigs: Option[CosmosWriteBulkExecutionConfigsBase] = None, bulkMaxPendingOperations: Option[Int] = None, pointMaxConcurrency: Option[Int] = None, - maxConcurrentCosmosPartitions: Option[Int] = None, patchConfigs: Option[CosmosPatchConfigs] = None, throughputControlConfig: Option[CosmosThroughputControlConfig] = None, - maxMicroBatchPayloadSizeInBytes: Option[Int] = None, - initialMicroBatchSize: Option[Int] = None, - maxMicroBatchSize: Option[Int] = None, - minTargetMicroBatchSize: Option[Int] = None, flushCloseIntervalInSeconds: Int = 60, maxInitialNoProgressIntervalInSeconds: Int = 180, maxRetryNoProgressIntervalInSeconds: Int = 45 * 60, retryCommitInterceptor: Option[WriteOnRetryCommitInterceptor] = None) +private trait CosmosWriteBulkExecutionConfigsBase {} + +private case class CosmosWriteBulkExecutionConfigs( + maxConcurrentCosmosPartitions: Option[Int] = None, + maxMicroBatchPayloadSizeInBytes: Option[Int] = None, + initialMicroBatchSize: Option[Int] = None, + maxMicroBatchSize: Option[Int] = None, + minTargetMicroBatchSize: Option[Int] = None) extends CosmosWriteBulkExecutionConfigsBase + +private case class CosmosWriteTransactionalBulkExecutionConfigs( + maxConcurrentCosmosPartitions: Option[Int] = None, + maxConcurrentOperations: Option[Int] = None, + maxConcurrentBatches: Option[Int] = None) extends CosmosWriteBulkExecutionConfigsBase + private object CosmosWriteConfig { private val DefaultMaxRetryCount = 10 private val DefaultPatchOperationType = CosmosPatchOperationTypes.Replace @@ -1554,6 +1568,22 @@ private object CosmosWriteConfig { s" repartitioned to balance to how many Cosmos partitions each Spark partition needs to write. 
This is mainly" + s" useful for very large containers (with hundreds of physical partitions).") + private val bulkTransactionalMaxOpsConcurrency = CosmosConfigEntry[Int]( + key = CosmosConfigNames.WriteBulkTransactionalMaxOperationsConcurrency, + defaultValue = Option.apply(BatchRequestResponseConstants.DEFAULT_MAX_BULK_TRANSACTIONAL_BATCH_OP_CONCURRENCY), + mandatory = false, + parseFromStringFunction = maxOpsConcurrency => maxOpsConcurrency.toInt, + helpMessage = "Max number of in-flight operations per Cosmos partition for transactional bulk mode. " + + "Higher values increase parallelism (and RU usage) but can cause throttling; default ~100.") + + private val bulkTransactionalMaxBatchesConcurrency = CosmosConfigEntry[Int]( + key = CosmosConfigNames.WriteBulkTransactionalMaxBatchesConcurrency, + defaultValue = Option.apply(BatchRequestResponseConstants.DEFAULT_MAX_BULK_TRANSACTIONAL_BATCH_CONCURRENCY), + mandatory = false, + parseFromStringFunction = maxBatchesConcurrency => maxBatchesConcurrency.toInt, + helpMessage = "Max concurrent transactional batches per Cosmos partition (1..5). Controls batch-level parallelism; default 5." + + "Each batch may contain multiple operations; tune together with 'spark.cosmos.write.bulk.transactional.maxOperationsConcurrency' to balance throughput and throttling.") + private val pointWriteConcurrency = CosmosConfigEntry[Int](key = CosmosConfigNames.WritePointMaxConcurrency, mandatory = false, parseFromStringFunction = bulkMaxConcurrencyAsString => bulkMaxConcurrencyAsString.toInt, @@ -1772,10 +1802,7 @@ private object CosmosWriteConfig { val bulkTransactionalOpt = CosmosConfigEntry.parse(cfg, bulkTransactional) var patchConfigsOpt = Option.empty[CosmosPatchConfigs] val throughputControlConfigOpt = CosmosThroughputControlConfig.parseThroughputControlConfig(cfg) - val microBatchPayloadSizeInBytesOpt = CosmosConfigEntry.parse(cfg, microBatchPayloadSizeInBytes) - val initialBatchSizeOpt = CosmosConfigEntry.parse(cfg, initialMicroBatchSize) - val maxBatchSizeOpt = CosmosConfigEntry.parse(cfg, maxMicroBatchSize) - val minTargetBatchSizeOpt = CosmosConfigEntry.parse(cfg, minTargetMicroBatchSize) + val writeRetryCommitInterceptor = CosmosConfigEntry .parse(cfg, writeOnRetryCommitInterceptor).flatten @@ -1785,12 +1812,6 @@ private object CosmosWriteConfig { assert(itemWriteStrategyOpt.isDefined, s"Parameter '${CosmosConfigNames.WriteStrategy}' is missing.") assert(maxRetryCountOpt.isDefined, s"Parameter '${CosmosConfigNames.WriteMaxRetryCount}' is missing.") - if (bulkTransactionalOpt.isDefined && bulkTransactionalOpt.get) { - // Validate write strategy for transactional batches - assert(itemWriteStrategyOpt.get == ItemWriteStrategy.ItemOverwrite, - s"Transactional batches only support ItemOverwrite (upsert) write strategy. Requested: ${itemWriteStrategyOpt.get}") - } - itemWriteStrategyOpt.get match { case ItemWriteStrategy.ItemPatch | ItemWriteStrategy.ItemPatchIfExists => val patchColumnConfigMap = parsePatchColumnConfigs(cfg, inputSchema) @@ -1802,20 +1823,51 @@ private object CosmosWriteConfig { case _ => } + var bulkExecutionConfigsOpt: Option[CosmosWriteBulkExecutionConfigsBase] = None + if (bulkEnabledOpt.isDefined && bulkEnabledOpt.get) { + + if (bulkTransactionalOpt.isDefined && bulkTransactionalOpt.get) { + // Validate write strategy for transactional batches + assert(itemWriteStrategyOpt.get == ItemWriteStrategy.ItemOverwrite, + s"Transactional batches only support ItemOverwrite (upsert) write strategy. 
Requested: ${itemWriteStrategyOpt.get}") + + val maxConcurrentCosmosPartitionsOpt = CosmosConfigEntry.parse(cfg, bulkMaxConcurrentPartitions) + val maxBulkTransactionalOpsConcurrencyOpt = CosmosConfigEntry.parse(cfg, bulkTransactionalMaxOpsConcurrency) + val maxBulkTransactionalBatchesConcurrencyOpt = CosmosConfigEntry.parse(cfg, bulkTransactionalMaxBatchesConcurrency) + + bulkExecutionConfigsOpt = Some(CosmosWriteTransactionalBulkExecutionConfigs( + maxConcurrentCosmosPartitionsOpt, + maxBulkTransactionalOpsConcurrencyOpt, + maxBulkTransactionalBatchesConcurrencyOpt + )) + + } else { + // non-transactional batch + val maxConcurrentCosmosPartitionsOpt = CosmosConfigEntry.parse(cfg, bulkMaxConcurrentPartitions) + val microBatchPayloadSizeInBytesOpt = CosmosConfigEntry.parse(cfg, microBatchPayloadSizeInBytes) + val initialBatchSizeOpt = CosmosConfigEntry.parse(cfg, initialMicroBatchSize) + val maxBatchSizeOpt = CosmosConfigEntry.parse(cfg, maxMicroBatchSize) + val minTargetBatchSizeOpt = CosmosConfigEntry.parse(cfg, minTargetMicroBatchSize) + + bulkExecutionConfigsOpt = Some(CosmosWriteBulkExecutionConfigs( + maxConcurrentCosmosPartitionsOpt, + microBatchPayloadSizeInBytesOpt, + initialBatchSizeOpt, + maxBatchSizeOpt, + minTargetBatchSizeOpt)) + } + } + CosmosWriteConfig( itemWriteStrategyOpt.get, maxRetryCountOpt.get, bulkEnabled = bulkEnabledOpt.get, bulkTransactional = bulkTransactionalOpt.get, + bulkExecutionConfigs = bulkExecutionConfigsOpt, bulkMaxPendingOperations = CosmosConfigEntry.parse(cfg, bulkMaxPendingOperations), pointMaxConcurrency = CosmosConfigEntry.parse(cfg, pointWriteConcurrency), - maxConcurrentCosmosPartitions = CosmosConfigEntry.parse(cfg, bulkMaxConcurrentPartitions), patchConfigs = patchConfigsOpt, throughputControlConfig = throughputControlConfigOpt, - maxMicroBatchPayloadSizeInBytes = microBatchPayloadSizeInBytesOpt, - initialMicroBatchSize = initialBatchSizeOpt, - maxMicroBatchSize = maxBatchSizeOpt, - minTargetMicroBatchSize = minTargetBatchSizeOpt, flushCloseIntervalInSeconds = CosmosConfigEntry.parse(cfg, flushCloseIntervalInSeconds).get, maxInitialNoProgressIntervalInSeconds = CosmosConfigEntry.parse(cfg, maxInitialNoProgressIntervalInSeconds).get, maxRetryNoProgressIntervalInSeconds = CosmosConfigEntry.parse(cfg, maxRetryNoProgressIntervalInSeconds).get, diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransactionalBulkWriter.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransactionalBulkWriter.scala index f50c838c4dcf..13b47fe87a48 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransactionalBulkWriter.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransactionalBulkWriter.scala @@ -3,12 +3,13 @@ package com.azure.cosmos.spark // scalastyle:off underscore.import -import com.azure.cosmos.{BridgeInternal, CosmosAsyncContainer, CosmosDiagnosticsContext, CosmosEndToEndOperationLatencyPolicyConfigBuilder, CosmosException} -import com.azure.cosmos.implementation.batch.{BulkExecutorDiagnosticsTracker, TransactionalBulkExecutor} +import com.azure.cosmos.implementation.batch.{BulkExecutorDiagnosticsTracker, CosmosBulkTransactionalBatchResponse, TransactionalBulkExecutor} import com.azure.cosmos.implementation.{CosmosTransactionalBulkExecutionOptionsImpl, UUIDs} import com.azure.cosmos.models.{CosmosBatch, CosmosBatchResponse} -import com.azure.cosmos.spark.TransactionalBulkWriter.{BulkOperationFailedException, 
DefaultMaxPendingOperationPerCore, emitFailureHandler, getThreadInfo, transactionalBatchInputBoundedElastic, transactionalBulkWriterInputBoundedElastic, transactionalBulkWriterRequestsBoundedElastic} +import com.azure.cosmos.spark.BulkWriter.getThreadInfo +import com.azure.cosmos.spark.TransactionalBulkWriter.{BulkOperationFailedException, DefaultMaxPendingOperationPerCore, emitFailureHandler, transactionalBatchInputBoundedElastic, transactionalBulkWriterInputBoundedElastic, transactionalBulkWriterRequestsBoundedElastic} import com.azure.cosmos.spark.diagnostics.DefaultDiagnostics +import com.azure.cosmos.{BridgeInternal, CosmosAsyncContainer, CosmosDiagnosticsContext, CosmosEndToEndOperationLatencyPolicyConfigBuilder, CosmosException} import reactor.core.Scannable import reactor.core.scala.publisher.SMono.PimpJFlux import reactor.core.scheduler.Scheduler @@ -68,7 +69,9 @@ private class TransactionalBulkWriter } private val maxPendingOperations = writeConfig.bulkMaxPendingOperations .getOrElse(DefaultMaxPendingOperationPerCore) - private val maxConcurrentPartitions = writeConfig.maxConcurrentCosmosPartitions match { + + private val transactionalBulkExecutionConfigs = writeConfig.bulkExecutionConfigs.get.asInstanceOf[CosmosWriteTransactionalBulkExecutionConfigs] + private val maxConcurrentPartitions = transactionalBulkExecutionConfigs.maxConcurrentCosmosPartitions match { // using the provided maximum of concurrent partitions per Spark partition on the input data // multiplied by 2 to leave space for partition splits during ingestion case Some(configuredMaxConcurrentPartitions) => 2 * configuredMaxConcurrentPartitions @@ -104,12 +107,18 @@ private class TransactionalBulkWriter private val endToEndTimeoutPolicy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(maxOperationTimeout) .enable(true) .build - private val cosmosTransactionalBulkExecutionOptions = new CosmosTransactionalBulkExecutionOptionsImpl(Map.empty[String, String].asJava) + private val cosmosTransactionalBulkExecutionOptions = new CosmosTransactionalBulkExecutionOptionsImpl() private val monotonicOperationCounter = new AtomicLong(0) cosmosTransactionalBulkExecutionOptions.setSchedulerOverride(transactionalBulkWriterRequestsBoundedElastic) cosmosTransactionalBulkExecutionOptions.setMaxConcurrentCosmosPartitions(maxConcurrentPartitions) cosmosTransactionalBulkExecutionOptions.setCosmosEndToEndLatencyPolicyConfig(endToEndTimeoutPolicy) + if (transactionalBulkExecutionConfigs.maxConcurrentOperations.isDefined) { + cosmosTransactionalBulkExecutionOptions.setMaxOperationsConcurrency(transactionalBulkExecutionConfigs.maxConcurrentOperations.get) + } + if (transactionalBulkExecutionConfigs.maxConcurrentBatches.isDefined) { + cosmosTransactionalBulkExecutionOptions.setMaxBatchesConcurrency(transactionalBulkExecutionConfigs.maxConcurrentBatches.get) + } private class ForwardingMetricTracker(val verboseLoggingEnabled: AtomicBoolean) extends BulkExecutorDiagnosticsTracker { override def trackDiagnostics(ctx: CosmosDiagnosticsContext): Unit = { @@ -245,12 +254,12 @@ private class TransactionalBulkWriter log.logError(s"Batch input publishing flux failed, Context: ${operationContext.toString} $getThreadInfo", t) }) - val transactionalExecutor = new TransactionalBulkExecutor[Object]( + val transactionalExecutor = new TransactionalBulkExecutor( container, batchInputFlux, cosmosTransactionalBulkExecutionOptions) - val batchResponseFlux: SFlux[CosmosBatchResponse] = transactionalExecutor.execute().asScala + val batchResponseFlux: 
SFlux[CosmosBulkTransactionalBatchResponse] = transactionalExecutor.execute().asScala batchResponseFlux.subscribe( resp => { @@ -259,7 +268,7 @@ private class TransactionalBulkWriter try { // all the operations in the batch will have the same partition key value // get the partition key value from the first result - val partitionKeyValue = resp.getResults.get(0).getOperation.getPartitionKeyValue + val partitionKeyValue = resp.getCosmosBatch.getPartitionKeyValue val activeBatchOperationOpt = activeBatches.remove(partitionKeyValue) val pendingBatchOperationRetriesOpt = pendingBatchRetries.remove(partitionKeyValue) @@ -277,17 +286,35 @@ private class TransactionalBulkWriter if (activeBatchOperationOpt.isDefined || pendingBatchOperationRetriesOpt.isDefined) { val batchOperation = activeBatchOperationOpt.orElse(pendingBatchOperationRetriesOpt).get - if (isSuccessStatusCode(resp.getStatusCode)) { - // no error cases - outputMetricsPublisher.trackWriteOperation(resp.size(), None) // TODO[Annie]:verify the diagnostics - totalSuccessfulIngestionMetrics.addAndGet(resp.size()) - } else { + if (resp.getException != null) { + Option(resp.getException) match { + case Some(cosmosException: CosmosException) => + handleNonSuccessfulStatusCode( + batchOperation.operationContext, + batchOperation.cosmosBatch, + None, + isGettingRetried, + Some(cosmosException)) + case _ => + log.logWarning( + s"unexpected failure: partitionKeyValue=[" + + s"${batchOperation.operationContext}], encountered , attemptNumber=${batchOperation.operationContext.attemptNumber}, " + + s"exceptionMessage=${resp.getException.getMessage}, " + + s"Context: ${operationContext.toString} $getThreadInfo", resp.getException) + captureIfFirstFailure(resp.getException) + cancelWork() + } + } else if (!resp.getResponse.isSuccessStatusCode) { handleNonSuccessfulStatusCode( batchOperation.operationContext, batchOperation.cosmosBatch, - resp, + Some(resp.getResponse), isGettingRetried, None) + } else { + // no error case + outputMetricsPublisher.trackWriteOperation(resp.getResponse.size(), None) + totalSuccessfulIngestionMetrics.addAndGet(resp.getResponse.size()) } } } @@ -319,8 +346,6 @@ private class TransactionalBulkWriter ) } - def isSuccessStatusCode(statusCode: Int): Boolean = 200 <= statusCode && statusCode <= 299 - override def scheduleWrite(partitionKeyValue: PartitionKey, objectNode: ObjectNode): Unit = { Preconditions.checkState(!closed.get()) throwIfCapturedExceptionExists() @@ -400,13 +425,34 @@ private class TransactionalBulkWriter ( operationContext: OperationContext, cosmosBatch: CosmosBatch, - cosmosBatchResponse: CosmosBatchResponse, + cosmosBatchResponse: Option[CosmosBatchResponse], isGettingRetried: AtomicBoolean, responseException: Option[CosmosException] ) : Unit = { - val effectiveStatusCode = cosmosBatchResponse.getStatusCode - val effectiveSubStatusCode = cosmosBatchResponse.getSubStatusCode + val exceptionMessage = cosmosBatchResponse match { + case Some(r) => r.getErrorMessage + case None => responseException match { + case Some(e) => e.getMessage + case None => "" + } + } + + val effectiveStatusCode = cosmosBatchResponse match { + case Some(r) => r.getStatusCode + case None => responseException match { + case Some(e) => e.getStatusCode + case None => CosmosConstants.StatusCodes.Timeout + } + } + + val effectiveSubStatusCode = cosmosBatchResponse match { + case Some(r) => r.getSubStatusCode + case None => responseException match { + case Some(e) => e.getSubStatusCode + case None => 0 + } + } log.logDebug(s"encountered 
batch operation response with status code " + s"$effectiveStatusCode:$effectiveSubStatusCode, " + @@ -416,7 +462,7 @@ private class TransactionalBulkWriter // requeue log.logWarning(s"for partitionKeyValue=[${operationContext.partitionKeyValueInput}], " + s"encountered status code '$effectiveStatusCode:$effectiveSubStatusCode', will retry! " + - s"attemptNumber=${operationContext.attemptNumber}, exceptionMessage=${cosmosBatchResponse.getErrorMessage}, " + + s"attemptNumber=${operationContext.attemptNumber}, exceptionMessage=${exceptionMessage}, " + s"Context: {${operationContext.toString}} $getThreadInfo") val batchOperationRetry = CosmosBatchOperation( @@ -428,7 +474,7 @@ private class TransactionalBulkWriter ) this.scheduleRetry( - trackPendingRetryAction = () => pendingBatchRetries.put(cosmosBatch.getPartitionKeyValue, batchOperationRetry).isDefined, + trackPendingRetryAction = () => pendingBatchRetries.put(cosmosBatch.getPartitionKeyValue, batchOperationRetry).isEmpty, clearPendingRetryAction = () => pendingBatchRetries.remove(cosmosBatch.getPartitionKeyValue).isDefined, batchOperationRetry, effectiveStatusCode) @@ -437,7 +483,7 @@ private class TransactionalBulkWriter } else { log.logError(s"for partitionKeyValue=[${operationContext.partitionKeyValueInput}], " + s"encountered status code '$effectiveStatusCode:$effectiveSubStatusCode', all retries exhausted! " + - s"attemptNumber=${operationContext.attemptNumber}, exceptionMessage=${cosmosBatchResponse.getErrorMessage}, " + + s"attemptNumber=${operationContext.attemptNumber}, exceptionMessage=${exceptionMessage}, " + s"Context: {${operationContext.toString} $getThreadInfo") val message = s"All retries exhausted for batch operation - " + diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/BulkWriterITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/BulkWriterITest.scala index 9ca7f459cf15..43538a033c36 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/BulkWriterITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/BulkWriterITest.scala @@ -32,7 +32,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemOverwrite, + 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val metricsPublisher = new TestOutputMetricsPublisher val bulkWriter = new BulkWriter( @@ -74,7 +80,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemOverwrite, + 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val bulkWriter = new BulkWriter( 
container, @@ -123,7 +135,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab try { val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemOverwrite, + 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val bulkWriter = new BulkWriter( container, @@ -190,7 +208,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab try { val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemAppend, + 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val metricsPublisher = new TestOutputMetricsPublisher val bulkWriter = new BulkWriter( @@ -247,7 +271,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemOverwrite, + 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val metricsPublisher = new TestOutputMetricsPublisher val bulkWriter = new BulkWriter( @@ -285,6 +315,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemDelete, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900)) val bulkDeleter = new BulkWriter( @@ -316,7 +348,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemOverwrite, + 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val bulkWriter = new BulkWriter( container, @@ -382,6 +420,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemDeleteIfNotModified, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900)) val bulkDeleter = new BulkWriter( @@ -408,7 +448,13 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab val containerProperties = container.read().block().getProperties val partitionKeyDefinition = 
containerProperties.getPartitionKeyDefinition val containerConfig = CosmosContainerConfig(container.getDatabase.getId, container.getId, None) - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 5, bulkEnabled = true, bulkMaxPendingOperations = Some(900)) + val writeConfig = CosmosWriteConfig( + ItemWriteStrategy.ItemAppend, + maxRetryCount = 5, + bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), + bulkMaxPendingOperations = Some(900)) val bulkWriter = new BulkWriter( container, containerConfig, @@ -448,6 +494,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwriteIfNotModified, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -572,6 +620,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -702,6 +752,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -782,6 +834,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -866,6 +920,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -927,6 +983,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1005,6 +1063,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1067,6 +1127,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1144,6 +1206,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1206,6 +1270,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1315,6 +1381,8 @@ class BulkWriterITest 
extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900), patchConfigs = Some(CosmosPatchConfigs(new TrieMap[String, CosmosPatchColumnConfig]()))) @@ -1362,6 +1430,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900), patchConfigs = Some(CosmosPatchConfigs(new TrieMap[String, CosmosPatchColumnConfig]()))) @@ -1410,6 +1480,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1491,6 +1563,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1548,6 +1622,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -1613,6 +1689,8 @@ class BulkWriterITest extends IntegrationSpec with CosmosClient with AutoCleanab ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala index 0144b468582b..0b580fd7e367 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala @@ -728,6 +728,26 @@ class CosmosConfigSpec extends UnitSpec with BasicLoggingTrait { config.maxRetryNoProgressIntervalInSeconds shouldEqual 45 * 60 } + it should "parse transactional bulk write configs" in { + val userConfig = Map( + "spark.cosmos.write.strategy" -> "ItemOverwrite", + "spark.cosmos.write.bulk.enabled" -> "true", + "spark.cosmos.write.bulk.transactional" -> "true", + "spark.cosmos.write.bulk.transactional.maxOperationsConcurrency" -> "123", + "spark.cosmos.write.bulk.transactional.maxBatchesConcurrency" -> "5" + ) + + val config = CosmosWriteConfig.parseWriteConfig(userConfig, StructType(Nil)) + + config.bulkTransactional shouldEqual true + config.bulkExecutionConfigs.isDefined shouldEqual true + val txConfigs = config.bulkExecutionConfigs.get.asInstanceOf[CosmosWriteTransactionalBulkExecutionConfigs] + txConfigs.maxConcurrentOperations.isDefined shouldEqual true + txConfigs.maxConcurrentOperations.get shouldEqual 123 + txConfigs.maxConcurrentBatches.isDefined shouldEqual true + txConfigs.maxConcurrentBatches.get shouldEqual 5 + } + it should "parse partitioning config with custom Strategy" in { val partitioningConfig = Map( "spark.cosmos.read.partitioning.strategy" -> "Custom", @@ 
-976,9 +996,11 @@ class CosmosConfigSpec extends UnitSpec with BasicLoggingTrait { ) var writeConfig: CosmosWriteConfig = CosmosWriteConfig.parseWriteConfig(userConfig, schema) writeConfig should not be null - writeConfig.maxMicroBatchPayloadSizeInBytes should not be null - writeConfig.maxMicroBatchPayloadSizeInBytes.isDefined shouldEqual true - writeConfig.maxMicroBatchPayloadSizeInBytes.get shouldEqual BatchRequestResponseConstants.DEFAULT_MAX_DIRECT_MODE_BATCH_REQUEST_BODY_SIZE_IN_BYTES + writeConfig.bulkExecutionConfigs should not be null + var bulkExecutorConfigs = writeConfig.bulkExecutionConfigs.get.asInstanceOf[CosmosWriteBulkExecutionConfigs] + bulkExecutorConfigs.maxMicroBatchPayloadSizeInBytes should not be null + bulkExecutorConfigs.maxMicroBatchPayloadSizeInBytes.isDefined shouldEqual true + bulkExecutorConfigs.maxMicroBatchPayloadSizeInBytes.get shouldEqual BatchRequestResponseConstants.DEFAULT_MAX_DIRECT_MODE_BATCH_REQUEST_BODY_SIZE_IN_BYTES userConfig = Map( "spark.cosmos.write.strategy" -> "ItemOverwrite", @@ -988,9 +1010,11 @@ class CosmosConfigSpec extends UnitSpec with BasicLoggingTrait { writeConfig = CosmosWriteConfig.parseWriteConfig(userConfig, schema) writeConfig should not be null - writeConfig.maxMicroBatchPayloadSizeInBytes should not be null - writeConfig.maxMicroBatchPayloadSizeInBytes.isDefined shouldEqual true - writeConfig.maxMicroBatchPayloadSizeInBytes.get shouldEqual 1000000 + writeConfig.bulkExecutionConfigs should not be null + bulkExecutorConfigs = writeConfig.bulkExecutionConfigs.get.asInstanceOf[CosmosWriteBulkExecutionConfigs] + bulkExecutorConfigs.maxMicroBatchPayloadSizeInBytes should not be null + bulkExecutorConfigs.maxMicroBatchPayloadSizeInBytes.isDefined shouldEqual true + bulkExecutorConfigs.maxMicroBatchPayloadSizeInBytes.get shouldEqual 1000000 } "Config Parser" should "validate default operation types for patch configs" in { diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterITest.scala index ba92f33fc307..2fc74fbfc4d5 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterITest.scala @@ -31,7 +31,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val metricsPublisher = new TestOutputMetricsPublisher val pointWriter = new PointWriter( @@ -73,7 +73,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val metricsPublisher = new TestOutputMetricsPublisher val pointWriter = new PointWriter( @@ -107,7 +107,7 @@ class PointWriterITest extends 
IntegrationSpec with CosmosClient with AutoCleana secondObjectNodeHasAllFieldsOfFirstObjectNode(expectedItem, itemFromDB) shouldEqual true } - val deleteConfig = CosmosWriteConfig(ItemWriteStrategy.ItemDelete, maxRetryCount = 3, bulkEnabled = false) + val deleteConfig = CosmosWriteConfig(ItemWriteStrategy.ItemDelete, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val pointDeleter = new PointWriter( container, @@ -133,7 +133,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -192,7 +192,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val deleteConfig = CosmosWriteConfig( ItemWriteStrategy.ItemDeleteIfNotModified, maxRetryCount = 3, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointDeleter = new PointWriter( container, @@ -217,7 +218,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val container = getContainer val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 0, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 0, bulkEnabled = false, bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, @@ -253,7 +254,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition val writeConfig = CosmosWriteConfig( - ItemWriteStrategy.ItemOverwriteIfNotModified, maxRetryCount = 3, bulkEnabled = false) + ItemWriteStrategy.ItemOverwriteIfNotModified, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val metricsPublisher = new TestOutputMetricsPublisher var pointWriter = new PointWriter( @@ -372,7 +373,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val metricsPublisher = new TestOutputMetricsPublisher val pointWriter = new PointWriter( @@ -501,7 +503,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -578,7 +581,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -637,7 +641,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = 
false) val pointWriter = new PointWriter( container, @@ -718,7 +723,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -794,6 +800,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -860,7 +868,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -964,6 +973,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = false, + bulkTransactional = false, bulkMaxPendingOperations = Some(900), patchConfigs = Some(CosmosPatchConfigs(new TrieMap[String, CosmosPatchColumnConfig]()))) @@ -1011,6 +1021,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = false, + bulkTransactional = false, bulkMaxPendingOperations = Some(900), patchConfigs = Some(CosmosPatchConfigs(new TrieMap[String, CosmosPatchColumnConfig])) ) @@ -1059,7 +1070,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -1132,7 +1144,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -1179,7 +1192,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -1238,7 +1252,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, @@ -1314,6 +1329,8 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterSubpartitionITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterSubpartitionITest.scala index a1d8073f296d..5ada2ac957c3 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterSubpartitionITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterSubpartitionITest.scala @@ -32,7 +32,7 @@ 
class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -67,7 +67,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -96,7 +96,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit secondObjectNodeHasAllFieldsOfFirstObjectNode(expectedItem, itemFromDB) shouldEqual true } - val deleteConfig = CosmosWriteConfig(ItemWriteStrategy.ItemDelete, maxRetryCount = 3, bulkEnabled = false) + val deleteConfig = CosmosWriteConfig(ItemWriteStrategy.ItemDelete, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val pointDeleter = new PointWriter( container, partitionKeyDefinition, deleteConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -122,7 +122,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemOverwrite, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -181,7 +181,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val deleteConfig = CosmosWriteConfig( ItemWriteStrategy.ItemDeleteIfNotModified, maxRetryCount = 3, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointDeleter = new PointWriter( container, partitionKeyDefinition, deleteConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -206,7 +207,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val container = getContainer val containerProperties = container.read().block().getProperties val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition - val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 0, bulkEnabled = false) + val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 0, bulkEnabled = false, bulkTransactional = false) val pointWriter = new 
PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) val items = new mutable.HashMap[String, mutable.Set[ObjectNode]] with mutable.MultiMap[String, ObjectNode] @@ -242,7 +243,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition val writeConfig = CosmosWriteConfig( - ItemWriteStrategy.ItemOverwriteIfNotModified, maxRetryCount = 3, bulkEnabled = false) + ItemWriteStrategy.ItemOverwriteIfNotModified, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false) var pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -346,7 +347,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -446,7 +448,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -525,7 +528,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -596,7 +600,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -675,7 +680,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -759,6 +765,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) @@ -821,7 +829,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, 
writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -930,6 +939,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = false, + bulkTransactional = false, bulkMaxPendingOperations = Some(900), patchConfigs = Some(CosmosPatchConfigs(new TrieMap[String, CosmosPatchColumnConfig]()))) @@ -975,6 +985,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = false, + bulkTransactional = false, bulkMaxPendingOperations = Some(900), patchConfigs = Some(CosmosPatchConfigs(new TrieMap[String, CosmosPatchColumnConfig])) ) @@ -1034,7 +1045,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -1105,7 +1117,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -1151,7 +1164,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -1211,7 +1225,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit val writeConfig = CosmosWriteConfig( ItemWriteStrategy.ItemOverwrite, 5, - bulkEnabled = false) + bulkEnabled = false, + bulkTransactional = false) val pointWriter = new PointWriter( container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher) @@ -1292,6 +1307,8 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit ItemWriteStrategy.ItemOverwrite, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), bulkMaxPendingOperations = Some(900) ) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala index 5b63b6a236a8..778136d375cf 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala @@ -31,157 +31,161 @@ class SparkE2EBulkWriteITest //scalastyle:off magic.number //scalastyle:off null - it should s"support bulk ingestion when BulkWriter needs to get restarted" in { - val cosmosEndpoint = TestConfigurations.HOST - val cosmosMasterKey = TestConfigurations.MASTER_KEY - - val configMapBuilder = scala.collection.mutable.Map( - "spark.cosmos.accountEndpoint" -> 
cosmosEndpoint, - "spark.cosmos.accountKey" -> cosmosMasterKey, - "spark.cosmos.database" -> cosmosDatabase, - "spark.cosmos.container" -> cosmosContainer, - "spark.cosmos.serialization.inclusionMode" -> "NonDefault" - ) - - var faultInjectionRuleOption : Option[FaultInjectionRule] = None - - try { - // set-up logging - val logs = scala.collection.mutable.ListBuffer[CosmosDiagnosticsContext]() - - configMapBuilder += "spark.cosmos.account.clientBuilderInterceptors" -> "com.azure.cosmos.spark.TestCosmosClientBuilderInterceptor" - TestCosmosClientBuilderInterceptor.setCallback(builder => { - val thresholds = new CosmosDiagnosticsThresholds() - .setPointOperationLatencyThreshold(Duration.ZERO) - .setNonPointOperationLatencyThreshold(Duration.ZERO) - val telemetryCfg = new CosmosClientTelemetryConfig() - .showQueryMode(ShowQueryMode.ALL) - .diagnosticsHandler(new CompositeLoggingHandler(logs)) - .diagnosticsThresholds(thresholds) - builder.clientTelemetryConfig(telemetryCfg) - }) - - // set-up fault injection - configMapBuilder += "spark.cosmos.account.clientInterceptors" -> "com.azure.cosmos.spark.TestFaultInjectionClientInterceptor" - configMapBuilder += "spark.cosmos.write.flush.intervalInSeconds" -> "10" - configMapBuilder += "spark.cosmos.write.flush.noProgress.maxIntervalInSeconds" -> "30" - configMapBuilder += "spark.cosmos.write.flush.noProgress.maxRetryIntervalInSeconds" -> "300" - configMapBuilder += "spark.cosmos.write.onRetryCommitInterceptor" -> "com.azure.cosmos.spark.TestWriteOnRetryCommitInterceptor" - TestFaultInjectionClientInterceptor.setCallback(client => { - val faultInjectionResultBuilder = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) - .delay(Duration.ofHours(10000)) - .times(1) - - val endpoints = new FaultInjectionEndpointBuilder( - FeedRange.forLogicalPartition(new PartitionKey("range_1"))) - .build() - - val result = faultInjectionResultBuilder.build - val condition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.BATCH_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(endpoints) - .build - - faultInjectionRuleOption = Some(new FaultInjectionRuleBuilder("InjectedEndlessResponseDelay") - .condition(condition) - .result(result) - .build) - - TestWriteOnRetryCommitInterceptor.setCallback(() => faultInjectionRuleOption.get.disable()) - - CosmosFaultInjectionHelper.configureFaultInjectionRules( - client.getDatabase(cosmosDatabase).getContainer(cosmosContainer), - List(faultInjectionRuleOption.get).asJava).block - - client - }) - - val cfg = configMapBuilder.toMap - - val newSpark = getSpark - - // scalastyle:off underscore.import - // scalastyle:off import.grouping - import spark.implicits._ - val spark = newSpark - // scalastyle:on underscore.import - // scalastyle:on import.grouping - - val toBeIngested = scala.collection.mutable.ListBuffer[String]() - for (i <- 1 to 100) { - toBeIngested += s"record_$i" - } - - val df = toBeIngested.toSeq.toDF("id") - - var bytesWrittenSnapshot = 0L - var recordsWrittenSnapshot = 0L - var totalRequestChargeSnapshot: Option[AccumulableInfo] = None - - val statusStore = spark.sharedState.statusStore - val oldCount = statusStore.executionsCount() - - spark.sparkContext - .addSparkListener( - new SparkListener { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { - val outputMetrics = taskEnd.taskMetrics.outputMetrics - logInfo(s"ON_TASK_END - Records written: ${outputMetrics.recordsWritten}, " + - s"Bytes written: 
${outputMetrics.bytesWritten}, " + - s"${taskEnd.taskInfo.accumulables.mkString(", ")}") - bytesWrittenSnapshot = outputMetrics.bytesWritten - - recordsWrittenSnapshot = outputMetrics.recordsWritten - - taskEnd - .taskInfo - .accumulables - .filter(accumulableInfo => accumulableInfo.name.isDefined && - accumulableInfo.name.get.equals(CosmosConstants.MetricNames.TotalRequestCharge)) - .foreach( - accumulableInfo => { - totalRequestChargeSnapshot = Some(accumulableInfo) - } - ) - } - }) - - df.write.format("cosmos.oltp").mode("Append").options(cfg).save() - - // Wait until the new execution is started and being tracked. - eventually(timeout(10.seconds), interval(10.milliseconds)) { - assert(statusStore.executionsCount() > oldCount) - } - - // Wait for listener to finish computing the metrics for the execution. - eventually(timeout(10.seconds), interval(10.milliseconds)) { - assert(statusStore.executionsList().nonEmpty && - statusStore.executionsList().last.metricValues != null) - } - - recordsWrittenSnapshot shouldEqual 100 - bytesWrittenSnapshot > 0 shouldEqual true - - // that the write by spark is visible by the client query - // wait for a second to allow replication is completed. - Thread.sleep(1000) - - // the new item will be always persisted - val ids = queryItems("SELECT c.id FROM c ORDER by c.id").toArray - ids should have size 100 - val firstDoc = ids(0) - firstDoc.get("id").asText() shouldEqual "record_1" - - // validate logs - logs.nonEmpty shouldEqual true - } finally { - TestCosmosClientBuilderInterceptor.resetCallback() - TestFaultInjectionClientInterceptor.resetCallback() - faultInjectionRuleOption match { - case Some(rule) => rule.disable() - case None => + for (enableBulkTransactional <- Seq(true, false)) { + it should s"support bulk ingestion when BulkWriter needs to get restarted with transactional bulk enabled $enableBulkTransactional" in { + + val cosmosEndpoint = TestConfigurations.HOST + val cosmosMasterKey = TestConfigurations.MASTER_KEY + + val configMapBuilder = scala.collection.mutable.Map( + "spark.cosmos.accountEndpoint" -> cosmosEndpoint, + "spark.cosmos.accountKey" -> cosmosMasterKey, + "spark.cosmos.database" -> cosmosDatabase, + "spark.cosmos.container" -> cosmosContainer, + "spark.cosmos.serialization.inclusionMode" -> "NonDefault", + "spark.cosmos.write.bulk.transactional" -> enableBulkTransactional.toString + ) + + var faultInjectionRuleOption : Option[FaultInjectionRule] = None + + try { + // set-up logging + val logs = scala.collection.mutable.ListBuffer[CosmosDiagnosticsContext]() + + configMapBuilder += "spark.cosmos.account.clientBuilderInterceptors" -> "com.azure.cosmos.spark.TestCosmosClientBuilderInterceptor" + TestCosmosClientBuilderInterceptor.setCallback(builder => { + val thresholds = new CosmosDiagnosticsThresholds() + .setPointOperationLatencyThreshold(Duration.ZERO) + .setNonPointOperationLatencyThreshold(Duration.ZERO) + val telemetryCfg = new CosmosClientTelemetryConfig() + .showQueryMode(ShowQueryMode.ALL) + .diagnosticsHandler(new CompositeLoggingHandler(logs)) + .diagnosticsThresholds(thresholds) + builder.clientTelemetryConfig(telemetryCfg) + }) + + // set-up fault injection + configMapBuilder += "spark.cosmos.account.clientInterceptors" -> "com.azure.cosmos.spark.TestFaultInjectionClientInterceptor" + configMapBuilder += "spark.cosmos.write.flush.intervalInSeconds" -> "10" + configMapBuilder += "spark.cosmos.write.flush.noProgress.maxIntervalInSeconds" -> "30" + configMapBuilder += 
"spark.cosmos.write.flush.noProgress.maxRetryIntervalInSeconds" -> "300" + configMapBuilder += "spark.cosmos.write.onRetryCommitInterceptor" -> "com.azure.cosmos.spark.TestWriteOnRetryCommitInterceptor" + TestFaultInjectionClientInterceptor.setCallback(client => { + val faultInjectionResultBuilder = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) + .delay(Duration.ofHours(10000)) + .times(1) + + val endpoints = new FaultInjectionEndpointBuilder( + FeedRange.forLogicalPartition(new PartitionKey("range_1"))) + .build() + + val result = faultInjectionResultBuilder.build + val condition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.BATCH_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(endpoints) + .build + + faultInjectionRuleOption = Some(new FaultInjectionRuleBuilder("InjectedEndlessResponseDelay") + .condition(condition) + .result(result) + .build) + + TestWriteOnRetryCommitInterceptor.setCallback(() => faultInjectionRuleOption.get.disable()) + + CosmosFaultInjectionHelper.configureFaultInjectionRules( + client.getDatabase(cosmosDatabase).getContainer(cosmosContainer), + List(faultInjectionRuleOption.get).asJava).block + + client + }) + + val cfg = configMapBuilder.toMap + + val newSpark = getSpark + + // scalastyle:off underscore.import + // scalastyle:off import.grouping + import spark.implicits._ + val spark = newSpark + // scalastyle:on underscore.import + // scalastyle:on import.grouping + + val toBeIngested = scala.collection.mutable.ListBuffer[String]() + for (i <- 1 to 100) { + toBeIngested += s"record_$i" + } + + val df = toBeIngested.toSeq.toDF("id") + + var bytesWrittenSnapshot = 0L + var recordsWrittenSnapshot = 0L + var totalRequestChargeSnapshot: Option[AccumulableInfo] = None + + val statusStore = spark.sharedState.statusStore + val oldCount = statusStore.executionsCount() + + spark.sparkContext + .addSparkListener( + new SparkListener { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + val outputMetrics = taskEnd.taskMetrics.outputMetrics + logInfo(s"ON_TASK_END - Records written: ${outputMetrics.recordsWritten}, " + + s"Bytes written: ${outputMetrics.bytesWritten}, " + + s"${taskEnd.taskInfo.accumulables.mkString(", ")}") + bytesWrittenSnapshot = outputMetrics.bytesWritten + + recordsWrittenSnapshot = outputMetrics.recordsWritten + + taskEnd + .taskInfo + .accumulables + .filter(accumulableInfo => accumulableInfo.name.isDefined && + accumulableInfo.name.get.equals(CosmosConstants.MetricNames.TotalRequestCharge)) + .foreach( + accumulableInfo => { + totalRequestChargeSnapshot = Some(accumulableInfo) + } + ) + } + }) + + df.write.format("cosmos.oltp").mode("Append").options(cfg).save() + + // Wait until the new execution is started and being tracked. + eventually(timeout(10.seconds), interval(10.milliseconds)) { + assert(statusStore.executionsCount() > oldCount) + } + + // Wait for listener to finish computing the metrics for the execution. + eventually(timeout(10.seconds), interval(10.milliseconds)) { + assert(statusStore.executionsList().nonEmpty && + statusStore.executionsList().last.metricValues != null) + } + + recordsWrittenSnapshot shouldEqual 100 + bytesWrittenSnapshot > 0 shouldEqual true + + // that the write by spark is visible by the client query + // wait for a second to allow replication is completed. 
+ Thread.sleep(1000) + + // the new item will be always persisted + val ids = queryItems("SELECT c.id FROM c ORDER by c.id").toArray + ids should have size 100 + val firstDoc = ids(0) + firstDoc.get("id").asText() shouldEqual "record_1" + + // validate logs + logs.nonEmpty shouldEqual true + } finally { + TestCosmosClientBuilderInterceptor.resetCallback() + TestFaultInjectionClientInterceptor.resetCallback() + faultInjectionRuleOption match { + case Some(rule) => rule.disable() + case None => + } } } } diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TransactionalBatchITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TransactionalBatchITest.scala index 3b9feae2fdbd..0905934bebba 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TransactionalBatchITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TransactionalBatchITest.scala @@ -2,9 +2,9 @@ // Licensed under the MIT License. package com.azure.cosmos.spark +import com.azure.cosmos.CosmosAsyncClient import com.azure.cosmos.implementation.{TestConfigurations, Utils} import com.azure.cosmos.models.{PartitionKey, PartitionKeyBuilder} -import com.azure.cosmos.{CosmosAsyncClient, CosmosException} import com.azure.cosmos.test.faultinjection._ import com.fasterxml.jackson.databind.node.ObjectNode import org.apache.spark.sql.types._ @@ -50,7 +50,7 @@ class TransactionalBatchITest extends IntegrationSpec ) val operationsDf = spark.createDataFrame(batchOperations.asJava, schema) - + // Execute transactional batch using bulk transactional mode operationsDf.write .format("cosmos.oltp") diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/utils/CosmosPatchTestHelper.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/utils/CosmosPatchTestHelper.scala index 982791600b58..c6876bef3b1c 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/utils/CosmosPatchTestHelper.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/utils/CosmosPatchTestHelper.scala @@ -6,7 +6,7 @@ package com.azure.cosmos.spark.utils import com.azure.cosmos.CosmosAsyncContainer import com.azure.cosmos.implementation.apachecommons.lang.StringUtils import com.azure.cosmos.models.PartitionKeyDefinition -import com.azure.cosmos.spark.{BulkWriter, CosmosContainerConfig, CosmosPatchColumnConfig, CosmosPatchConfigs, CosmosWriteConfig, DiagnosticsConfig, ItemWriteStrategy, OutputMetricsPublisherTrait, PointWriter} +import com.azure.cosmos.spark.{BulkWriter, CosmosContainerConfig, CosmosPatchColumnConfig, CosmosPatchConfigs, CosmosWriteBulkExecutionConfigs, CosmosWriteConfig, DiagnosticsConfig, ItemWriteStrategy, OutputMetricsPublisherTrait, PointWriter} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.node.ObjectNode import org.apache.commons.lang3.RandomUtils @@ -160,6 +160,7 @@ def getPatchFullTestSchemaWithSubpartitions(): StructType = { )) } + // TODO: wire up with transactional batch when patch is supported in transactional bulk writer def getBulkWriterForPatch(columnConfigsMap: TrieMap[String, CosmosPatchColumnConfig], container: CosmosAsyncContainer, containerConfig: CosmosContainerConfig, @@ -171,6 +172,8 @@ def getPatchFullTestSchemaWithSubpartitions(): StructType = { ItemWriteStrategy.ItemPatch, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = 
Some(CosmosWriteBulkExecutionConfigs()), patchConfigs = Some(patchConfigs)) new BulkWriter( @@ -183,6 +186,7 @@ def getPatchFullTestSchemaWithSubpartitions(): StructType = { 1) } + // TODO: wire up with transactional bulk writer when patchBulkUpdate is supported in transactional bulk writer def getBulkWriterForPatchBulkUpdate(columnConfigsMap: TrieMap[String, CosmosPatchColumnConfig], container: CosmosAsyncContainer, containerConfig: CosmosContainerConfig, @@ -193,6 +197,8 @@ def getPatchFullTestSchemaWithSubpartitions(): StructType = { ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = true, + bulkTransactional = false, + bulkExecutionConfigs = Some(CosmosWriteBulkExecutionConfigs()), patchConfigs = Some(patchConfigs)) new BulkWriter( @@ -216,6 +222,7 @@ def getPatchFullTestSchemaWithSubpartitions(): StructType = { ItemWriteStrategy.ItemPatch, 5, bulkEnabled = false, + bulkTransactional = false, patchConfigs = Some(patchConfigs)) new PointWriter( @@ -237,6 +244,7 @@ def getPatchFullTestSchemaWithSubpartitions(): StructType = { ItemWriteStrategy.ItemBulkUpdate, 5, bulkEnabled = false, + bulkTransactional = false, patchConfigs = Some(patchConfigs)) new PointWriter( diff --git a/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md index e3cca105c9e2..4c3391dcb35d 100644 --- a/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md @@ -4,6 +4,7 @@ #### Features Added * Initial release of Spark 4.0 connector with Scala 2.13 support +* Added transactional batch support. See [PR 47478](https://github.com/Azure/azure-sdk-for-java/pull/47478) and [PR 47697](https://github.com/Azure/azure-sdk-for-java/pull/47697) and [47803](https://github.com/Azure/azure-sdk-for-java/pull/47803) #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java index 96d2969a8cb3..657cc22f6366 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java @@ -70,6 +70,17 @@ public static Object[][] exceptionArgsProvider() { }; } + @DataProvider(name = "requestRateTooLarge_batch_ArgProvider") + public static Object[][] requestRateTooLarge_batch_ArgProvider() { + return new Object[][]{ + // OperationType, ResourceType, disableRetryForThrottledBatchRequest + { OperationType.Batch, ResourceType.Document, true }, + { OperationType.Batch, ResourceType.Document, false }, + { OperationType.Batch, ResourceType.DocumentCollection, true }, + { OperationType.Read, ResourceType.Document, true } + }; + } + @Test(groups = "unit", dataProvider = "requestRateTooLargeArgProvider") public void requestRateTooLarge( OperationType operationType, @@ -94,7 +105,8 @@ public void requestRateTooLarge( throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); // Create throttling exception with retry delay Map headers = new HashMap<>(); @@ -170,7 +182,8 @@ public void networkFailureOnRead() throws Exception { throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - 
globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = new SocketException("Dummy SocketException"); CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -222,7 +235,8 @@ public void shouldRetryOnGatewayTimeout( throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = ReadTimeoutException.INSTANCE; CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.REQUEST_TIMEOUT, exception); @@ -269,7 +283,8 @@ public void tcpNetworkFailureOnRead() throws Exception { retryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = ReadTimeoutException.INSTANCE; GoneException goneException = new GoneException(exception); @@ -326,7 +341,8 @@ public void networkFailureOnWrite() throws Exception { throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = new SocketException("Dummy SocketException");; CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -372,7 +388,8 @@ public void tcpNetworkFailureOnWrite( retryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); //Non retribale exception for write GoneException goneException = new GoneException(exception); @@ -441,7 +458,8 @@ public void networkFailureOnUpsert() throws Exception { throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = new SocketException("Dummy SocketException"); CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -485,7 +503,8 @@ public void tcpNetworkFailureOnUpsert() throws Exception { retryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = new SocketException("Dummy SocketException"); GoneException goneException = new GoneException(exception); @@ -530,7 +549,8 @@ public void networkFailureOnDelete() throws Exception { throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = ReadTimeoutException.INSTANCE; CosmosException cosmosException = BridgeInternal.createCosmosException( @@ -575,7 +595,8 @@ public void 
tcpNetworkFailureOnDelete() throws Exception { retryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = ReadTimeoutException.INSTANCE; GoneException goneException = new GoneException(exception); @@ -619,7 +640,8 @@ public void onBeforeSendRequestNotInvoked() { throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); Exception exception = ReadTimeoutException.INSTANCE; @@ -660,7 +682,8 @@ public void returnWithInternalServerErrorOnPpcbFailure(CosmosException cosmosExc throttlingRetryOptions, null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); RxDocumentServiceRequest dsr; Mono shouldRetry; @@ -677,6 +700,72 @@ public void returnWithInternalServerErrorOnPpcbFailure(CosmosException cosmosExc .build()); } + @Test(groups = "unit", dataProvider = "requestRateTooLarge_batch_ArgProvider") + public void requestRateTooLarge_batch( + OperationType operationType, + ResourceType resourceType, + boolean disableRetryForThrottledBatchRequest) throws Exception { + + ThrottlingRetryOptions throttlingRetryOptions = + new ThrottlingRetryOptions().setMaxRetryAttemptsOnThrottledRequests(1); + + GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker globalPartitionEndpointManagerForPerPartitionCircuitBreaker + = Mockito.mock(GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker.class); + + GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover globalPartitionEndpointManagerForPerPartitionAutomaticFailover + = Mockito.mock(GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover.class); + + Mockito + .doReturn(new RegionalRoutingContext(new URI("http://localhost"))) + .when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); + + Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy( + mockDiagnosticsClientContext(), + endpointManager, + true, + throttlingRetryOptions, + null, + globalPartitionEndpointManagerForPerPartitionCircuitBreaker, + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + disableRetryForThrottledBatchRequest); + + // Create throttling exception with retry delay + Map headers = new HashMap<>(); + headers.put( + HttpConstants.HttpHeaders.RETRY_AFTER_IN_MILLISECONDS, + "1000"); + headers.put(WFConstants.BackendHeaders.SUB_STATUS, + Integer.toString(HttpConstants.SubStatusCodes.USER_REQUEST_RATE_TOO_LARGE)); + RequestRateTooLargeException throttlingException = new RequestRateTooLargeException(null, 1, "1", headers); + + RxDocumentServiceRequest request = RxDocumentServiceRequest.createFromName(mockDiagnosticsClientContext(), + operationType, + "/dbs/db/colls/col", + resourceType); + request.requestContext = new DocumentServiceRequestContext(); + request.requestContext.routeToLocation(0, true); + + clientRetryPolicy.onBeforeSendRequest(request); + + Mono shouldRetry = 
clientRetryPolicy.shouldRetry(throttlingException); + if (operationType != OperationType.Batch || resourceType != ResourceType.Document) { + validateSuccess(shouldRetry, ShouldRetryValidator.builder() + .nullException() + .shouldRetry(true) + .build()); + } else if (disableRetryForThrottledBatchRequest) { + validateSuccess(shouldRetry, ShouldRetryValidator.builder() + .shouldRetry(false) + .build()); + } else { + validateSuccess(shouldRetry, ShouldRetryValidator.builder() + .shouldRetry(true) + .build()); + } + } + public static void validateSuccess(Mono single, ShouldRetryValidator validator) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java index e672381f247a..592b49dbe79b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java @@ -323,7 +323,8 @@ public void partitionedSessionToken(boolean isNameBased) throws NoSuchMethodExce serverBatchRequest, new RequestOptions(), false, - true) + true, + false) .block(); assertThat(getSessionTokensInRequests().size()).isEqualTo(1); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/BulkExecutorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/BulkExecutorTest.java index 0b0346e9a2b7..c7ccbbcd3678 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/BulkExecutorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/BulkExecutorTest.java @@ -38,10 +38,12 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import java.lang.reflect.Field; import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.UUID; import java.util.stream.Collectors; @@ -292,4 +294,56 @@ public void executeBulk_complete() throws InterruptedException { iterations++; } } + + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeBulk_tooManyRequest_recordInThresholds() throws Exception { + this.container = createContainer(database); + + String pkValue = UUID.randomUUID().toString(); + TestDoc testDoc = this.populateTestDoc(pkValue); + List cosmosItemOperations = new ArrayList<>(); + cosmosItemOperations.add(CosmosBulkOperations.getCreateItemOperation(testDoc, new PartitionKey(pkValue))); + + FaultInjectionRule tooManyRequestRule = + new FaultInjectionRuleBuilder("ttrs-" + UUID.randomUUID()) + .condition(new FaultInjectionConditionBuilder().operationType(FaultInjectionOperationType.BATCH_ITEM).build()) + .result(FaultInjectionResultBuilders.getResultBuilder(FaultInjectionServerErrorType.TOO_MANY_REQUEST).times(1).build()) + .duration(Duration.ofSeconds(30)) + .hitLimit(1) + .build(); + + CosmosBulkExecutionOptionsImpl cosmosBulkExecutionOptions = new CosmosBulkExecutionOptionsImpl(); + final BulkExecutor executor = new BulkExecutor<>( + container, + Flux.fromArray(cosmosItemOperations.toArray(new CosmosItemOperation[0])), + cosmosBulkExecutionOptions); + + try { + CosmosFaultInjectionHelper.configureFaultInjectionRules(container, Arrays.asList(tooManyRequestRule)).block(); + + List> responses = executor.execute().collectList().block(); + 
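+            // a single response is expected for the single queued create operation; the injected 429 is
+            // retried inside the executor, and that retry is what gets recorded in the partition-scope thresholds below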
+ assertThat(responses.size()).isEqualTo(1); + + // inspect partitionScopeThresholds via reflection and verify a retry was recorded + Field mapField = BulkExecutor.class.getDeclaredField("partitionScopeThresholds"); + mapField.setAccessible(true); + Map thresholdsMap = (Map) mapField.get(executor); + + assertThat(thresholdsMap).isNotEmpty(); + Object thresholdsObj = thresholdsMap.values().iterator().next(); + PartitionScopeThresholds thresholds = (PartitionScopeThresholds) thresholdsObj; + + PartitionScopeThresholds.CurrentIntervalThresholds current = thresholds.getCurrentThresholds(); + long retried = current.currentRetriedOperationCount.get(); + + assertThat(retried).isEqualTo(1); + + } finally { + tooManyRequestRule.disable(); + if (executor != null && !executor.isDisposed()) { + executor.dispose(); + } + } + } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/CosmosTransactionalBulkExecutionOptionsImplTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/CosmosTransactionalBulkExecutionOptionsImplTest.java new file mode 100644 index 000000000000..ed8a44756c9d --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/CosmosTransactionalBulkExecutionOptionsImplTest.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.batch; + +import com.azure.cosmos.implementation.CosmosTransactionalBulkExecutionOptionsImpl; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.testng.AssertJUnit.fail; + +public class CosmosTransactionalBulkExecutionOptionsImplTest { + + @DataProvider(name = "maxBatchesConcurrencyArgProvider") + public static Object[][] maxBatchesConcurrencyArgProvider() { + return new Object[][]{ + // value, is valid + { 1, true }, + { 10, false }, + { 5, true }, + { 0, false }, + { -1, false } + }; + } + + @Test(groups = "unit") + public void default_options() { + CosmosTransactionalBulkExecutionOptionsImpl options = new CosmosTransactionalBulkExecutionOptionsImpl(); + assertThat(options.getMaxBatchesConcurrency()).isEqualTo(5); + assertThat(options.getMaxOperationsConcurrency()).isEqualTo(100); + } + + @Test(groups = "unit", dataProvider = "maxBatchesConcurrencyArgProvider") + public void setMaxOperationsConcurrency(int value, boolean isValid) { + CosmosTransactionalBulkExecutionOptionsImpl options = new CosmosTransactionalBulkExecutionOptionsImpl(); + try { + options.setMaxBatchesConcurrency(value); + if (isValid) { + assertThat(options.getMaxBatchesConcurrency()).isEqualTo(value); + } else { + fail("Should have failed for maxBatchesConcurrency " + value); + } + } catch (IllegalArgumentException e) { + if (isValid) { + fail("should be valid value for maxBatchesConcurrency"); + } + } + } + +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutorTest.java new file mode 100644 index 000000000000..d9b8f43776fb --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutorTest.java @@ -0,0 +1,426 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. +package com.azure.cosmos.implementation.batch; + +import com.azure.cosmos.BatchTestBase; +import com.azure.cosmos.ConnectionMode; +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosAsyncDatabase; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.CosmosDatabaseForTest; +import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.CosmosTransactionalBulkExecutionOptionsImpl; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.models.CosmosBatch; +import com.azure.cosmos.models.CosmosBatchResponse; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; +import com.azure.cosmos.test.faultinjection.FaultInjectionRule; +import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType; +import com.azure.cosmos.test.faultinjection.FaultInjectionResultBuilders; +import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType; +import com.azure.cosmos.test.faultinjection.FaultInjectionConditionBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionRuleBuilder; +import io.netty.handler.codec.http.HttpResponseStatus; +import org.testng.SkipException; +import org.testng.annotations.AfterClass; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; +import reactor.core.Disposable; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import java.lang.reflect.Field; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TransactionalBulkExecutorTest extends BatchTestBase { + + private CosmosAsyncClient client; + private CosmosAsyncContainer container; + private CosmosAsyncDatabase database; + private String preExistingDatabaseId = CosmosDatabaseForTest.generateId(); + + @Factory(dataProvider = "simpleClientBuildersWithJustDirectTcp") + public TransactionalBulkExecutorTest(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + @AfterClass(groups = { "emulator" }, timeOut = 3 * SHUTDOWN_TIMEOUT, alwaysRun = true) + public void afterClass() { + logger.info("starting ...."); + safeDeleteDatabase(database); + safeClose(client); + } + + @AfterMethod(groups = { "emulator" }) + public void afterTest() { + if (this.container != null) { + try { + this.container.delete().block(); + } catch (CosmosException error) { + if (error.getStatusCode() != 404) { + throw error; + } + } + } + } + + @BeforeMethod(groups = { "emulator" }) + public void beforeTest() throws Exception { + this.container = null; + } + + @BeforeClass(groups = { "emulator" }, timeOut = SETUP_TIMEOUT) + public void before_CosmosContainerTest() { + client = getClientBuilder().buildAsyncClient(); + database = createDatabase(client, preExistingDatabaseId); + } + + static protected CosmosAsyncContainer createContainer(CosmosAsyncDatabase database) { + String collectionName = UUID.randomUUID().toString(); + CosmosContainerProperties containerProperties = getCollectionDefinition(collectionName); + + 
database.createContainer(containerProperties).block(); + return database.getContainer(collectionName); + } + + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeTransactionalBulk_cancel() throws InterruptedException { + int totalRequest = 100; + this.container = createContainer(database); + + List cosmosBatches = new ArrayList<>(); + for (int i = 0; i < totalRequest; i++) { + String partitionKey = UUID.randomUUID().toString(); + TestDoc testDoc = this.populateTestDoc(partitionKey); + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(partitionKey)); + batch.createItemOperation(testDoc); + cosmosBatches.add(batch); + } + + CosmosTransactionalBulkExecutionOptionsImpl cosmosBulkExecutionOptions = new CosmosTransactionalBulkExecutionOptionsImpl(); + Flux inputFlux = Flux + .fromIterable(cosmosBatches) + .delayElements(Duration.ofMillis(100)); + final TransactionalBulkExecutor executor = new TransactionalBulkExecutor( + container, + inputFlux, + cosmosBulkExecutionOptions); + Flux bulkResponseFlux = Flux.deferContextual(context -> executor.execute()); + + Disposable disposable = bulkResponseFlux.subscribe(); + disposable.dispose(); + + int iterations = 0; + while (true) { + assertThat(iterations < 100); + if (executor.isDisposed()) { + break; + } + + Thread.sleep(10); + iterations++; + } + } + + // Write operations should not be retried on a gone exception because the operation might have succeeded. + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeTransactionalBulk_OnGoneFailure() { + this.container = createContainer(database); + if (!ImplementationBridgeHelpers + .CosmosAsyncClientHelper + .getCosmosAsyncClientAccessor() + .getConnectionMode(this.client) + .equals(ConnectionMode.DIRECT.toString())) { + throw new SkipException("Failure injection for gone exception only supported for DIRECT mode"); + } + + List cosmosBatches = new ArrayList<>(); + String duplicatePK = UUID.randomUUID().toString(); + String id = UUID.randomUUID().toString(); + + EventDoc eventDoc = new EventDoc(id, 2, 4, "type1", + duplicatePK); + CosmosBatch createBatch = CosmosBatch.createCosmosBatch(new PartitionKey(duplicatePK)); + createBatch.createItemOperation(eventDoc); + cosmosBatches.add(createBatch); + + // configure fault injection rules + // using response delay to simulate a client generated gone for write operations + FaultInjectionRule serverResponseDelayRule = + new FaultInjectionRuleBuilder("serverResponseDelay-" + UUID.randomUUID()) + .condition( + new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.BATCH_ITEM) + .build() + ) + .result( + FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) + .delay(Duration.ofSeconds(6)) + .build() + ) + .duration(Duration.ofSeconds(10)) + .build(); + + final TransactionalBulkExecutor executor = new TransactionalBulkExecutor( + this.container, + Flux.fromIterable(cosmosBatches), + new CosmosTransactionalBulkExecutionOptionsImpl()); + + try { + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serverResponseDelayRule)) + .block(); + + List bulkResponse = + Flux + .deferContextual(context -> executor.execute()) + .collectList() + .block(); + + assertThat(bulkResponse.size()).isEqualTo(1); + + CosmosBulkTransactionalBatchResponse operationResponse = bulkResponse.get(0); + CosmosBatchResponse batchResponse = operationResponse.getResponse(); + assertThat(batchResponse).isNull(); + 
assertThat(operationResponse.getException()).isNotNull(); + + } finally { + if (executor != null && !executor.isDisposed()) { + executor.dispose(); + } + serverResponseDelayRule.disable(); + } + } + + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeTransactionalBulk_complete() throws InterruptedException { + int totalRequest = 10; + this.container = createContainer(database); + + List cosmosBatches = new ArrayList<>(); + for (int i = 0; i < totalRequest; i++) { + String partitionKey = UUID.randomUUID().toString(); + TestDoc testDoc = this.populateTestDoc(partitionKey); + CosmosBatch batch1 = CosmosBatch.createCosmosBatch(new PartitionKey(partitionKey)); + batch1.createItemOperation(testDoc); + cosmosBatches.add(batch1); + + partitionKey = UUID.randomUUID().toString(); + EventDoc eventDoc = new EventDoc(UUID.randomUUID().toString(), 2, 4, "type1", + partitionKey); + CosmosBatch batch2 = CosmosBatch.createCosmosBatch(new PartitionKey(partitionKey)); + batch2.createItemOperation(eventDoc); + cosmosBatches.add(batch2); + } + + CosmosTransactionalBulkExecutionOptionsImpl cosmosBulkExecutionOptions = new CosmosTransactionalBulkExecutionOptionsImpl(); + final TransactionalBulkExecutor executor = new TransactionalBulkExecutor( + container, + Flux.fromIterable(cosmosBatches), + cosmosBulkExecutionOptions); + Flux bulkResponseFlux = + Flux.deferContextual(context -> executor.execute()); + + Mono> convertToListMono = bulkResponseFlux + .collect(Collectors.toList()); + List bulkResponse = convertToListMono.block(); + + assertThat(bulkResponse.size()).isEqualTo(totalRequest * 2); + + for (CosmosBulkTransactionalBatchResponse response : bulkResponse) { + CosmosBatchResponse batchResponse = response.getResponse(); + + assertThat(batchResponse).isNotNull(); + assertThat(batchResponse.getResults().size()).isGreaterThan(0); + assertThat(batchResponse.getResults().get(0).getStatusCode()).isEqualTo(HttpResponseStatus.CREATED.code()); + assertThat(batchResponse.getRequestCharge()).isGreaterThan(0.0); + assertThat(batchResponse.getDiagnostics().toString()).isNotNull(); + } + + int iterations = 0; + while (true) { + assertThat(iterations < 100); + if (executor.isDisposed()) { + break; + } + + Thread.sleep(10); + iterations++; + } + } + + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeTransactionalBulk_maxConcurrentOpsLessThanBatchOps_complete() { + // test to verify that even CosmosTransactionalBulkExecutionOptionsImpl.maxOperationsConcurrency < CosmosBatch.Operations.size + // the executor can still complete + + this.container = createContainer(database); + + List cosmosBatches = new ArrayList<>(); + String pkValue = UUID.randomUUID().toString(); + + CosmosBatch cosmosBatch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); + for (int i = 0; i < 10; i++) { + TestDoc testDoc = this.populateTestDoc(pkValue); + cosmosBatch.createItemOperation(testDoc); + } + cosmosBatches.add(cosmosBatch); + + CosmosTransactionalBulkExecutionOptionsImpl transactionalBulkExecutionOptions = new CosmosTransactionalBulkExecutionOptionsImpl(); + transactionalBulkExecutionOptions.setMaxOperationsConcurrency(1); + final TransactionalBulkExecutor executor = new TransactionalBulkExecutor( + container, + Flux.fromIterable(cosmosBatches), + transactionalBulkExecutionOptions); + + List responses = executor.execute().collectList().block(); + assertThat(responses.size()).isEqualTo(1); + assertThat(responses.get(0).getResponse().getStatusCode()).isEqualTo(HttpResponseStatus.OK.code()); + } 
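+    // Verifies end-to-end concurrency control: with an injected per-request response delay, running the same
+    // batches with maxOperationsConcurrency = 1 should take roughly batchCount * delayMillis, while running
+    // them with a higher concurrency setting should complete measurably faster.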
+ + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeTransactionalBulk_concurrencyControl_e2e() { + this.container = createContainer(database); + + String pkValue = UUID.randomUUID().toString(); + int batchCount = 3; + int delayMillis = 1000; + + List cosmosBatches = new ArrayList<>(); + for (int i = 0; i < batchCount; i++) { + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); + TestDoc testDoc = this.populateTestDoc(pkValue); + batch.createItemOperation(testDoc); + cosmosBatches.add(batch); + } + + FaultInjectionRule serverResponseDelayRule = + new FaultInjectionRuleBuilder("serverResponseDelay-" + UUID.randomUUID()) + .condition( + new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.BATCH_ITEM) + .build() + ) + .result( + FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) + .delay(Duration.ofMillis(delayMillis)) + .build() + ) + .duration(Duration.ofSeconds(60)) + .build(); + + try { + CosmosFaultInjectionHelper.configureFaultInjectionRules(container, Arrays.asList(serverResponseDelayRule)).block(); + + // run with concurrency = 1 -> batches for same partition should be serialized + CosmosTransactionalBulkExecutionOptionsImpl optsSerial = new CosmosTransactionalBulkExecutionOptionsImpl(); + optsSerial.setMaxOperationsConcurrency(1); + final TransactionalBulkExecutor serialExecutor = new TransactionalBulkExecutor( + container, + Flux.fromIterable(cosmosBatches), + optsSerial); + + long startSerial = System.currentTimeMillis(); + List serialResponses = serialExecutor.execute().collectList().block(); + long endSerial = System.currentTimeMillis(); + + long durationSerial = endSerial - startSerial; + + assertThat(serialResponses.size()).isEqualTo(batchCount); + + // run with higher concurrency + CosmosTransactionalBulkExecutionOptionsImpl optsParallel = new CosmosTransactionalBulkExecutionOptionsImpl(); + optsParallel.setMaxOperationsConcurrency(batchCount); + final TransactionalBulkExecutor parallelExecutor = new TransactionalBulkExecutor( + container, + Flux.fromIterable(cosmosBatches), + optsParallel); + + long startParallel = System.currentTimeMillis(); + List parallelResponses = parallelExecutor.execute().collectList().block(); + long endParallel = System.currentTimeMillis(); + + long durationParallel = endParallel - startParallel; + + assertThat(parallelResponses.size()).isEqualTo(batchCount); + + // With serialized execution duration should be approximately batchCount * delayMillis + assertThat(durationSerial).isGreaterThanOrEqualTo((long)batchCount * delayMillis * 9 / 10); + + // Parallel execution should be faster than serialized execution + assertThat(durationParallel).isLessThan(durationSerial); + + } finally { + serverResponseDelayRule.disable(); + } + } + + @Test(groups = { "emulator" }, timeOut = TIMEOUT) + public void executeTransactionalBulk_tooManyRequest_recordInThresholds() throws Exception { + this.container = createContainer(database); + + String pkValue = UUID.randomUUID().toString(); + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(pkValue)); + TestDoc testDoc = this.populateTestDoc(pkValue); + batch.createItemOperation(testDoc); + + FaultInjectionRule tooManyRequestRule = + new FaultInjectionRuleBuilder("ttrs-" + UUID.randomUUID()) + .condition(new FaultInjectionConditionBuilder().operationType(FaultInjectionOperationType.BATCH_ITEM).build()) + 
.result(FaultInjectionResultBuilders.getResultBuilder(FaultInjectionServerErrorType.TOO_MANY_REQUEST).times(1).build()) + .duration(Duration.ofSeconds(30)) + .hitLimit(1) + .build(); + + final TransactionalBulkExecutor executor = new TransactionalBulkExecutor( + container, + Flux.fromIterable(Arrays.asList(batch)), + new CosmosTransactionalBulkExecutionOptionsImpl()); + + try { + CosmosFaultInjectionHelper.configureFaultInjectionRules(container, Arrays.asList(tooManyRequestRule)).block(); + + List responses = executor.execute().collectList().block(); + + assertThat(responses.size()).isEqualTo(1); + CosmosBulkTransactionalBatchResponse resp = responses.get(0); + assertThat(resp.getResponse()).isNotNull(); + + // inspect partitionScopeThresholds via reflection and verify a retry was recorded + Field mapField = TransactionalBulkExecutor.class.getDeclaredField("partitionScopeThresholds"); + mapField.setAccessible(true); + Map thresholdsMap = (Map) mapField.get(executor); + + assertThat(thresholdsMap).isNotEmpty(); + Object thresholdsObj = thresholdsMap.values().iterator().next(); + PartitionScopeThresholds thresholds = (PartitionScopeThresholds) thresholdsObj; + + PartitionScopeThresholds.CurrentIntervalThresholds current = thresholds.getCurrentThresholds(); + long retried = current.currentRetriedOperationCount.get(); + + assertThat(retried).isEqualTo(1); + + } finally { + tooManyRequestRule.disable(); + if (executor != null && !executor.isDisposed()) { + executor.dispose(); + } + } + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/routing/ApplicableRegionEvaluatorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/routing/ApplicableRegionEvaluatorTest.java index 3312dcba3eda..3f3f46c3e665 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/routing/ApplicableRegionEvaluatorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/routing/ApplicableRegionEvaluatorTest.java @@ -209,7 +209,8 @@ public void validateApplicableRegions( new ThrottlingRetryOptions(), null, globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + false); for (int i = 0; i < expectedApplicableRegionalRoutingContexts.size(); i++) { Assertions.assertThat(actualApplicableRegionalRoutingContexts.get(i)).isEqualTo(expectedApplicableRegionalRoutingContexts.get(i)); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java index 2fcf9bfe50cf..087a13241aa3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java @@ -119,6 +119,8 @@ public class CosmosAsyncContainer { ImplementationBridgeHelpers.CosmosBulkExecutionOptionsHelper.getCosmosBulkExecutionOptionsAccessor(); private static final ImplementationBridgeHelpers.CosmosClientTelemetryConfigHelper.CosmosClientTelemetryConfigAccessor clientTelemetryConfigAccessor = ImplementationBridgeHelpers.CosmosClientTelemetryConfigHelper.getCosmosClientTelemetryConfigAccessor(); + private static final ImplementationBridgeHelpers.CosmosBatchRequestOptionsHelper.CosmosBatchRequestOptionsAccessor batchRequestOptionsAccessor = + 
ImplementationBridgeHelpers.CosmosBatchRequestOptionsHelper.getCosmosBatchRequestOptionsAccessor(); private final CosmosAsyncDatabase database; private final String id; @@ -1266,8 +1268,14 @@ public Mono executeCosmosBatch( RequestOptions requestOptionsInternal = ModelBridgeInternal.toRequestOptions(requestOptions); applyPolicies(OperationType.Batch, ResourceType.Document, requestOptionsInternal, this.batchSpanName); + boolean disableRetryForThrottledBatchRequest = batchRequestOptionsAccessor.shouldDisableRetryForThrottledBatchRequest(requestOptions); return withContext(context -> { - final BatchExecutor executor = new BatchExecutor(this, cosmosBatch, requestOptionsInternal); + final BatchExecutor executor = + new BatchExecutor( + this, + cosmosBatch, + requestOptionsInternal, + disableRetryForThrottledBatchRequest); final Mono responseMono = executor.executeAsync(); CosmosAsyncClient client = database diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index 1ebddafdb677..03590c1f8a5d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -937,13 +937,15 @@ Mono executeStoredProcedure(String storedProcedureLink, * @param options the request options. * @param disableAutomaticIdGeneration the flag for disabling automatic id generation. * @param disableStaledResourceExceptionHandling the flag for disabling staled resource exception handling. For bulk executor, the exception should bubbled up so to be retried correctly. + * @param disableRetryForThrottledBatchRequest the flag for disabling 429 retry for batch request. For bulk executor and transactional bulk executor, the exception need to be bubbled up. * @return a {@link Mono} containing the transactionalBatchResponse response which results of all operations. */ Mono executeBatchRequest(String collectionLink, ServerBatchRequest serverBatchRequest, RequestOptions options, boolean disableAutomaticIdGeneration, - boolean disableStaledResourceExceptionHandling); + boolean disableStaledResourceExceptionHandling, + boolean disableRetryForThrottledBatchRequest); /** * Creates a trigger. 
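The new disableRetryForThrottledBatchRequest parameter lets the bulk executors opt out of the SDK-level 429 retry so a throttled batch response surfaces to the caller and can be retried by the executor's own policy. A minimal, illustrative call shape only (variable names are placeholders; the real call sites are in BatchExecutor and BulkExecutor further down in this change):

    asyncDocumentClient.executeBatchRequest(
        collectionLink,
        serverBatchRequest,
        requestOptions,
        false, // disableAutomaticIdGeneration
        true,  // disableStaledResourceExceptionHandling - handled by the bulk retry policy
        true); // disableRetryForThrottledBatchRequest - throttled responses bubble up to the executor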
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index ca9731f86e50..2e01f73c531b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -62,7 +62,8 @@ public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, ThrottlingRetryOptions throttlingRetryOptions, RxCollectionCache rxCollectionCache, GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover globalPartitionEndpointManagerForPerPartitionAutomaticFailover) { + GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + boolean disableRetryForThrottledBatchRequest) { this.globalEndpointManager = globalEndpointManager; this.failoverRetryCount = 0; @@ -74,7 +75,7 @@ public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, throttlingRetryOptions.getMaxRetryAttemptsOnThrottledRequests(), throttlingRetryOptions.getMaxRetryWaitTime(), BridgeInternal.getRetryContext(this.getCosmosDiagnostics()), - false); + disableRetryForThrottledBatchRequest); this.metadataThrottlingRetry = new MetadataThrottlingRetryPolicy(BridgeInternal.getRetryContext(this.getCosmosDiagnostics())); this.rxCollectionCache = rxCollectionCache; this.faultInjectionRequestContext = new FaultInjectionRequestContext(); @@ -524,6 +525,7 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) { // In case PPAF is enabled and a location override exists for the partition key range assigned to the request this.globalPartitionEndpointManagerForPerPartitionAutomaticFailover.tryAddPartitionLevelLocationOverride(request); + this.throttlingRetry.onBeforeSendRequest(request); } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 18cca304ecd7..beadb0af6cf4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -19,8 +19,6 @@ import java.time.Duration; import java.util.EnumSet; import java.util.Locale; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import static com.azure.cosmos.implementation.guava25.base.MoreObjects.firstNonNull; import static com.azure.cosmos.implementation.guava25.base.Strings.emptyToNull; @@ -244,6 +242,10 @@ public class Configs { public static final String MAX_BULK_MICRO_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS_VARIABLE = "COSMOS_MAX_BULK_MICRO_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS"; public static final int DEFAULT_MAX_BULK_MICRO_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS = 1000; + public static final String BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS = "COSMOS.BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS"; + public static final String BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS_VARIABLE = "COSMOS_BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS"; + public static final int DEFAULT_BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS = 500; + // Config of 
CodingErrorAction on charset decoder for malformed input public static final String CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT = "COSMOS.CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT"; public static final String DEFAULT_CHARSET_DECODER_ERROR_ACTION_ON_MALFORMED_INPUT = StringUtils.EMPTY; @@ -714,6 +716,13 @@ public static int getMaxBulkMicroBatchFlushIntervalInMs() { return DEFAULT_MAX_BULK_MICRO_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS; } + public static int getBulkTransactionalBatchFlushIntervalInMs() { + return Integer.parseInt(System.getProperty(BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS, + firstNonNull( + emptyToNull(System.getenv().get(BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS_VARIABLE)), + String.valueOf(DEFAULT_BULK_TRANSACTIONAL_BATCH_FLUSH_INTERVAL_IN_MILLISECONDS)))); + } + public static int getMaxHttpRequestTimeout() { String valueFromSystemProperty = System.getProperty(HTTP_MAX_REQUEST_TIMEOUT); if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java index e20ef848ec67..b7db63c7e10b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java @@ -10,6 +10,7 @@ public class CosmosSchedulers { private final static String COSMOS_PARALLEL_THREAD_NAME = "cosmos-parallel"; private final static String TRANSPORT_RESPONSE_BOUNDED_ELASTIC_THREAD_NAME = "transport-response-bounded-elastic"; private final static String TRANSACTIONAL_BULK_EXECUTOR_BOUNDED_ELASTIC_THREAD_NAME = "transactional_bulk-executor-bounded-elastic"; + private final static String TRANSACTIONAL_BULK_EXECUTOR_FLUSH_BOUNDED_ELASTIC_THREAD_NAME = "transactional_bulk-executor-flush-bounded-elastic"; private final static String BULK_EXECUTOR_BOUNDED_ELASTIC_THREAD_NAME = "bulk-executor-bounded-elastic"; private final static String BULK_EXECUTOR_FLUSH_BOUNDED_ELASTIC_THREAD_NAME = "bulk-executor-flush-bounded-elastic"; private final static String OPEN_CONNECTIONS_BOUNDED_ELASTIC_THREAD_NAME = "open-connections-bounded-elastic"; @@ -44,6 +45,14 @@ public class CosmosSchedulers { true ); + public final static Scheduler TRANSACTIONAL_BULK_EXECUTOR_FLUSH_BOUNDED_ELASTIC = Schedulers.newBoundedElastic( + Schedulers.DEFAULT_BOUNDED_ELASTIC_SIZE, + Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE, + TRANSACTIONAL_BULK_EXECUTOR_FLUSH_BOUNDED_ELASTIC_THREAD_NAME, + TTL_FOR_SCHEDULER_WORKER_IN_SECONDS, + true + ); + // Custom bounded elastic scheduler process bulk execution tasks public final static Scheduler BULK_EXECUTOR_BOUNDED_ELASTIC = Schedulers.newBoundedElastic( 2 * Schedulers.DEFAULT_BOUNDED_ELASTIC_SIZE, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosTransactionalBulkExecutionOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosTransactionalBulkExecutionOptionsImpl.java index 40ce6cd7f89f..4e2e9d64fe16 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosTransactionalBulkExecutionOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosTransactionalBulkExecutionOptionsImpl.java @@ -9,6 +9,7 @@ import com.azure.cosmos.CosmosItemSerializer; import 
com.azure.cosmos.ReadConsistencyStrategy; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.batch.BatchRequestResponseConstants; import com.azure.cosmos.implementation.batch.BulkExecutorDiagnosticsTracker; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.CosmosRequestOptions; @@ -29,7 +30,11 @@ * It can be passed while processing bulk operations. */ public class CosmosTransactionalBulkExecutionOptionsImpl implements OverridableRequestOptions { - private int maxMicroBatchConcurrency = Configs.getMaxBulkMicroBatchConcurrency(); + private int maxOperationsConcurrency = BatchRequestResponseConstants.DEFAULT_MAX_BULK_TRANSACTIONAL_BATCH_OP_CONCURRENCY; + private int maxBatchesConcurrency = BatchRequestResponseConstants.DEFAULT_MAX_BULK_TRANSACTIONAL_BATCH_CONCURRENCY; + + private double maxBatchRetryRate = BatchRequestResponseConstants.DEFAULT_MAX_MICRO_BATCH_RETRY_RATE; + private double minBatchRetryRate = BatchRequestResponseConstants.DEFAULT_MIN_MICRO_BATCH_RETRY_RATE; private Integer maxConcurrentCosmosPartitions = null; private OperationContextAndListenerTuple operationContextAndListenerTuple; @@ -45,8 +50,9 @@ public class CosmosTransactionalBulkExecutionOptionsImpl implements OverridableR public CosmosTransactionalBulkExecutionOptionsImpl(CosmosTransactionalBulkExecutionOptionsImpl toBeCloned) { this.schedulerOverride = toBeCloned.schedulerOverride; - this.maxMicroBatchConcurrency = toBeCloned.maxMicroBatchConcurrency; this.maxConcurrentCosmosPartitions = toBeCloned.maxConcurrentCosmosPartitions; + this.maxOperationsConcurrency = toBeCloned.maxOperationsConcurrency; + this.maxBatchesConcurrency = toBeCloned.maxBatchesConcurrency; this.throughputControlGroupName = toBeCloned.throughputControlGroupName; this.operationContextAndListenerTuple = toBeCloned.operationContextAndListenerTuple; this.diagnosticsTracker = toBeCloned.diagnosticsTracker; @@ -62,12 +68,8 @@ public CosmosTransactionalBulkExecutionOptionsImpl(CosmosTransactionalBulkExecut } } - public CosmosTransactionalBulkExecutionOptionsImpl(Map customOptions) { - if (customOptions == null) { - this.customOptions = new HashMap<>(); - } else { - this.customOptions = customOptions; - } + public CosmosTransactionalBulkExecutionOptionsImpl() { + this.customOptions = new HashMap<>(); } public CosmosItemSerializer getCustomItemSerializer() { @@ -86,15 +88,44 @@ public void setMaxConcurrentCosmosPartitions(int maxConcurrentCosmosPartitions) this.maxConcurrentCosmosPartitions = maxConcurrentCosmosPartitions; } - public int getMaxMicroBatchConcurrency() { - return maxMicroBatchConcurrency; + public int getMaxOperationsConcurrency() { + return this.maxOperationsConcurrency; + } + + public void setMaxOperationsConcurrency(int maxOperationsConcurrency) { + this.maxOperationsConcurrency = maxOperationsConcurrency; } - public void setMaxMicroBatchConcurrency(int maxMicroBatchConcurrency) { + public int getMaxBatchesConcurrency() { + return maxBatchesConcurrency; + } + + public void setMaxBatchesConcurrency(int maxBatchesConcurrency) { checkArgument( - maxMicroBatchConcurrency >= 1 && maxMicroBatchConcurrency <= 5, - "maxMicroBatchConcurrency should be between [1, 5]"); - this.maxMicroBatchConcurrency = maxMicroBatchConcurrency; + maxBatchesConcurrency >= 1 && maxBatchesConcurrency <= 5, + "maxBatchesConcurrency should be between [1, 5]"); + this.maxBatchesConcurrency = maxBatchesConcurrency; + } + + public void 
setTargetedMicroBatchRetryRate(double minRetryRate, double maxRetryRate) { + if (minRetryRate < 0) { + throw new IllegalArgumentException("The minRetryRate must not be a negative value"); + } + + if (minRetryRate > maxRetryRate) { + throw new IllegalArgumentException("The minRetryRate must not exceed the maxRetryRate"); + } + + this.maxBatchRetryRate = maxRetryRate; + this.minBatchRetryRate = minRetryRate; + } + + public double getMaxBatchRetryRate() { + return maxBatchRetryRate; + } + + public double getMinBatchRetryRate() { + return minBatchRetryRate; } public OperationContextAndListenerTuple getOperationContextAndListenerTuple() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IRetryPolicyFactory.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IRetryPolicyFactory.java index 4dbb77720d16..daa566ca064e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IRetryPolicyFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IRetryPolicyFactory.java @@ -8,5 +8,6 @@ */ public interface IRetryPolicyFactory { DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clientContextOverride); + DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clientContextOverride, boolean disableRetryForThrottledBatchRequest); RetryContext getRetryContext(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 1c9720c47912..0c81df04390a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -33,6 +33,7 @@ import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import com.azure.cosmos.implementation.batch.ItemBatchOperation; import com.azure.cosmos.implementation.batch.PartitionScopeThresholds; +import com.azure.cosmos.implementation.batch.TransactionalBatchRetryPolicy; import com.azure.cosmos.implementation.clienttelemetry.AttributeNamingScheme; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.clienttelemetry.CosmosMeterOptions; @@ -1176,6 +1177,11 @@ CosmosBatchRequestOptions setEndToEndOperationLatencyPolicyConfig( CosmosBatchRequestOptions setOperationContextAndListenerTuple( CosmosBatchRequestOptions cosmosBatchRequestOptions, OperationContextAndListenerTuple operationContextAndListenerTuple); + CosmosBatchRequestOptions setDisableRetryForThrottledBatchRequest( + CosmosBatchRequestOptions cosmosBatchRequestOptions, + boolean disableRetryForThrottledBatchRequest + ); + boolean shouldDisableRetryForThrottledBatchRequest(CosmosBatchRequestOptions cosmosBatchRequestOptions); } } @@ -1284,6 +1290,8 @@ public static void setCosmosBatchAccessor(CosmosBatchAccessor newAccessor) { public interface CosmosBatchAccessor { List> getOperationsInternal(CosmosBatch cosmosBatch); + CosmosBatch setRetryPolicy(CosmosBatch cosmosBatch, TransactionalBatchRetryPolicy transactionalBatchRetryPolicy); + TransactionalBatchRetryPolicy getRetryPolicy(CosmosBatch cosmosBatch); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResetSessionTokenRetryPolicyFactory.java 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResetSessionTokenRetryPolicyFactory.java index 793c7c3d561a..0c576b953343 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResetSessionTokenRetryPolicyFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResetSessionTokenRetryPolicyFactory.java @@ -18,7 +18,18 @@ public ResetSessionTokenRetryPolicyFactory(ISessionContainer sessionContainer, R @Override public DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clientContextOverride) { - return new RenameCollectionAwareClientRetryPolicy(this.sessionContainer, this.collectionCache, retryPolicy.getRequestPolicy(clientContextOverride)); + return getRequestPolicy(clientContextOverride, false); + } + + @Override + public DocumentClientRetryPolicy getRequestPolicy( + DiagnosticsClientContext clientContextOverride, + boolean disableRetryForThrottledBatchRequest) { + + return new RenameCollectionAwareClientRetryPolicy( + this.sessionContainer, + this.collectionCache, + retryPolicy.getRequestPolicy(clientContextOverride, disableRetryForThrottledBatchRequest)); } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResourceThrottleRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResourceThrottleRetryPolicy.java index bea73e015285..dbb5f88f6c3d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResourceThrottleRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ResourceThrottleRetryPolicy.java @@ -28,31 +28,32 @@ public class ResourceThrottleRetryPolicy extends DocumentClientRetryPolicy { private int currentAttemptCount; private Duration cumulativeRetryDelay; private RetryContext retryContext; - private final boolean retryOnClientSideThrottledBatchRequests; + private final boolean disableRetryForThrottledBatchRequest; + private RxDocumentServiceRequest request; public ResourceThrottleRetryPolicy( int maxAttemptCount, Duration maxWaitTime, RetryContext retryContext, - boolean retryOnClientSideThrottledBatchRequests) { + boolean disableRetryForThrottledBatchRequest) { - this(maxAttemptCount, maxWaitTime, retryOnClientSideThrottledBatchRequests); + this(maxAttemptCount, maxWaitTime, disableRetryForThrottledBatchRequest); this.retryContext = retryContext; } public ResourceThrottleRetryPolicy( int maxAttemptCount, Duration maxWaitTime, - boolean retryOnClientSideThrottledBatchRequests) { + boolean disableRetryForThrottledBatchRequest) { - this(maxAttemptCount, maxWaitTime, 1, retryOnClientSideThrottledBatchRequests); + this(maxAttemptCount, maxWaitTime, 1, disableRetryForThrottledBatchRequest); } public ResourceThrottleRetryPolicy( int maxAttemptCount, Duration maxWaitTime, int backoffDelayFactor, - boolean retryOnClientSideThrottledBatchRequests) { + boolean disableRetryForThrottledBatchRequest) { Utils.checkStateOrThrow(maxWaitTime.getSeconds() <= Integer.MAX_VALUE / 1000, "maxWaitTime", "maxWaitTime must not be larger than " + Integer.MAX_VALUE / 1000); @@ -61,7 +62,7 @@ public ResourceThrottleRetryPolicy( this.maxWaitTime = maxWaitTime; this.currentAttemptCount = 0; this.cumulativeRetryDelay = Duration.ZERO; - this.retryOnClientSideThrottledBatchRequests = retryOnClientSideThrottledBatchRequests; + this.disableRetryForThrottledBatchRequest = disableRetryForThrottledBatchRequest; } @Override @@ -77,8 +78,15 @@ public Mono shouldRetry(Exception 
exception) { return Mono.just(ShouldRetryResult.errorOnNonRelatedException(exception)); } - if (!retryOnClientSideThrottledBatchRequests && - dce.getSubStatusCode() == HttpConstants.SubStatusCodes.THROUGHPUT_CONTROL_BULK_REQUEST_RATE_TOO_LARGE) { + if (disableRetryForThrottledBatchRequest && + this.request != null && + this.request.getOperationType() == OperationType.Batch && + this.request.getResourceType() == ResourceType.Document) { + + logger.trace( + "Operation will NOT be retried - retry is disabled for batch request. Current attempt {}", + this.currentAttemptCount, + exception); return Mono.just(ShouldRetryResult.noRetry()); } @@ -113,7 +121,7 @@ public Mono shouldRetry(Exception exception) { @Override public void onBeforeSendRequest(RxDocumentServiceRequest request) { - // no op + this.request = request; } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java index 882850b2b796..7a1404894ab3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java @@ -40,6 +40,14 @@ public RetryPolicy( @Override public DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clientContextOverride) { + return getRequestPolicy(clientContextOverride, false); + } + + @Override + public DocumentClientRetryPolicy getRequestPolicy( + DiagnosticsClientContext clientContextOverride, + boolean disableRetryForThrottledBatchRequest) { + DiagnosticsClientContext effectiveClientContext = this.diagnosticsClientContext; if (clientContextOverride != null) { effectiveClientContext = clientContextOverride; @@ -51,7 +59,8 @@ public DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clien this.throttlingRetryOptions, this.rxCollectionCache, this.globalPartitionEndpointManagerForPerPartitionCircuitBreaker, - this.globalPartitionEndpointManagerForPerPartitionAutomaticFailover); + this.globalPartitionEndpointManagerForPerPartitionAutomaticFailover, + disableRetryForThrottledBatchRequest); return clientRetryPolicy; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 025e6c96e657..c1d36511d969 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5188,7 +5188,8 @@ public Mono executeBatchRequest(String collectionLink, ServerBatchRequest serverBatchRequest, RequestOptions options, boolean disableAutomaticIdGeneration, - boolean disableStaledResourceExceptionHandling) { + boolean disableStaledResourceExceptionHandling, + boolean disableRetryForThrottledBatchRequest) { AtomicReference requestReference = new AtomicReference<>(); Consumer gwModeE2ETimeoutDiagnosticHandler @@ -5204,7 +5205,11 @@ public Mono executeBatchRequest(String collectionLink, ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(this, false); scopedDiagnosticsFactory.setGwModeE2ETimeoutDiagnosticsHandler(gwModeE2ETimeoutDiagnosticHandler); - DocumentClientRetryPolicy documentClientRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); + DocumentClientRetryPolicy 
documentClientRetryPolicy = + this.resetSessionTokenRetryPolicy.getRequestPolicy( + scopedDiagnosticsFactory, + disableRetryForThrottledBatchRequest); + if (!disableStaledResourceExceptionHandling) { documentClientRetryPolicy = new StaleResourceRetryPolicy( this.collectionCache, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchExecutor.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchExecutor.java index 21def8757058..001347c37f88 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchExecutor.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchExecutor.java @@ -24,12 +24,14 @@ public final class BatchExecutor { private final RequestOptions options; private final CosmosBatch cosmosBatch; private final CosmosItemSerializer effectiveItemSerializer; + private final boolean disableRetryForThrottledBatchRequest; public BatchExecutor( final CosmosAsyncContainer container, final CosmosBatch cosmosBatch, - final RequestOptions options) { + final RequestOptions options, + final boolean disableRetryForThrottledBatchRequest) { this.container = container; this.cosmosBatch = cosmosBatch; @@ -38,6 +40,7 @@ public BatchExecutor( this.effectiveItemSerializer = docClientWrapper.getEffectiveItemSerializer( this.options != null ? this.options.getEffectiveItemSerializer() : null ); + this.disableRetryForThrottledBatchRequest = disableRetryForThrottledBatchRequest; } /** @@ -63,6 +66,7 @@ public Mono executeAsync() { request, options, false, - false); + false, + this.disableRetryForThrottledBatchRequest); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchRequestResponseConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchRequestResponseConstants.java index 713a82242a51..1d9d4d18b2c2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchRequestResponseConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BatchRequestResponseConstants.java @@ -13,8 +13,11 @@ public final class BatchRequestResponseConstants { public static final int DEFAULT_MAX_DIRECT_MODE_BATCH_REQUEST_BODY_SIZE_IN_BYTES = 220201; public static final int MAX_OPERATIONS_IN_DIRECT_MODE_BATCH_REQUEST = 100; public static final int DEFAULT_MAX_MICRO_BATCH_INTERVAL_AFTER_DRAINING_INCOMING_FLUX_IN_MILLISECONDS = 100; + public static final int DEFAULT_MAX_TRANSACTIONAL_BATCH_INTERVAL_AFTER_DRAINING_INCOMING_FLUX_IN_MILLISECONDS = 100; public static final double DEFAULT_MIN_MICRO_BATCH_RETRY_RATE = 0.1; public static final double DEFAULT_MAX_MICRO_BATCH_RETRY_RATE = 0.2; + public static final int DEFAULT_MAX_BULK_TRANSACTIONAL_BATCH_OP_CONCURRENCY = 100; + public static final int DEFAULT_MAX_BULK_TRANSACTIONAL_BATCH_CONCURRENCY = 5; static final String FIELD_OPERATION_TYPE = "operationType"; static final String FIELD_ID = "id"; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutor.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutor.java index f37e5945c04e..99acc8b2ef6b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutor.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutor.java @@ -898,7 +898,8 @@ private Mono executeBatchRequest( 
serverRequest, options, false, - true) // disable the staled resource exception handling as it is being handled in the BulkOperationRetryPolicy + true, // disable the staled resource exception handling as it is being handled in the BulkOperationRetryPolicy + true) .flatMap(cosmosBatchResponse -> { cosmosBatchResponseAccessor.setGlobalOpCount( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java index a2de55b80640..22746b76a68d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java @@ -18,6 +18,7 @@ import com.azure.cosmos.implementation.routing.CollectionRoutingMap; import com.azure.cosmos.implementation.routing.PartitionKeyInternal; import com.azure.cosmos.models.CosmosBatchOperationResult; +import com.azure.cosmos.models.CosmosBatchResponse; import com.azure.cosmos.models.CosmosItemOperation; import com.azure.cosmos.models.CosmosItemOperationType; import com.azure.cosmos.models.ModelBridgeInternal; @@ -31,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; import static com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper.getEffectivePartitionKeyString; @@ -63,7 +65,7 @@ static void setRetryPolicyForBulk( ResourceThrottleRetryPolicy resourceThrottleRetryPolicy = new ResourceThrottleRetryPolicy( throttlingRetryOptions.getMaxRetryAttemptsOnThrottledRequests(), throttlingRetryOptions.getMaxRetryWaitTime(), - true); + false); BulkOperationRetryPolicy bulkRetryPolicy = new BulkOperationRetryPolicy( docClientWrapper.getCollectionCache(), @@ -91,6 +93,19 @@ static Map getResponseHeadersFromBatchOperationResult(CosmosBatc return headers; } + static Map getResponseHeadersFromBatchOperationResult(CosmosBatchResponse result) { + final Map headers = new HashMap<>(); + + headers.put(HttpConstants.HttpHeaders.SUB_STATUS, String.valueOf(result.getSubStatusCode())); + headers.put(HttpConstants.HttpHeaders.REQUEST_CHARGE, String.valueOf(result.getRequestCharge())); + + if (result.getRetryAfterDuration() != null) { + headers.put(HttpConstants.HttpHeaders.RETRY_AFTER_IN_MILLISECONDS, String.valueOf(result.getRetryAfterDuration().toMillis())); + } + + return headers; + } + /** * Resolve partition key range id of a operation and set the partition key json value in operation. 
* @@ -110,60 +125,77 @@ static Mono resolvePartitionKeyRangeId( if (operation instanceof ItemBulkOperation) { final ItemBulkOperation itemBulkOperation = (ItemBulkOperation) operation; - return Mono.defer(() -> - BulkExecutorUtil.getCollectionInfoAsync(docClientWrapper, container, collectionBeforeRecreation.get()) - .flatMap(collection -> { - final PartitionKeyDefinition definition = collection.getPartitionKey(); - final PartitionKeyInternal partitionKeyInternal = getPartitionKeyInternal(operation, definition); - itemBulkOperation.setPartitionKeyJson(partitionKeyInternal.toJson()); - - return docClientWrapper.getPartitionKeyRangeCache() - .tryLookupAsync(null, collection.getResourceId(), null, null) - .map((Utils.ValueHolder routingMap) -> { - - if (routingMap.v == null) { - collectionBeforeRecreation.set(collection); - throw new CollectionRoutingMapNotFoundException( - String.format( - "No collection routing map found for container %s(%s) in database %s.", - container.getId(), - collection.getResourceId(), - container.getDatabase().getId()) - ); - } - - return routingMap.v.getRangeByEffectivePartitionKey( - getEffectivePartitionKeyString( - partitionKeyInternal, - definition)).getId(); - }); - })) - .retryWhen(Retry - .fixedDelay( - BatchRequestResponseConstants.MAX_COLLECTION_RECREATION_RETRY_COUNT, - Duration.ofSeconds( - BatchRequestResponseConstants.MAX_COLLECTION_RECREATION_REFRESH_INTERVAL_IN_SECONDS)) - .filter(t -> t instanceof CollectionRoutingMapNotFoundException) - .doBeforeRetry((retrySignal) -> docClientWrapper - .getCollectionCache() - .refresh( - null, - Utils.getCollectionName(BridgeInternal.getLink(container)), - null) - ) - ); + return resolvePartitionKeyRangeId( + docClientWrapper, + container, + operation.getPartitionKeyValue(), + (partitionKeyInternal -> itemBulkOperation.setPartitionKeyJson(partitionKeyInternal.toJson()))); + } else { throw new UnsupportedOperationException("Unknown CosmosItemOperation."); } } + static Mono resolvePartitionKeyRangeId( + AsyncDocumentClient docClientWrapper, + CosmosAsyncContainer container, + PartitionKey partitionKey, + Consumer partitionKeyInternalConsumer) { + + AtomicReference collectionBeforeRecreation = new AtomicReference<>(null); + + return Mono.defer(() -> + BulkExecutorUtil + .getCollectionInfoAsync(docClientWrapper, container, collectionBeforeRecreation.get()) + .flatMap(collection -> { + final PartitionKeyDefinition definition = collection.getPartitionKey(); + final PartitionKeyInternal partitionKeyInternal = getPartitionKeyInternal(partitionKey, definition); + if (partitionKeyInternalConsumer != null) { + partitionKeyInternalConsumer.accept(partitionKeyInternal); + } + + return docClientWrapper + .getPartitionKeyRangeCache() + .tryLookupAsync(null, collection.getResourceId(), null, null) + .map((Utils.ValueHolder routingMap) -> { + + if (routingMap.v == null) { + collectionBeforeRecreation.set(collection); + throw new CollectionRoutingMapNotFoundException( + String.format( + "No collection routing map found for container %s(%s) in database %s.", + container.getId(), + collection.getResourceId(), + container.getDatabase().getId()) + ); + } + + return routingMap.v.getRangeByEffectivePartitionKey( + getEffectivePartitionKeyString( + partitionKeyInternal, + definition)).getId(); + }); + })) + .retryWhen( + Retry + .fixedDelay( + BatchRequestResponseConstants.MAX_COLLECTION_RECREATION_RETRY_COUNT, + Duration.ofSeconds(BatchRequestResponseConstants.MAX_COLLECTION_RECREATION_REFRESH_INTERVAL_IN_SECONDS)) + .filter(t -> t 
instanceof CollectionRoutingMapNotFoundException) + .doBeforeRetry((retrySignal) -> docClientWrapper + .getCollectionCache() + .refresh( + null, + Utils.getCollectionName(BridgeInternal.getLink(container)), + null) + ) + ); + } + private static PartitionKeyInternal getPartitionKeyInternal( - final CosmosItemOperation operation, + final PartitionKey partitionKey, final PartitionKeyDefinition partitionKeyDefinition) { - checkNotNull(operation, "expected non-null operation"); - - final PartitionKey partitionKey = operation.getPartitionKeyValue(); if (partitionKey == null) { return ModelBridgeInternal.getNonePartitionKey(partitionKeyDefinition); } else { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/CosmosBulkTransactionalBatchResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/CosmosBulkTransactionalBatchResponse.java new file mode 100644 index 000000000000..69da7ee153a1 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/CosmosBulkTransactionalBatchResponse.java @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.batch; + +import com.azure.cosmos.models.CosmosBatch; +import com.azure.cosmos.models.CosmosBatchResponse; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +public class CosmosBulkTransactionalBatchResponse { + private final CosmosBatch cosmosBatch; + private final CosmosBatchResponse response; + private final Exception exception; + + public CosmosBulkTransactionalBatchResponse( + CosmosBatch cosmosBatch, + CosmosBatchResponse response, + Exception exception) { + + checkNotNull(cosmosBatch, "Argument 'cosmosBatch' can not be null"); + this.cosmosBatch = cosmosBatch; + this.response = response; + this.exception = exception; + } + + + public CosmosBatch getCosmosBatch() { + return cosmosBatch; + } + + public Exception getException() { + return exception; + } + + public CosmosBatchResponse getResponse() { + return response; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/PartitionScopeThresholds.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/PartitionScopeThresholds.java index c651f08d68f6..578a536351c8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/PartitionScopeThresholds.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/PartitionScopeThresholds.java @@ -5,12 +5,12 @@ import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.CosmosBulkExecutionOptionsImpl; +import com.azure.cosmos.implementation.CosmosTransactionalBulkExecutionOptionsImpl; import com.azure.cosmos.implementation.UUIDs; import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -21,7 +21,6 @@ public class PartitionScopeThresholds { private final static Logger logger = LoggerFactory.getLogger(PartitionScopeThresholds.class); private final String pkRangeId; - private final CosmosBulkExecutionOptionsImpl options; private final AtomicInteger targetMicroBatchSize; private final AtomicLong totalOperationCount; private final 
AtomicReference currentThresholds; @@ -32,29 +31,64 @@ public class PartitionScopeThresholds { private final int maxMicroBatchSize; private final int minTargetMicroBatchSize; + private static CosmosBulkExecutionOptionsImpl validateOptions(CosmosBulkExecutionOptionsImpl options) { + checkNotNull(options, "expected non-null options"); + return options; + } + + private static CosmosTransactionalBulkExecutionOptionsImpl validateOptions(CosmosTransactionalBulkExecutionOptionsImpl options) { + checkNotNull(options, "expected non-null options"); + return options; + } + public PartitionScopeThresholds(String pkRangeId, CosmosBulkExecutionOptionsImpl options) { + this( + pkRangeId, + validateOptions(options).getMinTargetedMicroBatchRetryRate(), + validateOptions(options).getMaxTargetedMicroBatchRetryRate(), + validateOptions(options).getInitialMicroBatchSize(), + validateOptions(options).getMaxMicroBatchSize(), + validateOptions(options).getMinTargetMicroBatchSize()); + + } + + public PartitionScopeThresholds(String pkRangeId, CosmosTransactionalBulkExecutionOptionsImpl options) { + this( + pkRangeId, + validateOptions(options).getMinBatchRetryRate(), + validateOptions(options).getMaxBatchRetryRate(), + 1, // for transactional batch, we start with small batch size to avoid sudden RU spike + validateOptions(options).getMaxOperationsConcurrency(), + 1); + } + + PartitionScopeThresholds( + String pkRangeId, + double minRetryRate, + double maxRetryRate, + int initialMicroBatchSize, + int maxMicroBatchSize, + int minMicroBatchSize) { checkNotNull(pkRangeId, "expected non-null pkRangeId"); - checkNotNull(options, "expected non-null options"); this.pkRangeId = pkRangeId; - this.options = options; this.totalOperationCount = new AtomicLong(0); this.currentThresholds = new AtomicReference<>(new CurrentIntervalThresholds()); - this.minRetryRate = options.getMinTargetedMicroBatchRetryRate(); - this.maxRetryRate = options.getMaxTargetedMicroBatchRetryRate(); + this.minRetryRate = minRetryRate; + this.maxRetryRate = maxRetryRate; this.avgRetryRate = ((this.maxRetryRate + this.minRetryRate)/2); this.maxMicroBatchSize = Math.min( - options.getMaxMicroBatchSize(), + maxMicroBatchSize, BatchRequestResponseConstants.MAX_OPERATIONS_IN_DIRECT_MODE_BATCH_REQUEST); this.minTargetMicroBatchSize = Math.max( - options.getMinTargetMicroBatchSize(), + minMicroBatchSize, Configs.getMinTargetBulkMicroBatchSize() ); this.targetMicroBatchSize = new AtomicInteger( Math.max( - Math.min(options.getInitialMicroBatchSize(), this.maxMicroBatchSize), + Math.min(initialMicroBatchSize, this.maxMicroBatchSize), Math.min(this.minTargetMicroBatchSize, this.maxMicroBatchSize))); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBatchRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBatchRetryPolicy.java new file mode 100644 index 000000000000..54ab5ee195d5 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBatchRetryPolicy.java @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.implementation.batch; + +import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.Exceptions; +import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.IRetryPolicy; +import com.azure.cosmos.implementation.ResourceThrottleRetryPolicy; +import com.azure.cosmos.implementation.RetryContext; +import com.azure.cosmos.implementation.ShouldRetryResult; +import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; +import reactor.core.publisher.Mono; + +public class TransactionalBatchRetryPolicy implements IRetryPolicy { + + private static final int MAX_RETRIES = 1; + + private final RxCollectionCache collectionCache; + private final RxPartitionKeyRangeCache partitionKeyRangeCache; + private final String collectionLink; + private final ResourceThrottleRetryPolicy resourceThrottleRetryPolicy; + private int attemptedRetries; + + TransactionalBatchRetryPolicy( + RxCollectionCache collectionCache, + RxPartitionKeyRangeCache partitionKeyRangeCache, + String resourceFullName, + ResourceThrottleRetryPolicy resourceThrottleRetryPolicy) { + + this.collectionCache = collectionCache; + this.partitionKeyRangeCache = partitionKeyRangeCache; + + // Similar to PartitionKeyMismatchRetryPolicy constructor. + collectionLink = Utils.getCollectionName(resourceFullName); + this.resourceThrottleRetryPolicy = resourceThrottleRetryPolicy; + } + + @Override + public Mono shouldRetry(Exception exception) { + + if (this.resourceThrottleRetryPolicy == null) { + return Mono.just(ShouldRetryResult.noRetry()); + } + + return this.resourceThrottleRetryPolicy.shouldRetry(exception); + } + + @Override + public RetryContext getRetryContext() { + return this.resourceThrottleRetryPolicy.getRetryContext(); + } + + Mono shouldRetryInMainSink(CosmosException exception) { + + int statusCode = exception.getStatusCode(); + int subStatusCode = exception.getSubStatusCode(); + + if (Exceptions.isStaledResourceException(statusCode, subStatusCode)) { + refreshCollectionCache(); + return Mono.just(true); + } + + if (statusCode == HttpConstants.StatusCodes.GONE) { + if (++this.attemptedRetries > MAX_RETRIES) { + return Mono.just(false); + } + + if ((subStatusCode == HttpConstants.SubStatusCodes.PARTITION_KEY_RANGE_GONE || + subStatusCode == HttpConstants.SubStatusCodes.COMPLETING_SPLIT_OR_MERGE || + subStatusCode == HttpConstants.SubStatusCodes.COMPLETING_PARTITION_MIGRATION)) { + return collectionCache + .resolveByNameAsync(null, collectionLink, null) + .flatMap(collection -> this.partitionKeyRangeCache + .tryGetOverlappingRangesAsync(null /*metaDataDiagnosticsContext*/, + collection.getResourceId(), + FeedRangeEpkImpl.forFullRange() + .getRange(), + true, + null /*properties*/) + .then(Mono.just(true))); + } + + return Mono.just(true); + } + + return Mono.just(false); + } + + private void refreshCollectionCache() { + this.collectionCache.refresh( + null, + this.collectionLink, + null); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutor.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutor.java index 7237a76b7b6e..e780355f6fcc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutor.java 
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/TransactionalBulkExecutor.java @@ -7,31 +7,45 @@ import com.azure.cosmos.CosmosBridgeInternal; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosItemSerializer; +import com.azure.cosmos.ThrottlingRetryOptions; import com.azure.cosmos.implementation.AsyncDocumentClient; +import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.CosmosSchedulers; import com.azure.cosmos.implementation.CosmosTransactionalBulkExecutionOptionsImpl; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.ResourceThrottleRetryPolicy; import com.azure.cosmos.implementation.UUIDs; +import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchRequestOptions; import com.azure.cosmos.models.CosmosBatchResponse; -import com.azure.cosmos.models.CosmosItemOperation; -import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.BridgeInternal; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.Disposable; +import reactor.core.Exceptions; import reactor.core.publisher.Flux; +import reactor.core.publisher.GroupedFlux; import reactor.core.publisher.Mono; import reactor.core.publisher.SignalType; +import reactor.core.publisher.Sinks; import reactor.core.scheduler.Scheduler; +import com.azure.cosmos.CosmosException; +import java.time.Duration; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import static com.azure.cosmos.implementation.batch.BatchRequestResponseConstants.DEFAULT_MAX_TRANSACTIONAL_BATCH_INTERVAL_AFTER_DRAINING_INCOMING_FLUX_IN_MILLISECONDS; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; /** @@ -40,7 +54,7 @@ * The actual execution of the flux of operations. It is done in following steps: * 1. Getting partition key range ID and grouping operations using that id. - * 2. For the flux of operations in a group, adding buffering based on size and a duration. + * 2. For the flux of operations in a group, using two simple counters totalOperationsInFlight and totalBatchesInFlight and a flushSignalFlux to control concurrency. * 3. For the operation we get in after buffering, process it using a batch request and return * a wrapper having request, response(if-any) and exception(if-any). Either response or exception will be there. * @@ -50,12 +64,8 @@ * 6. At the end and this is very essential, we close all the sinks as the sink continues to waits for more and the * execution isn't finished even if all the operations have been executed(figured out by completion call of source) * - * Note: Sink will move to a new interface from 3.5 and this is documentation for it: - * - https://github.com/reactor/reactor-core/blob/master/docs/asciidoc/processors.adoc - * - * For our use case, Sinks.many().unicast() will work. 
- */ -public final class TransactionalBulkExecutor implements Disposable { + **/ +public final class TransactionalBulkExecutor implements Disposable { private final static Logger logger = LoggerFactory.getLogger(TransactionalBulkExecutor.class); private final static AtomicLong instanceCount = new AtomicLong(0); @@ -63,15 +73,22 @@ public final class TransactionalBulkExecutor implements Disposable { private static final ImplementationBridgeHelpers.CosmosBatchRequestOptionsHelper.CosmosBatchRequestOptionsAccessor cosmosBatchRequestOptionsAccessor = ImplementationBridgeHelpers.CosmosBatchRequestOptionsHelper.getCosmosBatchRequestOptionsAccessor(); + private static final ImplementationBridgeHelpers.CosmosBatchHelper.CosmosBatchAccessor cosmosBatchAccessor = + ImplementationBridgeHelpers.CosmosBatchHelper.getCosmosBatchAccessor(); + private final CosmosAsyncContainer container; private final AsyncDocumentClient docClientWrapper; private final String operationContextText; private final OperationContextAndListenerTuple operationListener; private final Flux inputBatches; - private final CosmosTransactionalBulkExecutionOptionsImpl transactionalBulkExecutionOptions; + private final CosmosTransactionalBulkExecutionOptionsImpl transactionalBulkExecutionOptionsImpl; - // Handle gone error: + // Partition thresholds map + private final ConcurrentMap partitionScopeThresholds; + + private final AtomicBoolean mainSourceCompleted = new AtomicBoolean(false); + // Handle shutdown private final AtomicBoolean isDisposed = new AtomicBoolean(false); private final AtomicBoolean isShutdown = new AtomicBoolean(false); private final AtomicInteger totalCount; @@ -79,7 +96,14 @@ public final class TransactionalBulkExecutor implements Disposable { private final BulkExecutorDiagnosticsTracker diagnosticsTracker; private final CosmosItemSerializer effectiveItemSerializer; private final Scheduler executionScheduler; + private final ThrottlingRetryOptions throttlingRetryOptions; + private final static Sinks.EmitFailureHandler serializedEmitFailureHandler = new SerializedEmitFailureHandler(); + private final static Sinks.EmitFailureHandler serializedCompleteEmitFailureHandler = new SerializedCompleteEmitFailureHandler(); + private final Sinks.Many mainSink; + private final List> groupSinks; + private final List> flushSignalGroupSinks; + private final AtomicReference scheduledFutureForFlush; @SuppressWarnings({"unchecked"}) public TransactionalBulkExecutor( @@ -91,12 +115,12 @@ public TransactionalBulkExecutor( checkNotNull(inputBatches, "expected non-null inputOperations"); checkNotNull(transactionalBulkOptions, "expected non-null transactionalBulkOptions"); - this.transactionalBulkExecutionOptions = transactionalBulkOptions; + this.transactionalBulkExecutionOptionsImpl = transactionalBulkOptions; this.container = container; this.inputBatches = inputBatches; this.docClientWrapper = CosmosBridgeInternal.getAsyncDocumentClient(container.getDatabase()); this.effectiveItemSerializer = this.docClientWrapper.getEffectiveItemSerializer(transactionalBulkOptions.getCustomItemSerializer()); - operationListener = transactionalBulkExecutionOptions.getOperationContextAndListenerTuple(); + this.operationListener = transactionalBulkExecutionOptionsImpl.getOperationContextAndListenerTuple(); if (operationListener != null && operationListener.getOperationContext() != null) { operationContextText = identifier + "[" + operationListener.getOperationContext().toString() + "]"; @@ -104,22 +128,46 @@ public TransactionalBulkExecutor( 
operationContextText = identifier +"[n/a]"; } - this.diagnosticsTracker = transactionalBulkExecutionOptions.getDiagnosticsTracker(); + this.diagnosticsTracker = transactionalBulkExecutionOptionsImpl.getDiagnosticsTracker(); + this.throttlingRetryOptions = docClientWrapper.getConnectionPolicy().getThrottlingRetryOptions(); + + this.totalCount = new AtomicInteger(0); - totalCount = new AtomicInteger(0); + // Initialize main sinks for rerouting batches on partition changes + this.mainSink = Sinks.many().unicast().onBackpressureBuffer(); + // Initialize group sinks for retriable exceptions + this.groupSinks = new CopyOnWriteArrayList<>(); + // Initialize flush signal group sinks for flush batch signals + this.flushSignalGroupSinks = new CopyOnWriteArrayList<>(); + + // Initialize partition thresholds map and default options for thresholds. + this.partitionScopeThresholds = new ConcurrentHashMap<>(); Scheduler schedulerSnapshotFromOptions = transactionalBulkOptions.getSchedulerOverride(); this.executionScheduler = schedulerSnapshotFromOptions != null ? schedulerSnapshotFromOptions : CosmosSchedulers.TRANSACTIONAL_BULK_EXECUTOR_BOUNDED_ELASTIC; - logger.info("Instantiated TransactionalBulkExecutor, Context: {}", - this.operationContextText); + // setup this background task which will try to emit flush signal, + // A safeguard to prevent the pipeline got stuck in case when a cosmosBatch completes, + // the flush signal is not issued successfully or missed + int flushInterval = Configs.getBulkTransactionalBatchFlushIntervalInMs(); + this.scheduledFutureForFlush = new AtomicReference<>( + CosmosSchedulers + .TRANSACTIONAL_BULK_EXECUTOR_FLUSH_BOUNDED_ELASTIC + .schedulePeriodically( + this::onFlush, + flushInterval, + flushInterval, + TimeUnit.MILLISECONDS)); + + logger.info("Instantiated TransactionalBulkExecutor, Context: {}", this.operationContextText); } @Override public void dispose() { if (this.isDisposed.compareAndSet(false, true)) { + logDebugOrWarning("Transactional bulk executor is disposed"); long totalCountSnapshot = totalCount.get(); if (totalCountSnapshot == 0) { completeAllSinks(); @@ -134,6 +182,32 @@ public boolean isDisposed() { return this.isDisposed.get(); } + private void cancelFlushTask(boolean initializeAggressiveFlush) { + long flushIntervalAfterDrainingIncomingFlux = + DEFAULT_MAX_TRANSACTIONAL_BATCH_INTERVAL_AFTER_DRAINING_INCOMING_FLUX_IN_MILLISECONDS; + + Disposable newFlushTask = initializeAggressiveFlush + ? CosmosSchedulers + .TRANSACTIONAL_BULK_EXECUTOR_FLUSH_BOUNDED_ELASTIC + .schedulePeriodically( + this::onFlush, + flushIntervalAfterDrainingIncomingFlux, + flushIntervalAfterDrainingIncomingFlux, + TimeUnit.MILLISECONDS) + : null; + + Disposable scheduledFutureSnapshot = this.scheduledFutureForFlush.getAndSet(newFlushTask); + + if (scheduledFutureSnapshot != null) { + try { + scheduledFutureSnapshot.dispose(); + logDebugOrWarning("Cancelled all future scheduled flush tasks {}, Context: {}", getThreadInfo(), this.operationContextText); + } catch (Exception e) { + logger.warn("Failed to cancel scheduled flush tasks{}, Context: {}", getThreadInfo(), this.operationContextText, e); + } + } + } + private void logInfoOrWarning(String msg, Object... args) { if (this.diagnosticsTracker == null || !this.diagnosticsTracker.verboseLoggingAfterReEnqueueingRetriesEnabled()) { logger.info(msg, args); @@ -161,11 +235,30 @@ private void logDebugOrWarning(String msg, Object... 
args) { private void shutdown() { if (this.isShutdown.compareAndSet(false, true)) { logDebugOrWarning("Shutting down, Context: {}", this.operationContextText); + + this.cancelFlushTask(false); + + // Complete all flush group sinks so any waiting subscribers can finish + try { + flushSignalGroupSinks.forEach(Sinks.Many::tryEmitComplete); + logger.debug("All flush group sinks completed, Context: {}", this.operationContextText); + } catch (Throwable t) { + logger.warn("Error completing flush group sinks, Context: {}", this.operationContextText, t); + } + + // Complete all group sinks so any waiting subscribers can finish + try { + groupSinks.forEach(Sinks.Many::tryEmitComplete); + logger.debug("All group sinks completed, Context: {}", this.operationContextText); + } catch (Throwable t) { + logger.warn("Error completing group sinks, Context: {}", this.operationContextText, t); + } + logger.debug("Shutdown complete, Context: {}", this.operationContextText); } } - public Flux execute() { + public Flux execute() { return this .executeCore() .doFinally((SignalType signal) -> { @@ -187,81 +280,558 @@ public Flux execute() { }); } - private Flux executeCore() { + private Flux executeCore() { + + // For transactional batches, + // resolve partition key range id per transactional batch and group by the partition + // to allow dynamically adjust the concurrency based on the per-partition threshold + Integer nullableMaxConcurrentCosmosPartitions = transactionalBulkExecutionOptionsImpl.getMaxConcurrentCosmosPartitions(); + Mono maxConcurrentCosmosPartitionsMono = nullableMaxConcurrentCosmosPartitions != null ? + Mono.just(Math.max(256, nullableMaxConcurrentCosmosPartitions)) : + ImplementationBridgeHelpers + .CosmosAsyncContainerHelper + .getCosmosAsyncContainerAccessor() + .getFeedRanges(this.container, false).map(ranges -> Math.max(256, ranges.size() * 2)); + + return + maxConcurrentCosmosPartitionsMono + .subscribeOn(this.executionScheduler) + .flatMapMany(maxConcurrentCosmosPartitions -> { + logDebugOrWarning("TransactionalBulkExecutor.execute with MaxConcurrentPartitions: {}, Context: {}", + maxConcurrentCosmosPartitions, + this.operationContextText); + + return this.inputBatches + .publishOn(this.executionScheduler) + .onErrorMap(throwable -> { + logger.warn("{}: Error observed when processing input batches. Cause: {}, Context: {}", + getThreadInfo(), + throwable.getMessage(), + this.operationContextText, + throwable); + return throwable; + }) + .doOnNext(cosmosBatch -> { + totalCount.incrementAndGet(); + + setRetryPolicyForTransactionalBatch( + docClientWrapper, + this.container, + cosmosBatch, + this.throttlingRetryOptions + ); + + logger.trace( + "SetRetryPolicy for cosmos batch, PkValue: {}, TotalCount: {}, Context: {}, {}", + cosmosBatch.getPartitionKeyValue(), + totalCount.get(), + this.operationContextText, + getThreadInfo() + ); + }) + .doOnComplete(() -> { + mainSourceCompleted.set(true); + + long totalCountSnapshot = totalCount.get(); + logDebugOrWarning("Main source completed - # left items {}, Context: {}", + totalCountSnapshot, + this.operationContextText); + if (totalCountSnapshot == 0) { + // This is needed as there can be case that onComplete was called after last element was processed + // So complete the sink here also if count is 0, if source has completed and count isn't zero, + // then the last element in the doOnNext will close it. Sink doesn't mind in case of a double close. 
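The flush scheduling introduced above (the scheduledFutureForFlush field, the periodic task started in the constructor, and cancelFlushTask) follows a cancel-and-replace pattern around Reactor's Scheduler.schedulePeriodically, which returns a Disposable. Below is a minimal standalone sketch of that pattern only, assuming reactor-core is on the classpath; FlushDriver and flushOnce are illustrative names and are not part of this change.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import reactor.core.Disposable;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;

public final class FlushDriver {

    private final Scheduler scheduler = Schedulers.boundedElastic();
    private final AtomicReference<Disposable> scheduledFlush = new AtomicReference<>();

    // Start (or restart) the periodic flush with the given interval.
    void schedule(long intervalInMs) {
        Disposable task = scheduler.schedulePeriodically(
            this::flushOnce, intervalInMs, intervalInMs, TimeUnit.MILLISECONDS);
        replace(task);
    }

    // Stop flushing entirely (used on shutdown).
    void cancel() {
        replace(null);
    }

    // Swap in the new task first, then dispose the previous one,
    // mirroring the getAndSet + dispose order used by cancelFlushTask above.
    private void replace(Disposable newTask) {
        Disposable previous = scheduledFlush.getAndSet(newTask);
        if (previous != null) {
            previous.dispose();
        }
    }

    private void flushOnce() {
        // in the executor this is where a flush signal is emitted to each per-partition group sink
        System.out.println("flush tick on " + Thread.currentThread().getName());
    }

    public static void main(String[] args) throws InterruptedException {
        FlushDriver driver = new FlushDriver();
        driver.schedule(100);   // aggressive flushing while work is in flight
        Thread.sleep(350);
        driver.cancel();        // on shutdown, cancel any future flushes
    }
}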
+ logInfoOrWarning("Getting complete signal, Total count is 0, close all sinks"); + completeAllSinks(); + } else { + this.cancelFlushTask(true); + this.onFlush(); + + logDebugOrWarning("Scheduled new flush operation {}, Context: {}", getThreadInfo(), this.operationContextText); + } + }) + .mergeWith(mainSink.asFlux()) + .subscribeOn(this.executionScheduler) + .flatMap(cosmosBatch -> { + logger.trace("Before Resolve PkRangeId, PkValue: {}, OpCount: {}, Context: {} {}", + cosmosBatch.getPartitionKeyValue(), + cosmosBatch.getOperations().size(), + this.operationContextText, + getThreadInfo()); + + // resolve partition key range id and attach PartitionScopeThresholds + return resolvePartitionKeyRangeIdForBatch(cosmosBatch) + .map(pkRangeId -> { + PartitionScopeThresholds thresholds = + this.partitionScopeThresholds.computeIfAbsent( + pkRangeId, + newPkRangeId -> new PartitionScopeThresholds(pkRangeId, this.transactionalBulkExecutionOptionsImpl)); + + logTraceOrWarning("Resolved PkRangeId: {}, PkValue: {}, OpCount: {}, Context: {} {}", + pkRangeId, + cosmosBatch.getPartitionKeyValue(), + cosmosBatch.getOperations().size(), + this.operationContextText, + getThreadInfo()); + + return Pair.of(thresholds, cosmosBatch); + }); + }) + .groupBy(Pair::getKey, Pair::getValue) + .flatMap(this::executePartitionedGroupTransactional, maxConcurrentCosmosPartitions) + .subscribeOn(this.executionScheduler) + .doOnNext(response -> doOnResponseOrError()) + .doOnError(throwable -> doOnResponseOrError()) + .doOnComplete(() -> { + int totalCountSnapshot = totalCount.get(); + boolean mainSourceCompletedSnapshot = mainSourceCompleted.get(); + if (totalCountSnapshot == 0 && mainSourceCompletedSnapshot) { + // It is possible that count is zero but there are more elements in the source. + // Count 0 also signifies that there are no pending elements in any sink. + logInfoOrWarning( + "DoOnComplete: All work completed, Context: {} {}", + this.operationContextText, + getThreadInfo()); + completeAllSinks(); + } else { + logDebugOrWarning( + "DoOnComplete: Work left - TotalCount after decrement: {}, main sink completed {}, Context: {} {}", + totalCountSnapshot, + mainSourceCompletedSnapshot, + this.operationContextText, + getThreadInfo()); + } + }); + }); + } + + private void doOnResponseOrError() { + int totalCountAfterDecrement = totalCount.decrementAndGet(); + boolean mainSourceCompletedSnapshot = mainSourceCompleted.get(); + if (totalCountAfterDecrement == 0 && mainSourceCompletedSnapshot) { + // It is possible that count is zero but there are more elements in the source. + // Count 0 also signifies that there are no pending elements in any sink. + logInfoOrWarning("All work completed, TotalCount: {}, Context: {} {}", + totalCountAfterDecrement, + this.operationContextText, + getThreadInfo()); + completeAllSinks(); + } else { + if (totalCountAfterDecrement == 0) { + logDebugOrWarning( + "No Work left - but mainSource not yet completed, Context: {} {}", + this.operationContextText, + getThreadInfo()); + } + logTraceOrWarning( + "Work left - TotalCount after decrement: {}, main sink completed {}, Context: {} {}", + totalCountAfterDecrement, + mainSourceCompletedSnapshot, + this.operationContextText, + getThreadInfo()); + } + } + + private Flux executePartitionedGroupTransactional( + GroupedFlux partitionedGroupFluxOfBatches) { - // For transactional batches, batches are pre-constructed by the writer with proper partition key grouping. - // We just need to execute each pre-built batch. 
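executeCore and executePartitionedGroupTransactional above build a single Reactor pipeline: incoming batches are merged with a retry sink, grouped by resolved partition key range, and each group is drained with bounded concurrency. A reduced, self-contained sketch of that shape follows; keyOf and process are placeholders (the real code groups by PartitionScopeThresholds and executes Cosmos batches).

import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.publisher.Sinks;

public final class GroupedPipelineSketch {

    public static void main(String[] args) {
        // Retry sink: items needing another attempt would be re-emitted here and merged back in.
        Sinks.Many<Integer> retrySink = Sinks.many().unicast().onBackpressureBuffer();
        retrySink.tryEmitNext(100);   // pretend one item was re-enqueued for retry
        retrySink.tryEmitComplete();  // complete it so the merged flux can terminate in this sketch

        int maxConcurrentGroups = 4;  // the executor derives its bound from options or the feed range count

        Flux.range(1, 20)
            .mergeWith(retrySink.asFlux())
            .groupBy(GroupedPipelineSketch::keyOf)  // one group per key (per partition key range in the executor)
            .flatMap(group -> group.concatMap(item -> process(group.key(), item)),
                     maxConcurrentGroups)           // bound how many groups are processed concurrently
            .doOnNext(System.out::println)
            .blockLast();
    }

    private static int keyOf(int item) {
        return item % 3;  // placeholder for "resolve partition key range id"
    }

    private static Mono<String> process(int key, int item) {
        return Mono.just("group " + key + " -> item " + item);
    }
}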
- Integer nullableMaxConcurrentCosmosPartitions = transactionalBulkExecutionOptions.getMaxConcurrentCosmosPartitions(); - int maxConcurrentBatches = nullableMaxConcurrentCosmosPartitions != null ? - Math.max(256, nullableMaxConcurrentCosmosPartitions) : 256; + final PartitionScopeThresholds thresholds = partitionedGroupFluxOfBatches.key(); - logDebugOrWarning("TransactionalBulkExecutor.executeCore with MaxConcurrentBatches: {}, Context: {}", - maxConcurrentBatches, - this.operationContextText); + final Sinks.Many groupSink = Sinks.many().unicast().onBackpressureBuffer(); + final Flux groupFlux = groupSink.asFlux(); + groupSinks.add(groupSink); - return this.inputBatches + Sinks.Many flushSignalGroupSink = Sinks.many().multicast().directBestEffort(); + Flux flushSignalGroupFlux = flushSignalGroupSink.asFlux().share(); + flushSignalGroupSinks.add(flushSignalGroupSink); + + AtomicInteger totalOperationsInFlight = new AtomicInteger(0); + AtomicInteger totalBatchesInFlight = new AtomicInteger(0); + + return partitionedGroupFluxOfBatches + .mergeWith(groupFlux) .publishOn(this.executionScheduler) - .onErrorMap(throwable -> { - logger.warn("{}: Error observed when processing input batches. Cause: {}, Context: {}", - getThreadInfo(), - throwable.getMessage(), - this.operationContextText, - throwable); - return throwable; + .concatMap(cosmosBatch -> { + // using concatMap here for a sequential processing + // this part is to decide whether the cosmos batch can be flushed to downstream for processing + // based on the per-partition threshold and concurrency config + return Mono.defer(() -> { + if (canFlushCosmosBatch( + totalOperationsInFlight, + totalBatchesInFlight, + thresholds, + cosmosBatch)) { + + return Mono.just(cosmosBatch); + } + + // there is no capacity for new cosmos batch to be executed currently + // wait for flush signal + // the flush signal can either come from when a cosmos batch has completed or the background flush task + return flushSignalGroupFlux + .filter((flushSignal) -> + canFlushCosmosBatch( + totalOperationsInFlight, + totalBatchesInFlight, + thresholds, + cosmosBatch)) + .next() + .then(); + }) + .then(Mono.defer(() -> { + totalOperationsInFlight.addAndGet(cosmosBatch.getOperations().size()); + totalBatchesInFlight.incrementAndGet(); + logTraceOrWarning( + "Flush cosmos batch, PKRangeId: {}, PkValue: {}, TotalOpsInFlight: {}, TotalBatchesInFlight: {}, BatchOpCount: {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + totalOperationsInFlight.get(), + totalBatchesInFlight.get(), + cosmosBatch.getOperations().size(), + this.operationContextText, + getThreadInfo()); + + return Mono.just(cosmosBatch); + })); }) - .flatMap( - this::executeTransactionalBatch, - maxConcurrentBatches) - .subscribeOn(this.executionScheduler); + .flatMap(cosmosBatch -> + this.executeTransactionalBatchWithThresholds( + cosmosBatch, + thresholds, + groupSink, + flushSignalGroupSink, + totalBatchesInFlight, + totalOperationsInFlight)); + } + + private boolean canFlushCosmosBatch( + AtomicInteger totalOperationsInFlight, + AtomicInteger totalConcurrentBatchesInFlight, + PartitionScopeThresholds partitionScopeThresholds, + CosmosBatch cosmosBatch) { + + int targetBatchSizeSnapshot = partitionScopeThresholds.getTargetMicroBatchSizeSnapshot(); + int totalOpsInFlightSnapshot = totalOperationsInFlight.get(); + int totalBatchesInFlightSnapshot = totalConcurrentBatchesInFlight.get(); + + boolean canFlush = (cosmosBatch.getOperations().size() + 
totalOpsInFlightSnapshot <= targetBatchSizeSnapshot) + || (totalBatchesInFlightSnapshot <= 0); + + logTraceOrWarning( + "canFlushCosmosBatch - PkRangeId: {}, PkValue: {}, TargetBatchSize {}, TotalOpsInFlight: {}, TotalBatchesInFlight: {}, BatchOpCount: {}, CanFlush {}, Context: {} {}", + partitionScopeThresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + targetBatchSizeSnapshot, + totalOpsInFlightSnapshot, + totalBatchesInFlightSnapshot, + cosmosBatch.getOperations().size(), + canFlush, + this.operationContextText, + getThreadInfo()); + + return canFlush; } - private Mono executeTransactionalBatch(CosmosBatch cosmosBatch) { - // Extract operations from the pre-built batch - List operations = cosmosBatch.getOperations(); + private void setRetryPolicyForTransactionalBatch( + AsyncDocumentClient docClientWrapper, + CosmosAsyncContainer container, + CosmosBatch cosmosBatch, + ThrottlingRetryOptions throttlingRetryOptions) { + + ResourceThrottleRetryPolicy resourceThrottleRetryPolicy = new ResourceThrottleRetryPolicy( + throttlingRetryOptions.getMaxRetryAttemptsOnThrottledRequests(), + throttlingRetryOptions.getMaxRetryWaitTime(), + false); + + TransactionalBatchRetryPolicy retryPolicy = new TransactionalBatchRetryPolicy( + docClientWrapper.getCollectionCache(), + docClientWrapper.getPartitionKeyRangeCache(), + BridgeInternal.getLink(container), + resourceThrottleRetryPolicy); + + cosmosBatchAccessor.setRetryPolicy(cosmosBatch, retryPolicy); + } + + private Mono enqueueForRetry( + Duration backOffTime, + Sinks.Many groupSink, + CosmosBatch cosmosBatch, + PartitionScopeThresholds thresholds, + String batchTrackingId) { + + // Record an enqueued retry for threshold adjustments + this.recordResponseForRetryInThreshold(cosmosBatch, thresholds); + + if (backOffTime == null || backOffTime.isZero()) { + logDebugOrWarning( + "enqueueForRetry - Retry in group sink for PkRangeId: {}, PkValue: {}, Batch trackingId: {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + batchTrackingId, + this.operationContextText, + getThreadInfo()); + + groupSink.emitNext(cosmosBatch, serializedEmitFailureHandler); + return Mono.empty(); + } else { + logDebugOrWarning( + "enqueueForRetry - Retry in group sink for PkRangeId: {}, PkValue: {}, BackoffTime: {}, Batch trackingId: {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + backOffTime, + batchTrackingId, + this.operationContextText, + getThreadInfo()); + + return Mono + .delay(backOffTime) + .flatMap((dummy) -> { + groupSink.emitNext(cosmosBatch, serializedCompleteEmitFailureHandler); + return Mono.empty(); + }); + } + } + + private Mono executeTransactionalBatchWithThresholds( + CosmosBatch cosmosBatch, + PartitionScopeThresholds thresholds, + Sinks.Many groupSink, + Sinks.Many flushSignalGroupSink, + AtomicInteger totalBatchesInFlight, + AtomicInteger totalOperationsInFlight) { String batchTrackingId = UUIDs.nonBlockingRandomUUID().toString(); - PartitionKey partitionKey = cosmosBatch.getPartitionKeyValue(); - logDebugOrWarning( - "Executing transactional batch - {} operations, PK: {}, TrackingId: {}, Context: {}", - operations.size(), - partitionKey, + logTraceOrWarning( + "executeTransactionalBatchWithThresholds - PkRangeId: {}, PkValue: {}, BatchOpCount:{}, TrackingId: {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + cosmosBatch.getOperations().size(), batchTrackingId, - this.operationContextText); 
+ this.operationContextText, + getThreadInfo()); - // Set up request options CosmosBatchRequestOptions batchRequestOptions = getBatchRequestOptions(); + return this.container .executeCosmosBatch(cosmosBatch, batchRequestOptions) - .doOnSuccess(response -> { + .publishOn(this.executionScheduler) + .flatMap(response -> { logTraceOrWarning( - "Response for batch of partitionKey %s - status code %s, ActivityId: %s, batch TrackingId %s", + "Response for transactional batch - PkRangeId: {}, PkValue: {}, BatchOpCount: {}, ResponseOpCount: {}, StatusCode: {}, SubStatusCode: {}, ActivityId: {}, Batch trackingId {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), cosmosBatch.getPartitionKeyValue(), + cosmosBatch.getOperations().size(), + response.getResults().size(), response.getStatusCode(), + response.getSubStatusCode(), response.getActivityId(), + batchTrackingId, + this.operationContextText, + getThreadInfo()); + + if (diagnosticsTracker != null && response.getDiagnostics() != null) { + diagnosticsTracker.trackDiagnostics(response.getDiagnostics().getDiagnosticsContext()); + } + + if (response.isSuccessStatusCode()) { + recordSuccessfulResponseInThreshold(cosmosBatch, thresholds); + return Mono.just( + new CosmosBulkTransactionalBatchResponse( + cosmosBatch, + response, + null) + ); + } + + return handleUnsuccessfulResponse(thresholds, batchTrackingId, cosmosBatch, response, groupSink); + }) + .onErrorResume(throwable -> { + if (!(throwable instanceof Exception)) { + return Mono.error(Exceptions.propagate(throwable)); + } + + Exception exception = (Exception) throwable; + return this.handleTransactionalBatchExecutionException( + cosmosBatch, + exception, + groupSink, + thresholds, batchTrackingId); }) - .doOnError(throwable -> { + .doFinally(signalType -> { + int totalOpsInFlightSnapshot = totalOperationsInFlight.addAndGet(-cosmosBatch.getOperations().size()); + int totalBatchesInFlightSnapshot = totalBatchesInFlight.decrementAndGet(); + flushSignalGroupSink.emitNext(1, serializedEmitFailureHandler); logTraceOrWarning( - "Failed to get response for batch of partitionKey %s - batch TrackingId %s", + "CosmosBatch completed, emit flush signal - SignalType: {}, PkRangeId: {}, PkValue: {}, BatchOpCount: {}, TotalOpsInFlight: {}, TotalBatchesInFlight: {}, Context: {} {}", + signalType, + thresholds.getPartitionKeyRangeId(), cosmosBatch.getPartitionKeyValue(), - batchTrackingId, - throwable); + cosmosBatch.getOperations().size(), + totalOpsInFlightSnapshot, + totalBatchesInFlightSnapshot, + this.operationContextText, + getThreadInfo()); }) .subscribeOn(this.executionScheduler); } + private void recordSuccessfulResponseInThreshold(CosmosBatch cosmosBatch, PartitionScopeThresholds thresholds) { + for (int i = 0; i < cosmosBatch.getOperations().size(); i++) { + thresholds.recordSuccessfulOperation(); + } + } + + private void recordResponseForRetryInThreshold(CosmosBatch cosmosBatch, PartitionScopeThresholds thresholds) { + for (int i = 0; i < cosmosBatch.getOperations().size(); i++) { + thresholds.recordEnqueuedRetry(); + } + } + + private Mono handleUnsuccessfulResponse( + PartitionScopeThresholds thresholds, + String batchTrackingId, + CosmosBatch cosmosBatch, + CosmosBatchResponse response, + Sinks.Many groupSink) { + + logDebugOrWarning( + "handleUnsuccessfulResponse - PkRangeId: {}, PkValue: {}, BatchOpCount: {}, StatusCode {}, SubStatusCode {}, Batch trackingId {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + 
cosmosBatch.getOperations().size(), + response.getStatusCode(), + response.getSubStatusCode(), + batchTrackingId, + this.operationContextText, + getThreadInfo()); + + // Create CosmosException for retry policy to understand: + CosmosException exception = BridgeInternal.createCosmosException( + null, + response.getStatusCode(), + null, + BulkExecutorUtil.getResponseHeadersFromBatchOperationResult(response)); + BridgeInternal.setSubStatusCode(exception, response.getSubStatusCode()); + + return this.handleTransactionalBatchExecutionException(cosmosBatch, exception, groupSink, thresholds, batchTrackingId) + .onErrorResume(throwable -> { + logDebugOrWarning( + "handleUnsuccessfulResponse - Can not be retried. PkRangeId: {}, PkValue: {}, Batch trackingId {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + batchTrackingId, + this.operationContextText, + getThreadInfo(), + throwable); + + return Mono.just( + new CosmosBulkTransactionalBatchResponse( + cosmosBatch, + response, + null + ) + ); // the operation can not be retried, return the original response + }); + } + + private Mono handleTransactionalBatchExecutionException( + CosmosBatch cosmosBatch, + Exception exception, + Sinks.Many groupSink, + PartitionScopeThresholds thresholds, + String batchTrackingId) { + + logDebugOrWarning( + "HandleTransactionalBatchExecutionException - PkRangeId: {}, PkRangeValue: {}, Exception {}, Batch TrackingId {}, Context: {} {}", + cosmosBatch.getPartitionKeyValue(), + thresholds.getPartitionKeyRangeId(), + exception, + batchTrackingId, + this.operationContextText, + getThreadInfo()); + + if (exception instanceof CosmosException) { + CosmosException cosmosException = (CosmosException) exception; + + return cosmosBatchAccessor + .getRetryPolicy(cosmosBatch) + .shouldRetryInMainSink(cosmosException) + .flatMap(shouldRetryInMainSink -> { + if (shouldRetryInMainSink) { + logDebugOrWarning( + "HandleTransactionalBatchExecutionException - Retry in main sink for PkRangeId: {}, PkValue: {}, Error {}, Batch TrackingId {}, Context: {} {}", + thresholds.getPartitionKeyRangeId(), + cosmosBatch.getPartitionKeyValue(), + exception, + batchTrackingId, + this.operationContextText, + getThreadInfo()); + + // retry - but don't mark as enqueued for retry in thresholds + mainSink.emitNext(cosmosBatch, serializedEmitFailureHandler); //TODO: validate booking marking for concurrent ops in flight + return Mono.empty(); + } else { + return retryOtherExceptions( + cosmosBatch, + groupSink, + cosmosBatchAccessor.getRetryPolicy(cosmosBatch), + cosmosException, + thresholds, + batchTrackingId); + } + }); + } + + return Mono.just( + new CosmosBulkTransactionalBatchResponse(cosmosBatch, null, exception) + ); + } + + private Mono retryOtherExceptions( + CosmosBatch cosmosBatch, + Sinks.Many groupSink, + TransactionalBatchRetryPolicy retryPolicy, + CosmosException cosmosException, + PartitionScopeThresholds thresholds, + String batchTrackingId) { + + return retryPolicy.shouldRetry(cosmosException).flatMap(result -> { + if (result.shouldRetry) { + return this.enqueueForRetry(result.backOffTime, groupSink, cosmosBatch, thresholds, batchTrackingId); + } else { + return Mono.just( + new CosmosBulkTransactionalBatchResponse( + cosmosBatch, + null, + cosmosException + ) + ); + } + }); + } + + private Mono resolvePartitionKeyRangeIdForBatch(CosmosBatch batch) { + checkNotNull(batch, "expected non-null batch"); + + return BulkExecutorUtil.resolvePartitionKeyRangeId( + docClientWrapper, + container, + 
batch.getPartitionKeyValue(), + null); + } + private CosmosBatchRequestOptions getBatchRequestOptions() { CosmosBatchRequestOptions batchRequestOptions = new CosmosBatchRequestOptions(); - batchRequestOptions.setExcludedRegions(transactionalBulkExecutionOptions.getExcludedRegions()); - batchRequestOptions.setKeywordIdentifiers(transactionalBulkExecutionOptions.getKeywordIdentifiers()); + batchRequestOptions.setExcludedRegions(transactionalBulkExecutionOptionsImpl.getExcludedRegions()); + batchRequestOptions.setKeywordIdentifiers(transactionalBulkExecutionOptionsImpl.getKeywordIdentifiers()); cosmosBatchRequestOptionsAccessor .setThroughputControlGroupName( batchRequestOptions, - transactionalBulkExecutionOptions.getThroughputControlGroupName()); + transactionalBulkExecutionOptionsImpl.getThroughputControlGroupName()); CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicySnapshot = - transactionalBulkExecutionOptions.getCosmosEndToEndLatencyPolicyConfig(); + transactionalBulkExecutionOptionsImpl.getCosmosEndToEndLatencyPolicyConfig(); if (e2eLatencyPolicySnapshot != null) { cosmosBatchRequestOptionsAccessor .setEndToEndOperationLatencyPolicyConfig( @@ -269,7 +839,7 @@ private CosmosBatchRequestOptions getBatchRequestOptions() { e2eLatencyPolicySnapshot); } - Map customOptions = transactionalBulkExecutionOptions.getHeaders(); + Map customOptions = transactionalBulkExecutionOptionsImpl.getHeaders(); if (customOptions != null && !customOptions.isEmpty()) { for(Map.Entry entry : customOptions.entrySet()) { cosmosBatchRequestOptionsAccessor @@ -279,15 +849,85 @@ private CosmosBatchRequestOptions getBatchRequestOptions() { cosmosBatchRequestOptionsAccessor .setOperationContextAndListenerTuple(batchRequestOptions, operationListener); + batchRequestOptions.setCustomItemSerializer(this.effectiveItemSerializer); + + cosmosBatchRequestOptionsAccessor.setDisableRetryForThrottledBatchRequest(batchRequestOptions, true); + return batchRequestOptions; } private void completeAllSinks() { logInfoOrWarning("Completing execution, Context: {}", this.operationContextText); - logger.debug("Executor service shut down, Context: {}", this.operationContextText); + + try { + mainSink.emitComplete(serializedCompleteEmitFailureHandler); + } catch (Throwable t) { + logger.warn("Failed to complete main sink, Context: {}", this.operationContextText, t); + } + this.shutdown(); } + private void onFlush() { + try { + logTraceOrWarning("onFlush - emitting flush signal for each group"); + this.flushSignalGroupSinks.forEach(sink -> sink.emitNext(1, serializedEmitFailureHandler)); + } catch(Throwable t) { + logger.error("Callback invocation 'onFlush' failed. 
Context: {}", this.operationContextText, t); + } + } + + private static class SerializedEmitFailureHandler implements Sinks.EmitFailureHandler { + + @Override + public boolean onEmitFailure(SignalType signalType, Sinks.EmitResult emitResult) { + if (emitResult.equals(Sinks.EmitResult.FAIL_NON_SERIALIZED)) { + logger.debug( + "SerializedEmitFailureHandler.onEmitFailure, emit result {} - Signal:{}, Result: {}", + Sinks.EmitResult.FAIL_NON_SERIALIZED, + signalType, + emitResult); + + return true; + } + + if (emitResult.equals((Sinks.EmitResult.FAIL_ZERO_SUBSCRIBER))) { + // For flushSignalGroupSink which is a Sinks.Many.Multicast, when this happens, it means there is no active subscriber + // this can happen usually at the end of the execution when all the operations have flushed + logger.trace( + "SerializedEmitFailureHandler.onEmitFailure, emit result {} - Signal:{}, Result: {}", + Sinks.EmitResult.FAIL_ZERO_SUBSCRIBER, + signalType, + emitResult); + + return false; + } + + logger.error("SerializedEmitFailureHandler.onEmitFailure - Signal:{}, Result: {}", signalType, emitResult); + return false; + } + } + + private static class SerializedCompleteEmitFailureHandler implements Sinks.EmitFailureHandler { + + @Override + public boolean onEmitFailure(SignalType signalType, Sinks.EmitResult emitResult) { + if (emitResult.equals(Sinks.EmitResult.FAIL_NON_SERIALIZED)) { + logger.debug("SerializedCompleteEmitFailureHandler.onEmitFailure - Signal:{}, Result: {}", signalType, emitResult); + + return true; + } + + if (emitResult == Sinks.EmitResult.FAIL_CANCELLED || emitResult == Sinks.EmitResult.FAIL_TERMINATED) { + logger.debug("SerializedCompleteEmitFailureHandler.onEmitFailure - Main sink already completed, Signal:{}, Result: {}", signalType, emitResult); + return false; + } + + logger.error("SerializedCompleteEmitFailureHandler.onEmitFailure - Signal:{}, Result: {}", signalType, emitResult); + return false; + } + } + private static String getThreadInfo() { StringBuilder sb = new StringBuilder(); Thread t = Thread.currentThread(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatch.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatch.java index fe9b5aab8c3b..02c1a0486106 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatch.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatch.java @@ -6,6 +6,7 @@ import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.batch.ItemBatchOperation; +import com.azure.cosmos.implementation.batch.TransactionalBatchRetryPolicy; import java.util.ArrayList; import java.util.List; @@ -85,6 +86,7 @@ public final class CosmosBatch { private final List> operations; private final PartitionKey partitionKey; + private TransactionalBatchRetryPolicy retryPolicy; CosmosBatch(PartitionKey partitionKey) { checkNotNull(partitionKey, "expected non-null partitionKey"); @@ -391,12 +393,37 @@ List> getOperationsInternal() { return operations; } + CosmosBatch setRetryPolicy(TransactionalBatchRetryPolicy transactionalBatchRetryPolicy) { + this.retryPolicy = transactionalBatchRetryPolicy; + return this; + } + + TransactionalBatchRetryPolicy getRetryPolicy() { + return this.retryPolicy; + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to 
access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// static void initialize() { ImplementationBridgeHelpers.CosmosBatchHelper.setCosmosBatchAccessor( - cosmosBatch -> cosmosBatch.getOperationsInternal()); + new ImplementationBridgeHelpers.CosmosBatchHelper.CosmosBatchAccessor() { + @Override + public List> getOperationsInternal(CosmosBatch cosmosBatch) { + return cosmosBatch.getOperationsInternal(); + } + + @Override + public CosmosBatch setRetryPolicy(CosmosBatch cosmosBatch, TransactionalBatchRetryPolicy transactionalBatchRetryPolicy) { + return cosmosBatch.setRetryPolicy(transactionalBatchRetryPolicy); + } + + @Override + public TransactionalBatchRetryPolicy getRetryPolicy(CosmosBatch cosmosBatch) { + return cosmosBatch.getRetryPolicy(); + } + } + ); } static { initialize(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatchRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatchRequestOptions.java index f5d21d378bba..7d5a27324f95 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatchRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosBatchRequestOptions.java @@ -37,6 +37,7 @@ public final class CosmosBatchRequestOptions { private String throughputControlGroupName; private CosmosEndToEndOperationLatencyPolicyConfig e2ePolicy; private OperationContextAndListenerTuple operationContextAndListenerTuple; + private boolean disableRetryForThrottledBatchRequest = false; /** * Creates an instance of the CosmosBatchRequestOptions class @@ -56,6 +57,8 @@ public CosmosBatchRequestOptions() { if (toBeCloned.excludeRegions != null) { this.excludeRegions = new ArrayList<>(toBeCloned.excludeRegions); } + + this.disableRetryForThrottledBatchRequest = toBeCloned.disableRetryForThrottledBatchRequest; } /** @@ -120,6 +123,15 @@ public CosmosDiagnosticsThresholds getDiagnosticsThresholds() { return this.thresholds; } + boolean shouldDisableRetryForThrottledBatchRequest() { + return disableRetryForThrottledBatchRequest; + } + + CosmosBatchRequestOptions setDisableRetryForThrottledBatchRequest(boolean disableRetryForThrottledBatchRequest) { + this.disableRetryForThrottledBatchRequest = disableRetryForThrottledBatchRequest; + return this; + } + RequestOptions toRequestOptions() { final RequestOptions requestOptions = new RequestOptions(); requestOptions.setConsistencyLevel(getConsistencyLevel()); @@ -306,6 +318,18 @@ public CosmosBatchRequestOptions setOperationContextAndListenerTuple( return cosmosBatchRequestOptions.setOperationContextAndListenerTuple(operationContextAndListenerTuple); } + @Override + public CosmosBatchRequestOptions setDisableRetryForThrottledBatchRequest( + CosmosBatchRequestOptions cosmosBatchRequestOptions, + boolean disableRetryForThrottledBatchRequest) { + return cosmosBatchRequestOptions.setDisableRetryForThrottledBatchRequest(disableRetryForThrottledBatchRequest); + } + + @Override + public boolean shouldDisableRetryForThrottledBatchRequest(CosmosBatchRequestOptions cosmosBatchRequestOptions) { + return cosmosBatchRequestOptions.shouldDisableRetryForThrottledBatchRequest(); + } + } );
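For orientation, the executor above ultimately drives the existing public transactional batch surface of the Java SDK, one CosmosBatch per partition key, where all operations in a batch commit or roll back together. A minimal usage sketch of that surface, assuming a container partitioned on /pk; the endpoint, key, database and container names are placeholders.

import com.azure.cosmos.CosmosAsyncClient;
import com.azure.cosmos.CosmosAsyncContainer;
import com.azure.cosmos.CosmosClientBuilder;
import com.azure.cosmos.models.CosmosBatch;
import com.azure.cosmos.models.CosmosBatchResponse;
import com.azure.cosmos.models.PartitionKey;

public final class TransactionalBatchSketch {

    public static void main(String[] args) {
        CosmosAsyncClient client = new CosmosClientBuilder()
            .endpoint("<account-endpoint>")   // placeholder
            .key("<account-key>")             // placeholder
            .buildAsyncClient();

        CosmosAsyncContainer container = client
            .getDatabase("<database>")        // placeholder
            .getContainer("<container>");     // placeholder, partition key path assumed to be /pk

        // All operations in one CosmosBatch share the same partition key and
        // either all succeed or none are applied.
        CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey("tenant-1"));
        batch.createItemOperation(new Document("1", "tenant-1"));
        batch.upsertItemOperation(new Document("2", "tenant-1"));

        CosmosBatchResponse response = container
            .executeCosmosBatch(batch)
            .block();

        if (response != null && response.isSuccessStatusCode()) {
            System.out.println("Batch committed, status " + response.getStatusCode());
        } else if (response != null) {
            System.out.println("Batch failed, status " + response.getStatusCode()
                + ", sub-status " + response.getSubStatusCode());
        }

        client.close();
    }

    // simple POJO used only for this sketch
    public static final class Document {
        public String id;
        public String pk;

        public Document() { }

        public Document(String id, String pk) {
            this.id = id;
            this.pk = pk;
        }
    }
}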