Skip to content

Commit

Permalink
Adding min. retry time per region for 404/1002 SessionTokenRetryPolic…
Browse files Browse the repository at this point in the history
…y when RegionSwitchHint is Remote (#37143)

* Adding min. retry time per region for 404/1002 SessionTokenRetryPolicy when RegionSwitchHint is Remote

* Update SessionRetryOptionsTests.java

* Iterating on session retry changes

* Renaming property

* Update SessionTokenMismatchRetryPolicy.java

* Updating JavaDoc

* Update SessionTokenMismatchRetryPolicy.java

* Update TestSuiteBase.java
  • Loading branch information
FabianMeiswinkel committed Oct 18, 2023
1 parent 03b6915 commit 5bfd1ed
Show file tree
Hide file tree
Showing 11 changed files with 490 additions and 37 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.azure.cosmos.implementation.DatabaseAccountLocation;
import com.azure.cosmos.implementation.GlobalEndpointManager;
import com.azure.cosmos.implementation.HttpConstants;
import com.azure.cosmos.implementation.ImplementationBridgeHelpers;
import com.azure.cosmos.implementation.OperationType;
import com.azure.cosmos.implementation.TestConfigurations;
import com.azure.cosmos.implementation.throughputControl.TestItem;
Expand Down Expand Up @@ -59,6 +60,10 @@
import static org.testng.AssertJUnit.fail;

public class SessionRetryOptionsTests extends TestSuiteBase {
// Bridge accessor used by the builder tests in this class to read the in-region retry
// settings (retry count and retry time) stored on a SessionRetryOptions instance.
private final static ImplementationBridgeHelpers.CosmosSessionRetryOptionsHelper.CosmosSessionRetryOptionsAccessor
sessionRetryOptionsAccessor = ImplementationBridgeHelpers
.CosmosSessionRetryOptionsHelper
.getCosmosSessionRetryOptionsAccessor();

private CosmosAsyncClient cosmosAsyncClient;
private CosmosAsyncContainer cosmosAsyncContainer;
Expand Down Expand Up @@ -107,6 +112,79 @@ public Object[][] writeOperationContextProvider() {
};
}

/**
 * Verifies that a builder on which only the region switch hint is set produces options whose
 * in-region retry count and in-region retry time equal the values reported by {@link Configs}.
 */
@Test(groups = {"unit"}, timeOut = TIMEOUT)
public void SessionRetryOptionsBuilder_defaultValues() {
    SessionRetryOptionsBuilder builder = new SessionRetryOptionsBuilder();
    builder.regionSwitchHint(CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED);
    SessionRetryOptions builtOptions = builder.build();

    // Retry count must match the Configs-provided value.
    int actualRetryCount = sessionRetryOptionsAccessor.getMaxInRegionRetryCount(builtOptions);
    assertThat(actualRetryCount)
        .isEqualTo(Configs.getMaxRetriesInLocalRegionWhenRemoteRegionPreferred());

    // Retry time must match the Configs-provided value.
    Duration actualRetryTime = sessionRetryOptionsAccessor.getMinInRegionRetryTime(builtOptions);
    assertThat(actualRetryTime)
        .isEqualTo(Configs.getMinRetryTimeInLocalRegionWhenRemoteRegionPreferred());
}

/**
 * Verifies that explicitly configured in-region retry settings (1 second, 3 retries)
 * are carried over unchanged into the built {@link SessionRetryOptions}.
 */
@Test(groups = {"unit"}, timeOut = TIMEOUT)
public void SessionRetryOptionsBuilder_customValues() {
    SessionRetryOptionsBuilder builder = new SessionRetryOptionsBuilder();
    builder.regionSwitchHint(CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED);
    builder.minInRegionRetryTime(Duration.ofSeconds(1));
    builder.maxInRegionRetryCount(3);
    SessionRetryOptions optionsWithCustomValues = builder.build();

    int actualRetryCount = sessionRetryOptionsAccessor.getMaxInRegionRetryCount(optionsWithCustomValues);
    assertThat(actualRetryCount)
        .isEqualTo(3);

    Duration actualRetryTime = sessionRetryOptionsAccessor.getMinInRegionRetryTime(optionsWithCustomValues);
    assertThat(actualRetryTime)
        .isEqualTo(Duration.ofSeconds(1));
}

/**
 * Verifies that {@code build()} rejects a max in-region retry count of 0 with an
 * {@link IllegalArgumentException} when the remote region is preferred.
 */
@Test(groups = {"unit"}, timeOut = TIMEOUT)
public void SessionRetryOptionsBuilder_minimum_maxRetryCountEnforced() {
    SessionRetryOptionsBuilder invalidBuilder = new SessionRetryOptionsBuilder()
        .regionSwitchHint(CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED)
        .maxInRegionRetryCount(0);

    // Capture the expected rejection instead of failing from inside the try block.
    IllegalArgumentException rejection = null;
    try {
        invalidBuilder.build();
    } catch (IllegalArgumentException thrown) {
        rejection = thrown;
    }

    if (rejection == null) {
        fail("Building the session retry options should have failed");
    }

    logger.info("Expected IllegalArgumentException", rejection);
}

/**
 * Verifies that {@code build()} rejects a min in-region retry time of 99ms with an
 * {@link IllegalArgumentException} when the remote region is preferred.
 */
@Test(groups = {"unit"}, timeOut = TIMEOUT)
public void SessionRetryOptionsBuilder_minimum_minRetryTimeEnforced() {
    SessionRetryOptionsBuilder invalidBuilder = new SessionRetryOptionsBuilder()
        .regionSwitchHint(CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED)
        .minInRegionRetryTime(Duration.ofMillis(99));

    // Capture the expected rejection instead of failing from inside the try block.
    IllegalArgumentException rejection = null;
    try {
        invalidBuilder.build();
    } catch (IllegalArgumentException thrown) {
        rejection = thrown;
    }

    if (rejection == null) {
        fail("Building the session retry options should have failed");
    }

    logger.info("Expected IllegalArgumentException", rejection);
}

/**
 * Verifies that {@code build()} rejects a {@code null} min in-region retry time with an
 * {@link IllegalArgumentException} when the remote region is preferred.
 */
@Test(groups = {"unit"}, timeOut = TIMEOUT)
public void SessionRetryOptionsBuilder_minRetryTimeRequired() {
    SessionRetryOptionsBuilder invalidBuilder = new SessionRetryOptionsBuilder()
        .regionSwitchHint(CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED)
        .minInRegionRetryTime(null);

    // Capture the expected rejection instead of failing from inside the try block.
    IllegalArgumentException rejection = null;
    try {
        invalidBuilder.build();
    } catch (IllegalArgumentException thrown) {
        rejection = thrown;
    }

    if (rejection == null) {
        fail("Building the session retry options should have failed");
    }

    logger.info("Expected IllegalArgumentException", rejection);
}

@Test(groups = {"multi-master"}, dataProvider = "nonWriteOperationContextProvider", timeOut = TIMEOUT)
public void nonWriteOperation_WithReadSessionUnavailable_test(
OperationType operationType,
Expand Down Expand Up @@ -176,7 +254,7 @@ public void nonWriteOperation_WithReadSessionUnavailable_test(
// Check if the SessionTokenMismatchRetryPolicy retries on the bad / lagging region
// for sessionTokenMismatchRetryAttempts by tracking the badSessionTokenRule hit count
if (regionSwitchHint == CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED) {
assertThat(badSessionTokenRule.getHitCount()).isBetween((long) sessionTokenMismatchRetryAttempts, sessionTokenMismatchRetryAttempts * 4L);
assertThat(badSessionTokenRule.getHitCount()).isBetween((long) sessionTokenMismatchRetryAttempts, (1 + sessionTokenMismatchRetryAttempts) * 4L);
}
} finally {
System.clearProperty("COSMOS.MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED");
Expand Down Expand Up @@ -250,7 +328,9 @@ public void writeOperation_withReadSessionUnavailable_test(
// Check if the SessionTokenMismatchRetryPolicy retries on the bad / lagging region
// for sessionTokenMismatchRetryAttempts by tracking the badSessionTokenRule hit count
if (regionSwitchHint == CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED) {
assertThat(badSessionTokenRule.getHitCount()).isEqualTo(sessionTokenMismatchRetryAttempts);
// higher hit count is possible while in MinRetryWaitTimeWithinRegion
assertThat(badSessionTokenRule.getHitCount()).isGreaterThanOrEqualTo(
sessionTokenMismatchRetryAttempts);
}
} finally {
System.clearProperty("COSMOS.MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,24 @@ protected static void waitIfNeededForReplicasToCatchUp(CosmosClientBuilder clien
/**
 * Creates a container with manual throughput in the given database and returns a handle to it.
 * <p>
 * Container creation is not immediately visible everywhere, so this method pauses for a fixed
 * time when the requested throughput exceeds 6000 or when the client is configured with more
 * than one preferred region.
 *
 * @param database the database in which the container is created
 * @param cosmosContainerProperties the properties (including the id) of the container to create
 * @param options the request options passed to the create call
 * @param throughput the manual throughput to provision
 * @return the container handle obtained from {@code database} by the container id
 * @throws RuntimeException wrapping the {@link InterruptedException} if the thread is
 *         interrupted during the pause; the interrupt flag is restored before throwing
 */
public static CosmosAsyncContainer createCollection(CosmosAsyncDatabase database, CosmosContainerProperties cosmosContainerProperties,
    CosmosContainerRequestOptions options, int throughput) {
    final int highThroughputThreshold = 6000;
    final long settleTimeInMillis = 3000;

    database.createContainer(cosmosContainerProperties, ThroughputProperties.createManualThroughput(throughput), options).block();

    // Creating a container is async - especially on multi-partition or multi-region accounts
    CosmosAsyncClient client = ImplementationBridgeHelpers
        .CosmosAsyncDatabaseHelper
        .getCosmosAsyncDatabaseAccessor()
        .getCosmosAsyncClient(database);
    boolean isMultiRegional = ImplementationBridgeHelpers
        .CosmosAsyncClientHelper
        .getCosmosAsyncClientAccessor()
        .getPreferredRegions(client).size() > 1;

    if (throughput > highThroughputThreshold || isMultiRegional) {
        try {
            Thread.sleep(settleTimeInMillis);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the interrupt status; restore it so that
            // callers further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    return database.getContainer(cosmosContainerProperties.getId());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@

package com.azure.cosmos;

import com.azure.cosmos.implementation.Configs;
import com.azure.cosmos.implementation.ImplementationBridgeHelpers;

import java.time.Duration;

/**
* {@link SessionRetryOptions} encapsulates hints which influence
* internal retry policies which are applied when the effective consistency
Expand All @@ -13,12 +16,19 @@
public final class SessionRetryOptions {

private final CosmosRegionSwitchHint regionSwitchHint;
private final Duration minInRegionRetryTime;

private final int maxInRegionRetryCount;

/**
 * Instantiates {@link SessionRetryOptions}.
 *
 * @param regionSwitchHint the region switch hint to store on this instance
 * @param minInRegionRetryTime the minimum retry time for 404/1002 retries within each region
 * @param maxInRegionRetryCount the maximum number of retries within each region
 */
SessionRetryOptions(CosmosRegionSwitchHint regionSwitchHint,
    Duration minInRegionRetryTime,
    int maxInRegionRetryCount) {
    this.regionSwitchHint = regionSwitchHint;
    this.minInRegionRetryTime = minInRegionRetryTime;
    this.maxInRegionRetryCount = maxInRegionRetryCount;
}

static void initialize() {
Expand All @@ -29,6 +39,16 @@ static void initialize() {
public CosmosRegionSwitchHint getRegionSwitchHint(SessionRetryOptions sessionRetryOptions) {
return sessionRetryOptions.regionSwitchHint;
}

// Accessor implementation: returns the private minInRegionRetryTime field of the given options.
@Override
public Duration getMinInRegionRetryTime(SessionRetryOptions sessionRetryOptions) {
return sessionRetryOptions.minInRegionRetryTime;
}

// Accessor implementation: returns the private maxInRegionRetryCount field of the given options.
@Override
public int getMaxInRegionRetryCount(SessionRetryOptions sessionRetryOptions) {
return sessionRetryOptions.maxInRegionRetryCount;
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

package com.azure.cosmos;

import com.azure.cosmos.implementation.Configs;

import java.time.Duration;

import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument;
import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull;

/**
Expand All @@ -12,6 +17,9 @@
public final class SessionRetryOptionsBuilder {

// Region switch hint; has no initial value and is null-checked in build().
private CosmosRegionSwitchHint regionSwitchHint;
// Minimum in-region retry time; initialized from Configs when the builder is created.
private Duration minInRegionRetryTime = Configs.getMinRetryTimeInLocalRegionWhenRemoteRegionPreferred();

// Maximum in-region retry count; initialized from Configs when the builder is created.
private int maxInRegionRetryCount = Configs.getMaxRetriesInLocalRegionWhenRemoteRegionPreferred();

/**
* Sets the {@link CosmosRegionSwitchHint} which specifies for
Expand All @@ -31,13 +39,59 @@ public SessionRetryOptionsBuilder regionSwitchHint(CosmosRegionSwitchHint region
return this;
}

/**
* Sets the minimum retry time for 404/1002 retries within each region for read and write operations. The minimum
* value is 100ms - this minimum is enforced to give the local region a chance to catch up on replication lag.
* The default value is 500ms - as a recommendation, ensure that this value is higher than the steady-state
* replication latency between the regions you have chosen.
* <p>
* This method only stores the value; it is validated in {@link #build()} when the region switch hint is
* {@link CosmosRegionSwitchHint#REMOTE_REGION_PREFERRED}.
* @param minRetryTime the minimum retry time to be used within each region
* @return This instance of {@link SessionRetryOptionsBuilder}
*/
public SessionRetryOptionsBuilder minInRegionRetryTime(Duration minRetryTime) {
this.minInRegionRetryTime = minRetryTime;
return this;
}

/**
* Sets the maximum number of retries within each region for read and write operations. The minimum
* value is 1 - the backoff time for the last in-region retry will ensure that the total retry time within the
* region is at least the minimum in-region retry time.
* <p>
* This method only stores the value; it is validated in {@link #build()} when the region switch hint is
* {@link CosmosRegionSwitchHint#REMOTE_REGION_PREFERRED}.
* @param maxInRegionRetryCount the maximum number of retries within each region
* @return This instance of {@link SessionRetryOptionsBuilder}
*/
public SessionRetryOptionsBuilder maxInRegionRetryCount(int maxInRegionRetryCount) {
this.maxInRegionRetryCount = maxInRegionRetryCount;
return this;
}

/**
 * Builds an instance of {@link SessionRetryOptions}.
 * <p>
 * The region switch hint is always required. When it is
 * {@link CosmosRegionSwitchHint#REMOTE_REGION_PREFERRED}, the in-region retry settings are
 * validated as well: the minimum in-region retry time must be non-null and at least
 * {@code Configs.MIN_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS} milliseconds, and the maximum
 * in-region retry count must be at least
 * {@code Configs.MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED}.
 *
 * @return An instance of {@link SessionRetryOptions}
 * @throws NullPointerException if no region switch hint has been set
 * @throws IllegalArgumentException if the in-region retry settings violate the constraints
 *         above while the region switch hint is {@code REMOTE_REGION_PREFERRED}
 */
public SessionRetryOptions build() {
    checkNotNull(regionSwitchHint, "regionSwitch hint cannot be null");

    // The in-region retry settings are only validated when the remote region is preferred.
    if (regionSwitchHint == CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED) {
        checkArgument(
            minInRegionRetryTime != null,
            "Argument 'minInRegionRetryTime' must not be null when 'regionSwitchHint' "
                + "is 'REMOTE_REGION_PREFERRED'.");

        Duration minimumAllowedRetryTime =
            Duration.ofMillis(Configs.MIN_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS);
        checkArgument(
            minInRegionRetryTime.compareTo(minimumAllowedRetryTime) >= 0,
            "Argument 'minInRegionRetryTime' must have at least a value of '"
                + minimumAllowedRetryTime
                + "' when 'regionSwitchHint' is 'REMOTE_REGION_PREFERRED'.");

        checkArgument(
            maxInRegionRetryCount >= Configs.MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED,
            "Argument 'maxInRegionRetryCount' must have at least a value of '"
                + Configs.MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED
                + "' when 'regionSwitchHint' is 'REMOTE_REGION_PREFERRED'.");
    }

    return new SessionRetryOptions(regionSwitchHint, minInRegionRetryTime, maxInRegionRetryCount);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ public class Configs {
"COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_MAXIMUM_BACKOFF_TIME_IN_MILLISECONDS";
private static final int DEFAULT_SESSION_TOKEN_MISMATCH_MAXIMUM_BACKOFF_TIME_IN_MILLISECONDS = 500;

// Lower bound (in milliseconds) for the minimum in-region retry time; values read from
// configuration are clamped to it and builder-supplied values are validated against it.
public static final int MIN_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS = 100;

// System property that overrides the default minimum in-region retry time.
// NOTE(review): the property name contains a hyphen ("IN_REGION-RETRY") whereas the other
// property names visible here use only underscores - confirm whether this is intentional
// before changing it, since the name is part of the runtime configuration surface.
private static final String DEFAULT_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS_NAME =
"COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_IN_REGION-RETRY_TIME_IN_MILLISECONDS";
// Default minimum in-region retry time in milliseconds when the system property is not set.
private static final int DEFAULT_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS = 500;

// Whether to process the response on a different thread
private static final String SWITCH_OFF_IO_THREAD_FOR_RESPONSE_NAME = "COSMOS.SWITCH_OFF_IO_THREAD_FOR_RESPONSE";
private static final boolean DEFAULT_SWITCH_OFF_IO_THREAD_FOR_RESPONSE = false;
Expand Down Expand Up @@ -144,6 +150,8 @@ public class Configs {
// System property that overrides the maximum number of in-region retries when the remote region is preferred.
private static final String MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED = "COSMOS.MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED";
// Default maximum number of in-region retries when the system property is not set.
private static final int DEFAULT_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED = 1;

// Lower bound for the maximum number of in-region retries; configured values are clamped to it
// and builder-supplied values are validated against it.
public static final int MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED = 1;

// Creates a Configs instance and initializes its SSL context via sslContextInit().
public Configs() {
this.sslContext = sslContextInit();
}
Expand Down Expand Up @@ -395,7 +403,20 @@ public static int getAggressiveWarmupConcurrency() {
}

/**
 * Returns the maximum number of retries in the local region when the remote region is preferred.
 * <p>
 * The value is resolved through {@code getIntValue} from the
 * {@code COSMOS.MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED} system property and
 * the built-in default, and is then clamped so that it is never lower than
 * {@link #MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED}.
 *
 * @return the effective maximum in-region retry count, at least the enforced minimum
 */
public static int getMaxRetriesInLocalRegionWhenRemoteRegionPreferred() {
    return
        Math.max(
            getIntValue(
                System.getProperty(MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED),
                DEFAULT_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED),
            MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED);
}

/**
 * Returns the minimum retry time in the local region when the remote region is preferred.
 * <p>
 * The millisecond value is resolved through {@code getIntValue} from the corresponding system
 * property and the built-in default, and is raised to
 * {@link #MIN_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS} if it would otherwise be lower.
 *
 * @return the effective minimum in-region retry time
 */
public static Duration getMinRetryTimeInLocalRegionWhenRemoteRegionPreferred() {
    String overrideFromSystemProperty =
        System.getProperty(DEFAULT_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS_NAME);
    int configuredMillis =
        getIntValue(overrideFromSystemProperty, DEFAULT_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS);

    // Never go below the enforced lower bound.
    int effectiveMillis = Math.max(configuredMillis, MIN_MIN_IN_REGION_RETRY_TIME_FOR_WRITES_MS);

    return Duration.ofMillis(effectiveMillis);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,9 @@ public static void setCosmosSessionRetryOptionsAccessor(final CosmosSessionRetry

/**
* Accessor through which implementation code reads the settings stored on a
* {@link SessionRetryOptions} instance.
*/
public interface CosmosSessionRetryOptionsAccessor {
// Returns the region switch hint stored on the given options.
CosmosRegionSwitchHint getRegionSwitchHint(SessionRetryOptions sessionRetryOptions);
// Returns the minimum in-region retry time stored on the given options.
Duration getMinInRegionRetryTime(SessionRetryOptions sessionRetryOptions);

// Returns the maximum in-region retry count stored on the given options.
int getMaxInRegionRetryCount(SessionRetryOptions sessionRetryOptions);
}
}
}
Loading

0 comments on commit 5bfd1ed

Please sign in to comment.