diff --git a/amazon-kinesis-client-multilang/pom.xml b/amazon-kinesis-client-multilang/pom.xml index c972b61c2..6fed93f75 100644 --- a/amazon-kinesis-client-multilang/pom.xml +++ b/amazon-kinesis-client-multilang/pom.xml @@ -21,16 +21,12 @@ amazon-kinesis-client-pom software.amazon.kinesis - 2.6.1-SNAPSHOT + 3.0.0 4.0.0 amazon-kinesis-client-multilang - - 1.12.668 - - software.amazon.kinesis @@ -43,36 +39,10 @@ ${awssdk.version} - - com.amazonaws - aws-java-sdk-core - ${aws-java-sdk.version} - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.dataformat - jackson-dataformat-cbor - - - org.apache.httpcomponents - httpclient - - - - - - com.amazonaws - aws-java-sdk-sts - ${aws-java-sdk.version} - - org.projectlombok lombok - 1.18.24 + 1.18.28 provided @@ -104,6 +74,12 @@ + + org.junit.jupiter + junit-jupiter-api + 5.11.3 + test + junit junit @@ -122,6 +98,13 @@ 1.3 test + + + org.mockito + mockito-junit-jupiter + 3.12.4 + test + diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java index 4588b2465..feb35ad49 100644 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/MultiLangDaemon.java @@ -61,10 +61,10 @@ * applicationName = PythonKCLSample * * # Users can change the credentials provider the KCL will use to retrieve credentials. 
- * # The DefaultAWSCredentialsProviderChain checks several other providers, which is + * # The DefaultCredentialsProvider checks several other providers, which is * # described here: - * # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html - * AWSCredentialsProvider = DefaultAWSCredentialsProviderChain + * # https://sdk.amazonaws.com/java/api/2.0.0-preview-11/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html + * AwsCredentialsProvider = DefaultCredentialsProvider * */ @Slf4j @@ -141,7 +141,7 @@ void configureLogging( } } - String propertiesFile(final MultiLangDaemonArguments arguments) { + String validateAndGetPropertiesFileName(final MultiLangDaemonArguments arguments) { String propertiesFile = ""; if (CollectionUtils.isNotEmpty(arguments.parameters)) { @@ -216,9 +216,9 @@ public static void main(final String[] args) { MultiLangDaemonArguments arguments = new MultiLangDaemonArguments(); JCommander jCommander = daemon.buildJCommanderAndParseArgs(arguments, args); try { - String propertiesFile = daemon.propertiesFile(arguments); + String propertiesFileName = daemon.validateAndGetPropertiesFileName(arguments); daemon.configureLogging(arguments.logConfiguration); - MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFile); + MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFileName); Scheduler scheduler = daemon.buildScheduler(config); MultiLangRunner runner = new MultiLangRunner(scheduler); diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java index 192118220..13acfeb15 100644 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java +++ 
b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java @@ -15,13 +15,14 @@ package software.amazon.kinesis.multilang; import java.util.HashMap; +import java.util.List; import java.util.Map; -import com.amazonaws.regions.Regions; import com.google.common.base.CaseFormat; import lombok.AccessLevel; import lombok.Getter; import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.regions.Region; /** * Key-Value pairs which may be nested in, and extracted from, a property value @@ -73,8 +74,13 @@ void visit(final NestedPropertyProcessor processor, final String endpoint) { * @see Available Regions */ ENDPOINT_REGION { - void visit(final NestedPropertyProcessor processor, final String region) { - processor.acceptEndpointRegion(Regions.fromName(region)); + void visit(final NestedPropertyProcessor processor, final String regionName) { + List validRegions = Region.regions(); + Region region = Region.of(regionName); + if (!validRegions.contains(region)) { + throw new IllegalArgumentException("Invalid region name: " + regionName); + } + processor.acceptEndpointRegion(region); } }, diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java index f7587297f..a0b9c3b40 100644 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java @@ -14,7 +14,7 @@ */ package software.amazon.kinesis.multilang; -import com.amazonaws.regions.Regions; +import software.amazon.awssdk.regions.Region; /** * Defines methods to process {@link NestedPropertyKey}s. 
@@ -28,7 +28,7 @@ public interface NestedPropertyProcessor { * (e.g., https://sns.us-west-1.amazonaws.com, sns.us-west-1.amazonaws.com) * @param signingRegion the region to use for SigV4 signing of requests (e.g. us-west-1) * - * @see #acceptEndpointRegion(Regions) + * @see #acceptEndpointRegion(Region) * @see * AwsClientBuilder.EndpointConfiguration */ @@ -42,7 +42,7 @@ public interface NestedPropertyProcessor { * * @see #acceptEndpoint(String, String) */ - void acceptEndpointRegion(Regions region); + void acceptEndpointRegion(Region region); /** * Set the external id, an optional field to designate who can assume an IAM role. diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java deleted file mode 100644 index b5b9f924c..000000000 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2023 Amazon.com, Inc. or its affiliates. - * Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package software.amazon.kinesis.multilang.auth; - -import java.util.Arrays; - -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.auth.AWSSessionCredentialsProvider; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.Builder; -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient; -import software.amazon.kinesis.multilang.NestedPropertyKey; -import software.amazon.kinesis.multilang.NestedPropertyProcessor; - -/** - * An {@link AWSSessionCredentialsProvider} that is backed by STSAssumeRole. - */ -public class KclSTSAssumeRoleSessionCredentialsProvider - implements AWSSessionCredentialsProvider, NestedPropertyProcessor { - - private final Builder builder; - - private final STSAssumeRoleSessionCredentialsProvider provider; - - /** - * - * @param params vararg parameters which must include roleArn at index=0, - * and roleSessionName at index=1 - */ - public KclSTSAssumeRoleSessionCredentialsProvider(final String[] params) { - this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length)); - } - - public KclSTSAssumeRoleSessionCredentialsProvider( - final String roleArn, final String roleSessionName, final String... 
params) { - builder = new Builder(roleArn, roleSessionName); - NestedPropertyKey.parse(this, params); - provider = builder.build(); - } - - @Override - public AWSSessionCredentials getCredentials() { - return provider.getCredentials(); - } - - @Override - public void refresh() { - // do nothing - } - - @Override - public void acceptEndpoint(final String serviceEndpoint, final String signingRegion) { - final EndpointConfiguration endpoint = new EndpointConfiguration(serviceEndpoint, signingRegion); - final AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClient.builder() - .withEndpointConfiguration(endpoint) - .build(); - builder.withStsClient(stsClient); - } - - @Override - public void acceptEndpointRegion(final Regions region) { - final AWSSecurityTokenService stsClient = - AWSSecurityTokenServiceClient.builder().withRegion(region).build(); - builder.withStsClient(stsClient); - } - - @Override - public void acceptExternalId(final String externalId) { - builder.withExternalId(externalId); - } -} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java new file mode 100644 index 000000000..1856fa58f --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java @@ -0,0 +1,61 @@ +package software.amazon.kinesis.multilang.auth; + +import java.net.URI; +import java.util.Arrays; + +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; +import 
software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest.Builder; +import software.amazon.kinesis.multilang.NestedPropertyKey; +import software.amazon.kinesis.multilang.NestedPropertyProcessor; + +public class KclStsAssumeRoleCredentialsProvider implements AwsCredentialsProvider, NestedPropertyProcessor { + private final Builder assumeRoleRequestBuilder; + private final StsClientBuilder stsClientBuilder; + private final StsAssumeRoleCredentialsProvider stsAssumeRoleCredentialsProvider; + + public KclStsAssumeRoleCredentialsProvider(String[] params) { + this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length)); + } + + public KclStsAssumeRoleCredentialsProvider(String roleArn, String roleSessionName, String... params) { + this.assumeRoleRequestBuilder = + AssumeRoleRequest.builder().roleArn(roleArn).roleSessionName(roleSessionName); + this.stsClientBuilder = StsClient.builder(); + NestedPropertyKey.parse(this, params); + this.stsAssumeRoleCredentialsProvider = StsAssumeRoleCredentialsProvider.builder() + .refreshRequest(assumeRoleRequestBuilder.build()) + .asyncCredentialUpdateEnabled(true) + .stsClient(stsClientBuilder.build()) + .build(); + } + + @Override + public AwsCredentials resolveCredentials() { + return stsAssumeRoleCredentialsProvider.resolveCredentials(); + } + + @Override + public void acceptEndpoint(String serviceEndpoint, String signingRegion) { + if (!serviceEndpoint.startsWith("http://") && !serviceEndpoint.startsWith("https://")) { + serviceEndpoint = "https://" + serviceEndpoint; + } + stsClientBuilder.endpointOverride(URI.create(serviceEndpoint)); + stsClientBuilder.region(Region.of(signingRegion)); + } + + @Override + public void acceptEndpointRegion(Region region) { + stsClientBuilder.region(region); + } + + @Override + public void acceptExternalId(String externalId) { + assumeRoleRequestBuilder.externalId(externalId); + } +} diff --git 
a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoder.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoder.java deleted file mode 100644 index 8110d4f77..000000000 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoder.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright 2019 Amazon.com, Inc. or its affiliates. - * Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package software.amazon.kinesis.multilang.config; - -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSCredentialsProviderChain; -import lombok.extern.slf4j.Slf4j; - -/** - * Get AWSCredentialsProvider property. - */ -@Slf4j -class AWSCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder { - private static final String LIST_DELIMITER = ","; - private static final String ARG_DELIMITER = "|"; - - /** - * Constructor. - */ - AWSCredentialsProviderPropertyValueDecoder() {} - - /** - * Get AWSCredentialsProvider property. 
- * - * @param value - * property value as String - * @return corresponding variable in correct type - */ - @Override - public AWSCredentialsProvider decodeValue(String value) { - if (value != null) { - List providerNames = getProviderNames(value); - List providers = getValidCredentialsProviders(providerNames); - AWSCredentialsProvider[] ps = new AWSCredentialsProvider[providers.size()]; - providers.toArray(ps); - return new AWSCredentialsProviderChain(providers); - } else { - throw new IllegalArgumentException("Property AWSCredentialsProvider is missing."); - } - } - - /** - * @return list of supported types - */ - @Override - public List> getSupportedTypes() { - return Collections.singletonList(AWSCredentialsProvider.class); - } - - /** - * Convert string list to a list of valid credentials providers. - */ - private static List getValidCredentialsProviders(List providerNames) { - List credentialsProviders = new ArrayList<>(); - - for (String providerName : providerNames) { - final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER); - final Class clazz; - try { - final Class c = Class.forName(nameAndArgs[0]); - if (!AWSCredentialsProvider.class.isAssignableFrom(c)) { - continue; - } - clazz = (Class) c; - } catch (ClassNotFoundException cnfe) { - // Providers are a product of prefixed Strings to cover multiple - // namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }). - // It's expected that many class names will not resolve. - continue; - } - log.info("Attempting to construct {}", clazz); - - AWSCredentialsProvider provider = null; - if (nameAndArgs.length > 1) { - final String[] varargs = Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length); - - // attempt to invoke an explicit N-arg constructor of FooClass(String, String, ...) 
- provider = constructProvider(providerName, () -> { - Class[] argTypes = new Class[nameAndArgs.length - 1]; - Arrays.fill(argTypes, String.class); - return clazz.getConstructor(argTypes).newInstance(varargs); - }); - - if (provider == null) { - // attempt to invoke a public varargs/array constructor of FooClass(String[]) - provider = constructProvider(providerName, () -> clazz.getConstructor(String[].class) - .newInstance((Object) varargs)); - } - } - - if (provider == null) { - // regardless of parameters, fallback to invoke a public no-arg constructor - provider = constructProvider(providerName, clazz::newInstance); - } - - if (provider != null) { - credentialsProviders.add(provider); - } - } - return credentialsProviders; - } - - private static List getProviderNames(String property) { - // assume list delimiter is "," - String[] elements = property.split(LIST_DELIMITER); - List result = new ArrayList<>(); - for (int i = 0; i < elements.length; i++) { - String string = elements[i].trim(); - if (!string.isEmpty()) { - // find all possible names and add them to name list - result.addAll(getPossibleFullClassNames(string)); - } - } - return result; - } - - private static List getPossibleFullClassNames(final String provider) { - return Stream.of( - // Customer provides a short name of common providers in com.amazonaws.auth package - // (e.g., any classes implementing the AWSCredentialsProvider interface) - // @see - // http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/AWSCredentialsProvider.html - "com.amazonaws.auth.", - - // Customer provides a short name of a provider offered by this multi-lang package - "software.amazon.kinesis.multilang.auth.", - - // Customer provides a fully-qualified provider name, or a custom credentials provider - // (e.g., com.amazonaws.auth.ClasspathFileCredentialsProvider, org.mycompany.FooProvider) - "") - .map(prefix -> prefix + provider) - .collect(Collectors.toList()); - } - - @FunctionalInterface - private 
interface CredentialsProviderConstructor { - T construct() - throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException; - } - - /** - * Attempts to construct an {@link AWSCredentialsProvider}. - * - * @param providerName Raw, unmodified provider name. Should there be an - * Exeception during construction, this parameter will be logged. - * @param constructor supplier-like function that will perform the construction - * @return the constructed provider, if successful; otherwise, null - * - * @param type of the CredentialsProvider to construct - */ - private static T constructProvider( - final String providerName, final CredentialsProviderConstructor constructor) { - try { - return constructor.construct(); - } catch (NoSuchMethodException ignored) { - // ignore - } catch (IllegalAccessException | InstantiationException | InvocationTargetException | RuntimeException e) { - log.warn("Failed to construct {}", providerName, e); - } - return null; - } -} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoder.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoder.java new file mode 100644 index 000000000..fd29a3dba --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoder.java @@ -0,0 +1,261 @@ +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.multilang.config; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain; +import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; +import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider; + +/** + * Get AwsCredentialsProvider property. + */ +@Slf4j +class AwsCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder { + private static final String LIST_DELIMITER = ","; + private static final String ARG_DELIMITER = "|"; + + /** + * Constructor. + */ + AwsCredentialsProviderPropertyValueDecoder() {} + + /** + * Get AwsCredentialsProvider property. 
+ * + * @param value + * property value as String + * @return corresponding variable in correct type + */ + @Override + public AwsCredentialsProvider decodeValue(String value) { + if (value != null) { + List providerNames = getProviderNames(value); + List providers = getValidCredentialsProviders(providerNames); + AwsCredentialsProvider[] ps = new AwsCredentialsProvider[providers.size()]; + providers.toArray(ps); + if (providers.isEmpty()) { + log.warn("Unable to construct any provider with name {}", value); + log.warn("Please verify that all AwsCredentialsProvider properties are passed correctly"); + } + return AwsCredentialsProviderChain.builder() + .credentialsProviders(providers) + .build(); + } else { + throw new IllegalArgumentException("Property AwsCredentialsProvider is missing."); + } + } + + /** + * @return list of supported types + */ + @Override + public List> getSupportedTypes() { + return Collections.singletonList(AwsCredentialsProvider.class); + } + + /** + * Convert string list to a list of valid credentials providers. + */ + private static List getValidCredentialsProviders(List providerNames) { + List credentialsProviders = new ArrayList<>(); + + for (String providerName : providerNames) { + final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER); + final Class clazz = getClass(nameAndArgs[0]); + if (clazz == null) { + continue; + } + log.info("Attempting to construct {}", clazz); + final String[] varargs = + nameAndArgs.length > 1 ? 
Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length) : new String[0]; + AwsCredentialsProvider provider = tryConstructor(providerName, clazz, varargs); + if (provider == null) { + provider = tryCreate(providerName, clazz, varargs); + } + if (provider != null) { + log.info("Provider constructed successfully: {}", provider); + credentialsProviders.add(provider); + } + } + return credentialsProviders; + } + + private static AwsCredentialsProvider tryConstructor( + String providerName, Class clazz, String[] varargs) { + AwsCredentialsProvider provider = + constructProvider(providerName, () -> getConstructorWithVarArgs(clazz, varargs)); + if (provider == null) { + provider = constructProvider(providerName, () -> getConstructorWithArgs(clazz, varargs)); + } + if (provider == null) { + provider = constructProvider(providerName, clazz::newInstance); + } + return provider; + } + + private static AwsCredentialsProvider tryCreate( + String providerName, Class clazz, String[] varargs) { + AwsCredentialsProvider provider = + constructProvider(providerName, () -> getCreateMethod(clazz, (Object) varargs)); + if (provider == null) { + provider = constructProvider(providerName, () -> getCreateMethod(clazz, varargs)); + } + if (provider == null) { + provider = constructProvider(providerName, () -> getCreateMethod(clazz)); + } + return provider; + } + + private static AwsCredentialsProvider getConstructorWithVarArgs( + Class clazz, String[] varargs) { + try { + return clazz.getConstructor(String[].class).newInstance((Object) varargs); + } catch (Exception e) { + return null; + } + } + + private static AwsCredentialsProvider getConstructorWithArgs( + Class clazz, String[] varargs) { + try { + Class[] argTypes = new Class[varargs.length]; + Arrays.fill(argTypes, String.class); + return clazz.getConstructor(argTypes).newInstance((Object[]) varargs); + } catch (Exception e) { + return null; + } + } + + private static AwsCredentialsProvider getCreateMethod( + Class clazz, Object... 
args) { + try { + Class[] argTypes = new Class[args.length]; + for (int i = 0; i < args.length; i++) { + argTypes[i] = args[i].getClass(); + } + Method createMethod = clazz.getDeclaredMethod("create", argTypes); + if (Modifier.isStatic(createMethod.getModifiers())) { + return clazz.cast(createMethod.invoke(null, args)); + } else { + log.warn("Found non-static create() method in {}", clazz.getName()); + } + } catch (NoSuchMethodException e) { + // No matching create method found for class + } catch (Exception e) { + log.warn("Failed to invoke create() method in {}", clazz.getName(), e); + } + return null; + } + + /** + * Resolves the class for the given provider name. + * + * @param providerName A string containing the provider name. + * + * @return The Class object representing the resolved AwsCredentialsProvider implementation, + * or null if the class cannot be resolved or does not extend AwsCredentialsProvider. + */ + private static Class getClass(String providerName) { + // Convert any form of StsAssumeRoleCredentialsProvider string to KclStsAssumeRoleCredentialsProvider + if (providerName.equals(StsAssumeRoleCredentialsProvider.class.getSimpleName()) + || providerName.equals(StsAssumeRoleCredentialsProvider.class.getName())) { + providerName = KclStsAssumeRoleCredentialsProvider.class.getName(); + } + try { + final Class c = Class.forName(providerName); + if (!AwsCredentialsProvider.class.isAssignableFrom(c)) { + return null; + } + return (Class) c; + } catch (ClassNotFoundException cnfe) { + // Providers are a product of prefixed Strings to cover multiple + // namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }). + // It's expected that many class names will not resolve. 
+ return null; + } + } + + private static List getProviderNames(String property) { + // assume list delimiter is "," + String[] elements = property.split(LIST_DELIMITER); + List result = new ArrayList<>(); + for (int i = 0; i < elements.length; i++) { + String string = elements[i].trim(); + if (!string.isEmpty()) { + // find all possible names and add them to name list + result.addAll(getPossibleFullClassNames(string)); + } + } + return result; + } + + private static List getPossibleFullClassNames(final String provider) { + return Stream.of( + // Customer provides a short name of a provider offered by this multi-lang package + "software.amazon.kinesis.multilang.auth.", + // Customer provides a short name of common providers in software.amazon.awssdk.auth.credentials + // package (e.g., any classes implementing the AwsCredentialsProvider interface) + // @see + // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.html + "software.amazon.awssdk.auth.credentials.", + // Customer provides a fully-qualified provider name, or a custom credentials provider + // (e.g., org.mycompany.FooProvider) + "") + .map(prefix -> prefix + provider) + .collect(Collectors.toList()); + } + + @FunctionalInterface + private interface CredentialsProviderConstructor { + T construct() + throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException; + } + + /** + * Attempts to construct an {@link AwsCredentialsProvider}. + * + * @param providerName Raw, unmodified provider name. Should there be an + * Exception during construction, this parameter will be logged. 
+ * @param constructor supplier-like function that will perform the construction + * @return the constructed provider, if successful; otherwise, null + * + * @param type of the CredentialsProvider to construct + */ + private static T constructProvider( + final String providerName, final CredentialsProviderConstructor constructor) { + try { + return constructor.construct(); + } catch (NoSuchMethodException + | IllegalAccessException + | InstantiationException + | InvocationTargetException + | RuntimeException ignored) { + // ignore + } + return null; + } +} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/CoordinatorStateTableConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/CoordinatorStateTableConfigBean.java new file mode 100644 index 000000000..bb315706f --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/CoordinatorStateTableConfigBean.java @@ -0,0 +1,56 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package software.amazon.kinesis.multilang.config; + +import lombok.Getter; +import lombok.Setter; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig; + +@Getter +@Setter +public class CoordinatorStateTableConfigBean { + + interface CoordinatorStateConfigBeanDelegate { + String getCoordinatorStateTableName(); + + void setCoordinatorStateTableName(String value); + + BillingMode getCoordinatorStateBillingMode(); + + void setCoordinatorStateBillingMode(BillingMode value); + + long getCoordinatorStateReadCapacity(); + + void setCoordinatorStateReadCapacity(long value); + + long getCoordinatorStateWriteCapacity(); + + void setCoordinatorStateWriteCapacity(long value); + } + + @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "tableName") + private String coordinatorStateTableName; + + @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "billingMode") + private BillingMode coordinatorStateBillingMode; + + @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "readCapacity") + private long coordinatorStateReadCapacity; + + @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "writeCapacity") + private long coordinatorStateWriteCapacity; +} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/GracefulLeaseHandoffConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/GracefulLeaseHandoffConfigBean.java new file mode 100644 index 000000000..973279624 --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/GracefulLeaseHandoffConfigBean.java @@ -0,0 +1,41 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package software.amazon.kinesis.multilang.config; + +import lombok.Getter; +import lombok.Setter; +import software.amazon.kinesis.leases.LeaseManagementConfig; + +@Getter +@Setter +public class GracefulLeaseHandoffConfigBean { + + interface GracefulLeaseHandoffConfigBeanDelegate { + Long getGracefulLeaseHandoffTimeoutMillis(); + + void setGracefulLeaseHandoffTimeoutMillis(Long value); + + Boolean getIsGracefulLeaseHandoffEnabled(); + + void setIsGracefulLeaseHandoffEnabled(Boolean value); + } + + @ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class) + private Long gracefulLeaseHandoffTimeoutMillis; + + @ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class) + private Boolean isGracefulLeaseHandoffEnabled; +} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java index 42b617a03..0d897efa3 100644 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfigurator.java @@ -28,7 +28,7 @@ /** * KinesisClientLibConfigurator constructs a 
KinesisClientLibConfiguration from java properties file. The following - * three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AWSCredentialsProvider" + * three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AwsCredentialsProvider" * KinesisClientLibConfigurator will help to automatically assign the value of "workerId" if this property is not * provided. In the specified properties file, any properties, which matches the variable name in * KinesisClientLibConfiguration and has a corresponding "with{variableName}" setter method, will be read in, and its @@ -62,7 +62,7 @@ public MultiLangDaemonConfiguration getConfiguration(Properties properties) { properties.entrySet().forEach(e -> { try { log.info("Processing (key={}, value={})", e.getKey(), e.getValue()); - utilsBean.setProperty(configuration, (String) e.getKey(), e.getValue()); + utilsBean.setProperty(configuration, processKey((String) e.getKey()), e.getValue()); } catch (IllegalAccessException | InvocationTargetException ex) { throw new RuntimeException(ex); } @@ -110,4 +110,17 @@ public MultiLangDaemonConfiguration getConfiguration(InputStream configStream) { } return getConfiguration(properties); } + + /** + * Processes a configuration key to normalize AWS credentials provider naming. Necessary to conform to + * autogenerated setters. 
+ * @param key the config param key + * @return case-configured param key name + */ + String processKey(String key) { + if (key.toLowerCase().startsWith("awscredentialsprovider")) { + key = key.replaceAll("(?i)awscredentialsprovider", "awsCredentialsProvider"); + } + return key; + } } diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java index 3336be887..c2b164229 100644 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfiguration.java @@ -17,6 +17,7 @@ import java.lang.reflect.InvocationTargetException; import java.net.URI; +import java.time.Duration; import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -41,6 +42,7 @@ import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; import software.amazon.awssdk.services.kinesis.KinesisAsyncClientBuilder; import software.amazon.kinesis.checkpoint.CheckpointConfig; @@ -55,7 +57,7 @@ import software.amazon.kinesis.lifecycle.LifecycleConfig; import software.amazon.kinesis.metrics.MetricsConfig; import software.amazon.kinesis.metrics.MetricsLevel; -import software.amazon.kinesis.multilang.config.credentials.V2CredentialWrapper; +import software.amazon.kinesis.multilang.config.converter.DurationConverter; import software.amazon.kinesis.processor.ProcessorConfig; import software.amazon.kinesis.processor.ShardRecordProcessorFactory; import 
software.amazon.kinesis.retrieval.RetrievalConfig; @@ -156,6 +158,9 @@ public void setInitialPositionInStream(InitialPositionInStream initialPositionIn @ConfigurationSettable(configurationClass = CoordinatorConfig.class) private long schedulerInitializationBackoffTimeMillis; + @ConfigurationSettable(configurationClass = CoordinatorConfig.class) + private CoordinatorConfig.ClientVersionConfig clientVersionConfig; + @ConfigurationSettable(configurationClass = LifecycleConfig.class) private long taskBackoffTimeMillis; @@ -189,6 +194,22 @@ public void setMetricsEnabledDimensions(String[] dimensions) { @Delegate(types = PollingConfigBean.PollingConfigBeanDelegate.class) private final PollingConfigBean pollingConfig = new PollingConfigBean(); + @Delegate(types = GracefulLeaseHandoffConfigBean.GracefulLeaseHandoffConfigBeanDelegate.class) + private final GracefulLeaseHandoffConfigBean gracefulLeaseHandoffConfigBean = new GracefulLeaseHandoffConfigBean(); + + @Delegate( + types = WorkerUtilizationAwareAssignmentConfigBean.WorkerUtilizationAwareAssignmentConfigBeanDelegate.class) + private final WorkerUtilizationAwareAssignmentConfigBean workerUtilizationAwareAssignmentConfigBean = + new WorkerUtilizationAwareAssignmentConfigBean(); + + @Delegate(types = WorkerMetricStatsTableConfigBean.WorkerMetricsTableConfigBeanDelegate.class) + private final WorkerMetricStatsTableConfigBean workerMetricStatsTableConfigBean = + new WorkerMetricStatsTableConfigBean(); + + @Delegate(types = CoordinatorStateTableConfigBean.CoordinatorStateConfigBeanDelegate.class) + private final CoordinatorStateTableConfigBean coordinatorStateTableConfigBean = + new CoordinatorStateTableConfigBean(); + private boolean validateSequenceNumberBeforeCheckpointing; private long shutdownGraceMillis; @@ -196,19 +217,19 @@ public void setMetricsEnabledDimensions(String[] dimensions) { private final BuilderDynaBean kinesisCredentialsProvider; - public void setAWSCredentialsProvider(String providerString) { + public 
void setAwsCredentialsProvider(String providerString) { kinesisCredentialsProvider.set("", providerString); } private final BuilderDynaBean dynamoDBCredentialsProvider; - public void setAWSCredentialsProviderDynamoDB(String providerString) { + public void setAwsCredentialsProviderDynamoDB(String providerString) { dynamoDBCredentialsProvider.set("", providerString); } private final BuilderDynaBean cloudWatchCredentialsProvider; - public void setAWSCredentialsProviderCloudWatch(String providerString) { + public void setAwsCredentialsProviderCloudWatch(String providerString) { cloudWatchCredentialsProvider.set("", providerString); } @@ -252,6 +273,25 @@ public T convert(Class type, Object value) { }, InitialPositionInStream.class); + convertUtilsBean.register( + new Converter() { + @Override + public T convert(Class type, Object value) { + return type.cast(CoordinatorConfig.ClientVersionConfig.valueOf( + value.toString().toUpperCase())); + } + }, + CoordinatorConfig.ClientVersionConfig.class); + + convertUtilsBean.register( + new Converter() { + @Override + public T convert(Class type, Object value) { + return type.cast(BillingMode.valueOf(value.toString().toUpperCase())); + } + }, + BillingMode.class); + convertUtilsBean.register( new Converter() { @Override @@ -279,12 +319,14 @@ public T convert(final Class type, final Object value) { }, Region.class); + convertUtilsBean.register(new DurationConverter(), Duration.class); + ArrayConverter arrayConverter = new ArrayConverter(String[].class, new StringConverter()); arrayConverter.setDelimiter(','); convertUtilsBean.register(arrayConverter, String[].class); - AWSCredentialsProviderPropertyValueDecoder oldCredentialsDecoder = - new AWSCredentialsProviderPropertyValueDecoder(); - Function converter = s -> new V2CredentialWrapper(oldCredentialsDecoder.decodeValue(s)); + AwsCredentialsProviderPropertyValueDecoder credentialsDecoder = + new AwsCredentialsProviderPropertyValueDecoder(); + Function converter = 
credentialsDecoder::decodeValue; this.kinesisCredentialsProvider = new BuilderDynaBean( AwsCredentialsProvider.class, convertUtilsBean, converter, CREDENTIALS_DEFAULT_SEARCH_PATH); @@ -370,6 +412,22 @@ private void handleRetrievalConfig(RetrievalConfig retrievalConfig, ConfigsBuild retrievalMode.builder(this).build(configsBuilder.kinesisClient(), this)); } + private void handleCoordinatorConfig(CoordinatorConfig coordinatorConfig) { + ConfigurationSettableUtils.resolveFields( + this.coordinatorStateTableConfigBean, coordinatorConfig.coordinatorStateTableConfig()); + } + + private void handleLeaseManagementConfig(LeaseManagementConfig leaseManagementConfig) { + ConfigurationSettableUtils.resolveFields( + this.gracefulLeaseHandoffConfigBean, leaseManagementConfig.gracefulLeaseHandoffConfig()); + ConfigurationSettableUtils.resolveFields( + this.workerUtilizationAwareAssignmentConfigBean, + leaseManagementConfig.workerUtilizationAwareAssignmentConfig()); + ConfigurationSettableUtils.resolveFields( + this.workerMetricStatsTableConfigBean, + leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig()); + } + private Object adjustKinesisHttpConfiguration(Object builderObj) { if (builderObj instanceof KinesisAsyncClientBuilder) { KinesisAsyncClientBuilder builder = (KinesisAsyncClientBuilder) builderObj; @@ -448,6 +506,8 @@ ResolvedConfiguration resolvedConfiguration(ShardRecordProcessorFactory shardRec processorConfig, retrievalConfig); + handleCoordinatorConfig(coordinatorConfig); + handleLeaseManagementConfig(leaseManagementConfig); handleRetrievalConfig(retrievalConfig, configsBuilder); resolveFields(configObjects, null, new HashSet<>(Arrays.asList(ConfigsBuilder.class, PollingConfig.class))); diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerMetricStatsTableConfigBean.java 
b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerMetricStatsTableConfigBean.java new file mode 100644 index 000000000..0828c9c66 --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerMetricStatsTableConfigBean.java @@ -0,0 +1,56 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package software.amazon.kinesis.multilang.config; + +import lombok.Getter; +import lombok.Setter; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig; + +@Getter +@Setter +public class WorkerMetricStatsTableConfigBean { + + interface WorkerMetricsTableConfigBeanDelegate { + String getWorkerMetricsTableName(); + + void setWorkerMetricsTableName(String value); + + BillingMode getWorkerMetricsBillingMode(); + + void setWorkerMetricsBillingMode(BillingMode value); + + long getWorkerMetricsReadCapacity(); + + void setWorkerMetricsReadCapacity(long value); + + long getWorkerMetricsWriteCapacity(); + + void setWorkerMetricsWriteCapacity(long value); + } + + @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "tableName") + private String workerMetricsTableName; + + @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "billingMode") + private BillingMode 
workerMetricsBillingMode; + + @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "readCapacity") + private long workerMetricsReadCapacity; + + @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "writeCapacity") + private long workerMetricsWriteCapacity; +} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBean.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBean.java new file mode 100644 index 000000000..fc3352837 --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBean.java @@ -0,0 +1,106 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package software.amazon.kinesis.multilang.config; + +import java.time.Duration; + +import lombok.Getter; +import lombok.Setter; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig; + +@Getter +@Setter +public class WorkerUtilizationAwareAssignmentConfigBean { + + interface WorkerUtilizationAwareAssignmentConfigBeanDelegate { + long getInMemoryWorkerMetricsCaptureFrequencyMillis(); + + void setInMemoryWorkerMetricsCaptureFrequencyMillis(long value); + + long getWorkerMetricsReporterFreqInMillis(); + + void setWorkerMetricsReporterFreqInMillis(long value); + + int getNoOfPersistedMetricsPerWorkerMetrics(); + + void setNoOfPersistedMetricsPerWorkerMetrics(int value); + + Boolean getDisableWorkerMetrics(); + + void setDisableWorkerMetrics(Boolean value); + + double getMaxThroughputPerHostKBps(); + + void setMaxThroughputPerHostKBps(double value); + + int getDampeningPercentage(); + + void setDampeningPercentage(int value); + + int getReBalanceThresholdPercentage(); + + void setReBalanceThresholdPercentage(int value); + + Boolean getAllowThroughputOvershoot(); + + void setAllowThroughputOvershoot(Boolean value); + + int getVarianceBalancingFrequency(); + + void setVarianceBalancingFrequency(int value); + + double getWorkerMetricsEMAAlpha(); + + void setWorkerMetricsEMAAlpha(double value); + + void setStaleWorkerMetricsEntryCleanupDuration(Duration value); + + Duration getStaleWorkerMetricsEntryCleanupDuration(); + } + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private long inMemoryWorkerMetricsCaptureFrequencyMillis; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private long workerMetricsReporterFreqInMillis; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private int noOfPersistedMetricsPerWorkerMetrics; + + @ConfigurationSettable(configurationClass = 
WorkerUtilizationAwareAssignmentConfig.class) + private Boolean disableWorkerMetrics; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private double maxThroughputPerHostKBps; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private int dampeningPercentage; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private int reBalanceThresholdPercentage; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private Boolean allowThroughputOvershoot; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private int varianceBalancingFrequency; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private double workerMetricsEMAAlpha; + + @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class) + private Duration staleWorkerMetricsEntryCleanupDuration; +} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/DurationConverter.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/DurationConverter.java new file mode 100644 index 000000000..3c07f1f2c --- /dev/null +++ b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/converter/DurationConverter.java @@ -0,0 +1,37 @@ +package software.amazon.kinesis.multilang.config.converter; + +import java.time.Duration; + +import org.apache.commons.beanutils.Converter; + +/** + * Converter that converts Duration text representation to a Duration object. + * Refer to {@code Duration.parse} javadocs for the exact text representation. 
+ */ +public class DurationConverter implements Converter { + + @Override + public T convert(Class type, Object value) { + if (value == null) { + return null; + } + + if (type != Duration.class) { + throw new ConversionException("Can only convert to Duration"); + } + + String durationString = value.toString().trim(); + final Duration duration = Duration.parse(durationString); + if (duration.isNegative()) { + throw new ConversionException("Negative values are not permitted for duration: " + durationString); + } + + return type.cast(duration); + } + + public static class ConversionException extends RuntimeException { + public ConversionException(String message) { + super(message); + } + } +} diff --git a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/credentials/V2CredentialWrapper.java b/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/credentials/V2CredentialWrapper.java deleted file mode 100644 index e1b6072af..000000000 --- a/amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/config/credentials/V2CredentialWrapper.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2019 Amazon.com, Inc. or its affiliates. - * Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package software.amazon.kinesis.multilang.config.credentials; - -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; -import lombok.RequiredArgsConstructor; -import software.amazon.awssdk.auth.credentials.AwsCredentials; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; - -@RequiredArgsConstructor -public class V2CredentialWrapper implements AwsCredentialsProvider { - - private final AWSCredentialsProvider oldCredentialsProvider; - - @Override - public AwsCredentials resolveCredentials() { - AWSCredentials current = oldCredentialsProvider.getCredentials(); - if (current instanceof AWSSessionCredentials) { - return AwsSessionCredentials.create( - current.getAWSAccessKeyId(), - current.getAWSSecretKey(), - ((AWSSessionCredentials) current).getSessionToken()); - } - return new AwsCredentials() { - @Override - public String accessKeyId() { - return current.getAWSAccessKeyId(); - } - - @Override - public String secretAccessKey() { - return current.getAWSSecretKey(); - } - }; - } -} diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java index de5a1405c..53b7f2d80 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonConfigTest.java @@ -65,7 +65,7 @@ public void setup(String streamName, String streamArn) throws IOException { String properties = String.format( "executableName = %s\n" + "applicationName = %s\n" - + "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n" + + "AwsCredentialsProvider = DefaultCredentialsProvider\n" + 
"processingLanguage = malbolge\n" + "regionName = %s\n", EXE, APPLICATION_NAME, "us-east-1"); @@ -182,7 +182,7 @@ private void assertConfigurationsMatch(String expectedStreamName, String expecte @Test public void testPropertyValidation() { String propertiesNoExecutableName = "applicationName = testApp \n" + "streamName = fakeStream \n" - + "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n" + "processingLanguage = malbolge"; + + "AwsCredentialsProvider = DefaultCredentialsProvider\n" + "processingLanguage = malbolge"; ClassLoader classLoader = Mockito.mock(ClassLoader.class); Mockito.doReturn(new ByteArrayInputStream(propertiesNoExecutableName.getBytes())) diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java index 3e689437c..453f81aa7 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/MultiLangDaemonTest.java @@ -157,7 +157,7 @@ public void testNoPropertiesFileArgumentOrOption() { MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments(); - daemon.propertiesFile(arguments); + daemon.validateAndGetPropertiesFileName(arguments); } @Test @@ -166,7 +166,7 @@ public void testSuccessfulPropertiesArgument() { MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments(); arguments.parameters = Collections.singletonList(expectedPropertiesFile); - String propertiesFile = daemon.propertiesFile(arguments); + String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments); assertThat(propertiesFile, equalTo(expectedPropertiesFile)); } @@ -180,7 +180,7 @@ public void testPropertiesOptionsOverrideArgument() { arguments.parameters = 
Collections.singletonList(propertiesArgument); arguments.propertiesFile = propertiesOptions; - String propertiesFile = daemon.propertiesFile(arguments); + String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments); assertThat(propertiesFile, equalTo(propertiesOptions)); } @@ -193,7 +193,7 @@ public void testExtraArgumentsFailure() { MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments(); arguments.parameters = Arrays.asList("parameter1", "parameter2"); - daemon.propertiesFile(arguments); + daemon.validateAndGetPropertiesFileName(arguments); } @Test diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java index fbffee816..3c2de9c98 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/NestedPropertyKeyTest.java @@ -14,11 +14,11 @@ */ package software.amazon.kinesis.multilang; -import com.amazonaws.regions.Regions; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; +import software.amazon.awssdk.regions.Region; import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.verify; @@ -64,9 +64,9 @@ public void testInvalidEndpointDoubleCaret() { @Test public void testEndpointRegion() { - final Regions expectedRegion = Regions.GovCloud; + final Region expectedRegion = Region.US_GOV_WEST_1; - parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.getName())); + parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.id())); verify(mockProcessor).acceptEndpointRegion(expectedRegion); } diff --git 
a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java index c27a425d3..c479f77a7 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProviderTest.java @@ -31,15 +31,14 @@ public class KclSTSAssumeRoleSessionCredentialsProviderTest { */ @Test public void testConstructorWithoutOptionalParams() { - new KclSTSAssumeRoleSessionCredentialsProvider(new String[] {ARN, SESSION_NAME}); + new KclStsAssumeRoleCredentialsProvider(new String[] {ARN, SESSION_NAME}); } @Test public void testAcceptEndpoint() { // discovered exception during e2e testing; therefore, this test is // to simply verify the constructed STS client doesn't go *boom* - final KclSTSAssumeRoleSessionCredentialsProvider provider = - new KclSTSAssumeRoleSessionCredentialsProvider(ARN, SESSION_NAME); + final KclStsAssumeRoleCredentialsProvider provider = new KclStsAssumeRoleCredentialsProvider(ARN, SESSION_NAME); provider.acceptEndpoint("endpoint", "us-east-1"); } @@ -53,7 +52,7 @@ public void testVarArgs() { } } - private static class VarArgsSpy extends KclSTSAssumeRoleSessionCredentialsProvider { + private static class VarArgsSpy extends KclStsAssumeRoleCredentialsProvider { private String externalId; diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoderTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoderTest.java similarity index 51% rename from 
amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoderTest.java rename to amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoderTest.java index ba5a0925f..1f60c2d9c 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AWSCredentialsProviderPropertyValueDecoderTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/AwsCredentialsProviderPropertyValueDecoderTest.java @@ -16,16 +16,17 @@ import java.util.Arrays; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSCredentialsProviderChain; -import com.amazonaws.auth.BasicAWSCredentials; import lombok.ToString; import org.hamcrest.Description; import org.hamcrest.Matcher; import org.hamcrest.TypeSafeDiagnosingMatcher; import org.junit.Test; -import software.amazon.kinesis.multilang.auth.KclSTSAssumeRoleSessionCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain; +import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; +import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; @@ -33,31 +34,32 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; -public class AWSCredentialsProviderPropertyValueDecoderTest { +public class AwsCredentialsProviderPropertyValueDecoderTest { private static final String TEST_ACCESS_KEY_ID = "123"; private static final String TEST_SECRET_KEY = "456"; 
private final String credentialName1 = AlwaysSucceedCredentialsProvider.class.getName(); private final String credentialName2 = ConstructorCredentialsProvider.class.getName(); - private final AWSCredentialsProviderPropertyValueDecoder decoder = new AWSCredentialsProviderPropertyValueDecoder(); + private final String createCredentialClass = CreateProvider.class.getName(); + private final AwsCredentialsProviderPropertyValueDecoder decoder = new AwsCredentialsProviderPropertyValueDecoder(); @ToString - private static class AWSCredentialsMatcher extends TypeSafeDiagnosingMatcher { + private static class AwsCredentialsMatcher extends TypeSafeDiagnosingMatcher { private final Matcher akidMatcher; private final Matcher secretMatcher; private final Matcher> classMatcher; - public AWSCredentialsMatcher(String akid, String secret) { + public AwsCredentialsMatcher(String akid, String secret) { this.akidMatcher = equalTo(akid); this.secretMatcher = equalTo(secret); - this.classMatcher = instanceOf(AWSCredentialsProviderChain.class); + this.classMatcher = instanceOf(AwsCredentialsProviderChain.class); } @Override - protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatchDescription) { - AWSCredentials actual = item.getCredentials(); + protected boolean matchesSafely(AwsCredentialsProvider item, Description mismatchDescription) { + AwsCredentials actual = item.resolveCredentials(); boolean matched = true; if (!classMatcher.matches(item)) { @@ -65,12 +67,12 @@ protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatc matched = false; } - if (!akidMatcher.matches(actual.getAWSAccessKeyId())) { - akidMatcher.describeMismatch(actual.getAWSAccessKeyId(), mismatchDescription); + if (!akidMatcher.matches(actual.accessKeyId())) { + akidMatcher.describeMismatch(actual.accessKeyId(), mismatchDescription); matched = false; } - if (!secretMatcher.matches(actual.getAWSSecretKey())) { - secretMatcher.describeMismatch(actual.getAWSSecretKey(), 
mismatchDescription); + if (!secretMatcher.matches(actual.secretAccessKey())) { + secretMatcher.describeMismatch(actual.secretAccessKey(), mismatchDescription); matched = false; } return matched; @@ -79,36 +81,36 @@ protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatc @Override public void describeTo(Description description) { description - .appendText("An AWSCredentialsProvider that provides an AWSCredential matching: ") + .appendText("An AwsCredentialsProvider that provides an AwsCredential matching: ") .appendList("(", ", ", ")", Arrays.asList(classMatcher, akidMatcher, secretMatcher)); } } - private static AWSCredentialsMatcher hasCredentials(String akid, String secret) { - return new AWSCredentialsMatcher(akid, secret); + private static AwsCredentialsMatcher hasCredentials(String akid, String secret) { + return new AwsCredentialsMatcher(akid, secret); } @Test public void testSingleProvider() { - AWSCredentialsProvider provider = decoder.decodeValue(credentialName1); + AwsCredentialsProvider provider = decoder.decodeValue(credentialName1); assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY)); } @Test public void testTwoProviders() { - AWSCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1); + AwsCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1); assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY)); } @Test public void testProfileProviderWithOneArg() { - AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg"); + AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg"); assertThat(provider, hasCredentials("arg", "blank")); } @Test public void testProfileProviderWithTwoArgs() { - AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2"); + AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2"); 
assertThat(provider, hasCredentials("arg1", "arg2")); } @@ -118,14 +120,33 @@ public void testProfileProviderWithTwoArgs() { @Test public void testKclAuthProvider() { for (final String className : Arrays.asList( - KclSTSAssumeRoleSessionCredentialsProvider.class.getName(), // fully-qualified name - KclSTSAssumeRoleSessionCredentialsProvider.class.getSimpleName() // name-only; needs prefix - )) { - final AWSCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName"); + KclStsAssumeRoleCredentialsProvider.class.getName(), // fully-qualified name + KclStsAssumeRoleCredentialsProvider.class.getSimpleName(), // name-only; needs prefix + StsAssumeRoleCredentialsProvider.class.getName(), // user passes full sts package path + StsAssumeRoleCredentialsProvider.class.getSimpleName())) { + final AwsCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName"); assertNotNull(className, provider); } } + /** + * Test that OneArgCreateProvider in the SDK v2 can process a create() method + */ + @Test + public void testEmptyCreateProvider() { + AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass); + assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY)); + } + + /** + * Test that OneArgCreateProvider in the SDK v2 can process a create(arg1) method + */ + @Test + public void testOneArgCreateProvider() { + AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass + "|testCreateProperty"); + assertThat(provider, hasCredentials("testCreateProperty", TEST_SECRET_KEY)); + } + /** * Test that a provider can be instantiated by its varargs constructor. 
*/ @@ -135,28 +156,24 @@ public void testVarArgAuthProvider() { final String className = VarArgCredentialsProvider.class.getName(); final String encodedValue = className + "|" + String.join("|", args); - final AWSCredentialsProvider provider = decoder.decodeValue(encodedValue); - assertEquals(Arrays.toString(args), provider.getCredentials().getAWSAccessKeyId()); + final AwsCredentialsProvider provider = decoder.decodeValue(encodedValue); + assertEquals(Arrays.toString(args), provider.resolveCredentials().accessKeyId()); } /** * This credentials provider will always succeed */ - public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider { - + public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider { @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY); } - - @Override - public void refresh() {} } /** * This credentials provider needs a constructor call to instantiate it */ - public static class ConstructorCredentialsProvider implements AWSCredentialsProvider { + public static class ConstructorCredentialsProvider implements AwsCredentialsProvider { private String arg1; private String arg2; @@ -172,15 +189,12 @@ public ConstructorCredentialsProvider(String arg1, String arg2) { } @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials(arg1, arg2); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create(arg1, arg2); } - - @Override - public void refresh() {} } - private static class VarArgCredentialsProvider implements AWSCredentialsProvider { + private static class VarArgCredentialsProvider implements AwsCredentialsProvider { private final String[] args; @@ -189,13 +203,34 @@ public VarArgCredentialsProvider(final String[] args) { } @Override - public AWSCredentials 
getCredentials() { + public AwsCredentials resolveCredentials() { // KISS solution to surface the constructor args final String flattenedArgs = Arrays.toString(args); - return new BasicAWSCredentials(flattenedArgs, flattenedArgs); + return AwsBasicCredentials.create(flattenedArgs, flattenedArgs); + } + } + + /** + * Credentials provider to test AWS SDK v2 create() methods for providers like ProfileCredentialsProvider + */ + public static class CreateProvider implements AwsCredentialsProvider { + private String accessKeyId; + + private CreateProvider(String accessKeyId) { + this.accessKeyId = accessKeyId; + } + + public static CreateProvider create() { + return new CreateProvider(TEST_ACCESS_KEY_ID); + } + + public static CreateProvider create(String accessKeyId) { + return new CreateProvider(accessKeyId); } @Override - public void refresh() {} + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create(accessKeyId, TEST_SECRET_KEY); + } } } diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java index 5e0db340c..cee3cad27 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/ConfigurationSettableUtilsTest.java @@ -52,6 +52,16 @@ public void testPrimitivesSet() { assertThat(actual, equalTo(expected)); } + @Test + public void testBoolean() { + ConfigResult expected = ConfigResult.builder().bool(false).build(); + + ConfigObject configObject = ConfigObject.builder().bool(expected.bool).build(); + ConfigResult actual = resolve(configObject); + + assertThat(actual, equalTo(expected)); + } + @Test public void testHeapValuesSet() { ConfigResult expected = @@ -147,6 +157,9 @@ 
public static class ConfigResult { private Long boxedLong; private ComplexValue complexValue; + @Builder.Default + private Boolean bool = true; + private Optional optionalString; private Optional optionalInteger; private Optional optionalLong; @@ -175,6 +188,10 @@ public static class ConfigObject { @ConfigurationSettable(configurationClass = ConfigResult.class) private int rawInt; + @ConfigurationSettable(configurationClass = ConfigResult.class) + @Builder.Default + private Boolean bool = true; + @ConfigurationSettable(configurationClass = ConfigResult.class) private Integer boxedInt; diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java index b0e3b870b..a72b1a960 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/KinesisClientLibConfiguratorTest.java @@ -20,19 +20,21 @@ import java.util.Arrays; import java.util.Date; import java.util.HashSet; +import java.util.NoSuchElementException; import java.util.Set; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; import com.google.common.collect.ImmutableSet; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.runners.MockitoJUnitRunner; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; import 
software.amazon.kinesis.common.InitialPositionInStream; +import software.amazon.kinesis.coordinator.CoordinatorConfig; import software.amazon.kinesis.metrics.MetricsLevel; import static org.hamcrest.CoreMatchers.equalTo; @@ -40,6 +42,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -60,7 +63,7 @@ public void testWithBasicSetup() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "workerId = 123" }, '\n')); @@ -69,6 +72,8 @@ public void testWithBasicSetup() { assertEquals(config.getWorkerIdentifier(), "123"); assertThat(config.getMaxGetRecordsThreadPool(), nullValue()); assertThat(config.getRetryGetRecordsInSeconds(), nullValue()); + assertNull(config.getGracefulLeaseHandoffTimeoutMillis()); + assertNull(config.getIsGracefulLeaseHandoffEnabled()); } @Test @@ -77,7 +82,7 @@ public void testWithLongVariables() { new String[] { "applicationName = app", "streamName = 123", - "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2, "workerId = 123", "failoverTimeMillis = 100", "shardSyncIntervalMillis = 500" @@ -98,7 +103,7 @@ public void testWithInitialPositionInStreamExtended() { new String[] { "applicationName = app", "streamName = 123", - "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2, "initialPositionInStreamExtended = " + epochTimeInSeconds }, '\n')); @@ -116,7 +121,7 @@ public void testInvalidInitialPositionInStream() { new String[] { "applicationName = app", "streamName = 123", - "AWSCredentialsProvider = " + 
credentialName1 + ", " + credentialName2, + "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2, "initialPositionInStream = AT_TIMESTAMP" }, '\n')); @@ -136,7 +141,7 @@ public void testInvalidInitialPositionInStreamExtended() { new String[] { "applicationName = app", "streamName = 123", - "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2, "initialPositionInStreamExtended = null" }, '\n')); @@ -147,11 +152,156 @@ public void testInvalidInitialPositionInStreamExtended() { } } + @Test + public void testGracefulLeaseHandoffConfig() { + final Long testGracefulLeaseHandoffTimeoutMillis = 12345L; + final boolean testGracefulLeaseHandoffEnabled = true; + + final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( + new String[] { + "applicationName = dummyApplicationName", + "streamName = dummyStreamName", + "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "gracefulLeaseHandoffTimeoutMillis = " + testGracefulLeaseHandoffTimeoutMillis, + "isGracefulLeaseHandoffEnabled = " + testGracefulLeaseHandoffEnabled + }, + '\n')); + + assertEquals(testGracefulLeaseHandoffTimeoutMillis, config.getGracefulLeaseHandoffTimeoutMillis()); + assertEquals(testGracefulLeaseHandoffEnabled, config.getIsGracefulLeaseHandoffEnabled()); + } + + @Test + public void testClientVersionConfig() { + final CoordinatorConfig.ClientVersionConfig testClientVersionConfig = Arrays.stream( + CoordinatorConfig.ClientVersionConfig.values()) + .findAny() + .orElseThrow(NoSuchElementException::new); + + final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( + new String[] { + "applicationName = dummyApplicationName", + "streamName = dummyStreamName", + "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "clientVersionConfig = " + testClientVersionConfig.name() + }, + '\n')); + + 
assertEquals(testClientVersionConfig, config.getClientVersionConfig()); + } + + @Test + public void testCoordinatorStateConfig() { + final String testCoordinatorStateTableName = "CoordState"; + final BillingMode testCoordinatorStateBillingMode = BillingMode.PAY_PER_REQUEST; + final long testCoordinatorStateReadCapacity = 123; + final long testCoordinatorStateWriteCapacity = 123; + + final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( + new String[] { + "applicationName = dummyApplicationName", + "streamName = dummyStreamName", + "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "coordinatorStateTableName = " + testCoordinatorStateTableName, + "coordinatorStateBillingMode = " + testCoordinatorStateBillingMode.name(), + "coordinatorStateReadCapacity = " + testCoordinatorStateReadCapacity, + "coordinatorStateWriteCapacity = " + testCoordinatorStateWriteCapacity + }, + '\n')); + + assertEquals(testCoordinatorStateTableName, config.getCoordinatorStateTableName()); + assertEquals(testCoordinatorStateBillingMode, config.getCoordinatorStateBillingMode()); + assertEquals(testCoordinatorStateReadCapacity, config.getCoordinatorStateReadCapacity()); + assertEquals(testCoordinatorStateWriteCapacity, config.getCoordinatorStateWriteCapacity()); + } + + @Test + public void testWorkerUtilizationAwareAssignmentConfig() { + final long testInMemoryWorkerMetricsCaptureFrequencyMillis = 123; + final long testWorkerMetricsReporterFreqInMillis = 123; + final long testNoOfPersistedMetricsPerWorkerMetrics = 123; + final Boolean testDisableWorkerMetrics = true; + final double testMaxThroughputPerHostKBps = 123; + final long testDampeningPercentage = 12; + final long testReBalanceThresholdPercentage = 12; + final Boolean testAllowThroughputOvershoot = false; + final long testVarianceBalancingFrequency = 12; + final double testWorkerMetricsEMAAlpha = .123; + + final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( + new 
String[] { + "applicationName = dummyApplicationName", + "streamName = dummyStreamName", + "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "inMemoryWorkerMetricsCaptureFrequencyMillis = " + testInMemoryWorkerMetricsCaptureFrequencyMillis, + "workerMetricsReporterFreqInMillis = " + testWorkerMetricsReporterFreqInMillis, + "noOfPersistedMetricsPerWorkerMetrics = " + testNoOfPersistedMetricsPerWorkerMetrics, + "disableWorkerMetrics = " + testDisableWorkerMetrics, + "maxThroughputPerHostKBps = " + testMaxThroughputPerHostKBps, + "dampeningPercentage = " + testDampeningPercentage, + "reBalanceThresholdPercentage = " + testReBalanceThresholdPercentage, + "allowThroughputOvershoot = " + testAllowThroughputOvershoot, + "varianceBalancingFrequency = " + testVarianceBalancingFrequency, + "workerMetricsEMAAlpha = " + testWorkerMetricsEMAAlpha + }, + '\n')); + + assertEquals( + testInMemoryWorkerMetricsCaptureFrequencyMillis, + config.getInMemoryWorkerMetricsCaptureFrequencyMillis()); + assertEquals(testWorkerMetricsReporterFreqInMillis, config.getWorkerMetricsReporterFreqInMillis()); + assertEquals(testNoOfPersistedMetricsPerWorkerMetrics, config.getNoOfPersistedMetricsPerWorkerMetrics()); + assertEquals(testDisableWorkerMetrics, config.getDisableWorkerMetrics()); + assertEquals(testMaxThroughputPerHostKBps, config.getMaxThroughputPerHostKBps(), 0.0001); + assertEquals(testDampeningPercentage, config.getDampeningPercentage()); + assertEquals(testReBalanceThresholdPercentage, config.getReBalanceThresholdPercentage()); + assertEquals(testAllowThroughputOvershoot, config.getAllowThroughputOvershoot()); + assertEquals(testVarianceBalancingFrequency, config.getVarianceBalancingFrequency()); + assertEquals(testWorkerMetricsEMAAlpha, config.getWorkerMetricsEMAAlpha(), 0.0001); + } + + @Test + public void testWorkerMetricsConfig() { + final String testWorkerMetricsTableName = "CoordState"; + final BillingMode testWorkerMetricsBillingMode = 
BillingMode.PROVISIONED; + final long testWorkerMetricsReadCapacity = 123; + final long testWorkerMetricsWriteCapacity = 123; + + final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( + new String[] { + "applicationName = dummyApplicationName", + "streamName = dummyStreamName", + "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "workerMetricsTableName = " + testWorkerMetricsTableName, + "workerMetricsBillingMode = " + testWorkerMetricsBillingMode.name(), + "workerMetricsReadCapacity = " + testWorkerMetricsReadCapacity, + "workerMetricsWriteCapacity = " + testWorkerMetricsWriteCapacity + }, + '\n')); + + assertEquals(testWorkerMetricsTableName, config.getWorkerMetricsTableName()); + assertEquals(testWorkerMetricsBillingMode, config.getWorkerMetricsBillingMode()); + assertEquals(testWorkerMetricsReadCapacity, config.getWorkerMetricsReadCapacity()); + assertEquals(testWorkerMetricsWriteCapacity, config.getWorkerMetricsWriteCapacity()); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidClientVersionConfig() { + getConfiguration(StringUtils.join( + new String[] { + "applicationName = dummyApplicationName", + "streamName = dummyStreamName", + "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "clientVersionConfig = " + "invalid_client_version_config" + }, + '\n')); + } + @Test public void testWithUnsupportedClientConfigurationVariables() { MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( new String[] { - "AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2, + "AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2, "workerId = id", "kinesisClientConfig = {}", "streamName = stream", @@ -170,7 +320,7 @@ public void testWithIntVariables() { MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join( new String[] { "streamName = kinesis", - "AWSCredentialsProvider = " + credentialName2 + ", " + 
credentialName1, + "AwsCredentialsProvider = " + credentialName2 + ", " + credentialName1, "workerId = w123", "maxRecords = 10", "metricsMaxQueueSize = 20", @@ -195,7 +345,7 @@ public void testWithBooleanVariables() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD, " + credentialName1, + "AwsCredentialsProvider = ABCD, " + credentialName1, "workerId = 0", "cleanupLeasesUponShardCompletion = false", "validateSequenceNumberBeforeCheckpointing = true" @@ -215,7 +365,7 @@ public void testWithStringVariables() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 1", "kinesisEndpoint = https://kinesis", "metricsLevel = SUMMARY" @@ -233,7 +383,7 @@ public void testWithSetVariables() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 1", "metricsEnabledDimensions = ShardId, WorkerIdentifier" }, @@ -253,7 +403,7 @@ public void testWithInitialPositionInStreamTrimHorizon() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 123", "initialPositionInStream = TriM_Horizon" }, @@ -268,7 +418,7 @@ public void testWithInitialPositionInStreamLatest() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 123", "initialPositionInStream = LateSt" }, @@ -283,7 +433,7 @@ public void testSkippingNonKCLVariables() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 123", "initialPositionInStream = TriM_Horizon", "abc = 1" @@ -302,7 
+452,7 @@ public void testEmptyOptionalVariables() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 123", "initialPositionInStream = TriM_Horizon", "maxGetRecordsThreadPool = 1" @@ -318,7 +468,7 @@ public void testWithZeroValue() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = ABCD," + credentialName1, + "AwsCredentialsProvider = ABCD," + credentialName1, "workerId = 123", "initialPositionInStream = TriM_Horizon", "maxGetRecordsThreadPool = 0", @@ -334,7 +484,7 @@ public void testWithInvalidIntValue() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "workerId = 123", "failoverTimeMillis = 100nf" }, @@ -348,7 +498,7 @@ public void testWithNegativeIntValue() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "workerId = 123", "failoverTimeMillis = -12" }, @@ -380,7 +530,7 @@ public void testWithMissingWorkerId() { new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "failoverTimeMillis = 100", "shardSyncIntervalMillis = 500" }, @@ -397,7 +547,7 @@ public void testWithMissingStreamNameAndMissingStreamArn() { String test = StringUtils.join( new String[] { "applicationName = b", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "workerId = 123", "failoverTimeMillis = 100" }, @@ -410,7 +560,7 @@ public void testWithEmptyStreamNameAndMissingStreamArn() { String test = StringUtils.join( new String[] { "applicationName = b", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "workerId = 123", 
"failoverTimeMillis = 100", "streamName = ", @@ -425,7 +575,7 @@ public void testWithMissingApplicationName() { String test = StringUtils.join( new String[] { "streamName = a", - "AWSCredentialsProvider = " + credentialName1, + "AwsCredentialsProvider = " + credentialName1, "workerId = 123", "failoverTimeMillis = 100" }, @@ -434,12 +584,12 @@ public void testWithMissingApplicationName() { } @Test - public void testWithAWSCredentialsFailed() { + public void testWithAwsCredentialsFailed() { String test = StringUtils.join( new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialName2, + "AwsCredentialsProvider = " + credentialName2, "failoverTimeMillis = 100", "shardSyncIntervalMillis = 500" }, @@ -457,16 +607,44 @@ public void testWithAWSCredentialsFailed() { } } + @Test + public void testProcessKeyWithExpectedCasing() { + String key = "AwsCredentialsProvider"; + String result = configurator.processKey(key); + assertEquals("awsCredentialsProvider", result); + } + + @Test + public void testProcessKeyWithOldCasing() { + String key = "AWSCredentialsProvider"; + String result = configurator.processKey(key); + assertEquals("awsCredentialsProvider", result); + } + + @Test + public void testProcessKeyWithMixedCasing() { + String key = "AwScReDeNtIaLsPrOvIdEr"; + String result = configurator.processKey(key); + assertEquals("awsCredentialsProvider", result); + } + + @Test + public void testProcessKeyWithSuffix() { + String key = "awscredentialsproviderDynamoDB"; + String result = configurator.processKey(key); + assertEquals("awsCredentialsProviderDynamoDB", result); + } + // TODO: fix this test @Test - public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatch() { + public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatch() { String test = StringUtils.join( new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialNameKinesis, - "AWSCredentialsProviderDynamoDB = " + 
credentialNameDynamoDB, - "AWSCredentialsProviderCloudWatch = " + credentialNameCloudWatch, + "AwsCredentialsProvider = " + credentialNameKinesis, + "AwsCredentialsProviderDynamoDB = " + credentialNameDynamoDB, + "AwsCredentialsProviderCloudWatch = " + credentialNameCloudWatch, "failoverTimeMillis = 100", "shardSyncIntervalMillis = 500" }, @@ -487,14 +665,14 @@ public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatch() { // TODO: fix this test @Test - public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatchFailed() { + public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatchFailed() { String test = StringUtils.join( new String[] { "streamName = a", "applicationName = b", - "AWSCredentialsProvider = " + credentialNameKinesis, - "AWSCredentialsProviderDynamoDB = " + credentialName2, - "AWSCredentialsProviderCloudWatch = " + credentialName2, + "AwsCredentialsProvider = " + credentialNameKinesis, + "AwsCredentialsProviderDynamoDB = " + credentialName2, + "AwsCredentialsProviderCloudWatch = " + credentialName2, "failoverTimeMillis = 100", "shardSyncIntervalMillis = 500" }, @@ -526,71 +704,52 @@ public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatchFailed() { /** * This credentials provider will always succeed */ - public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider { - + public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider { @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("a", "b"); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("a", "b"); } - - @Override - public void refresh() {} } /** * This credentials provider will always succeed */ - public static class AlwaysSucceedCredentialsProviderKinesis implements AWSCredentialsProvider { - + public static class AlwaysSucceedCredentialsProviderKinesis implements AwsCredentialsProvider { @Override - public AWSCredentials getCredentials() { 
- return new BasicAWSCredentials("", ""); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY"); } - - @Override - public void refresh() {} } /** * This credentials provider will always succeed */ - public static class AlwaysSucceedCredentialsProviderDynamoDB implements AWSCredentialsProvider { - + public static class AlwaysSucceedCredentialsProviderDynamoDB implements AwsCredentialsProvider { @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("", ""); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY"); } - - @Override - public void refresh() {} } /** * This credentials provider will always succeed */ - public static class AlwaysSucceedCredentialsProviderCloudWatch implements AWSCredentialsProvider { - + public static class AlwaysSucceedCredentialsProviderCloudWatch implements AwsCredentialsProvider { @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("", ""); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY"); } - - @Override - public void refresh() {} } /** * This credentials provider will always fail */ - public static class AlwaysFailCredentialsProvider implements AWSCredentialsProvider { + public static class AlwaysFailCredentialsProvider implements AwsCredentialsProvider { @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { throw new IllegalArgumentException(); } - - @Override - public void refresh() {} } private MultiLangDaemonConfiguration getConfiguration(String configString) { diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java 
b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java index 1c45eb6e8..60a55c65a 100644 --- a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/MultiLangDaemonConfigurationTest.java @@ -15,6 +15,9 @@ package software.amazon.kinesis.multilang.config; +import java.util.Arrays; +import java.util.NoSuchElementException; + import org.apache.commons.beanutils.BeanUtilsBean; import org.apache.commons.beanutils.ConvertUtilsBean; import org.junit.After; @@ -24,8 +27,16 @@ import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.runners.MockitoJUnitRunner; import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; +import software.amazon.kinesis.common.ConfigsBuilder; +import software.amazon.kinesis.coordinator.CoordinatorConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig; import software.amazon.kinesis.processor.ShardRecordProcessorFactory; import software.amazon.kinesis.retrieval.fanout.FanOutConfig; import software.amazon.kinesis.retrieval.polling.PollingConfig; @@ -34,6 +45,7 @@ import static org.hamcrest.CoreMatchers.instanceOf; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -41,6 +53,8 @@ public class MultiLangDaemonConfigurationTest { private static final 
String AWS_REGION_PROPERTY_NAME = "aws.region"; + private static final String DUMMY_APPLICATION_NAME = "dummyApplicationName"; + private static final String DUMMY_STREAM_NAME = "dummyStreamName"; private BeanUtilsBean utilsBean; private ConvertUtilsBean convertUtilsBean; @@ -71,8 +85,8 @@ public void after() { public MultiLangDaemonConfiguration baseConfiguration() { MultiLangDaemonConfiguration configuration = new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean); - configuration.setApplicationName("Test"); - configuration.setStreamName("Test"); + configuration.setApplicationName(DUMMY_APPLICATION_NAME); + configuration.setStreamName(DUMMY_STREAM_NAME); configuration.getKinesisCredentialsProvider().set("class", DefaultCredentialsProvider.class.getName()); return configuration; @@ -111,6 +125,197 @@ public void testSetLeaseTableDeletionProtectionEnabledToTrue() { assertTrue(resolvedConfiguration.leaseManagementConfig.leaseTableDeletionProtectionEnabled()); } + @Test + public void testGracefulLeaseHandoffConfig() { + final LeaseManagementConfig.GracefulLeaseHandoffConfig defaultGracefulLeaseHandoffConfig = + getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig(); + + final long testGracefulLeaseHandoffTimeoutMillis = + defaultGracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis() + 12345; + final boolean testGracefulLeaseHandoffEnabled = + !defaultGracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled(); + + final MultiLangDaemonConfiguration configuration = baseConfiguration(); + configuration.setGracefulLeaseHandoffTimeoutMillis(testGracefulLeaseHandoffTimeoutMillis); + configuration.setIsGracefulLeaseHandoffEnabled(testGracefulLeaseHandoffEnabled); + + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + + final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig = + 
resolvedConfiguration.leaseManagementConfig.gracefulLeaseHandoffConfig(); + + assertEquals( + testGracefulLeaseHandoffTimeoutMillis, gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis()); + assertEquals(testGracefulLeaseHandoffEnabled, gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled()); + } + + @Test + public void testGracefulLeaseHandoffUsesDefaults() { + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory); + + final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig = + resolvedConfiguration.leaseManagementConfig.gracefulLeaseHandoffConfig(); + + final LeaseManagementConfig.GracefulLeaseHandoffConfig defaultGracefulLeaseHandoffConfig = + getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig(); + + assertEquals(defaultGracefulLeaseHandoffConfig, gracefulLeaseHandoffConfig); + } + + @Test + public void testWorkerUtilizationAwareAssignmentConfig() { + MultiLangDaemonConfiguration configuration = baseConfiguration(); + + configuration.setInMemoryWorkerMetricsCaptureFrequencyMillis(123); + configuration.setWorkerMetricsReporterFreqInMillis(123); + configuration.setNoOfPersistedMetricsPerWorkerMetrics(123); + configuration.setDisableWorkerMetrics(true); + configuration.setMaxThroughputPerHostKBps(.123); + configuration.setDampeningPercentage(12); + configuration.setReBalanceThresholdPercentage(12); + configuration.setAllowThroughputOvershoot(false); + configuration.setVarianceBalancingFrequency(12); + configuration.setWorkerMetricsEMAAlpha(.123); + + MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig; + LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config = + 
leaseManagementConfig.workerUtilizationAwareAssignmentConfig(); + + assertEquals(config.inMemoryWorkerMetricsCaptureFrequencyMillis(), 123); + assertEquals(config.workerMetricsReporterFreqInMillis(), 123); + assertEquals(config.noOfPersistedMetricsPerWorkerMetrics(), 123); + assertTrue(config.disableWorkerMetrics()); + assertEquals(config.maxThroughputPerHostKBps(), .123, .25); + assertEquals(config.dampeningPercentage(), 12); + assertEquals(config.reBalanceThresholdPercentage(), 12); + assertFalse(config.allowThroughputOvershoot()); + assertEquals(config.varianceBalancingFrequency(), 12); + assertEquals(config.workerMetricsEMAAlpha(), .123, .25); + } + + @Test + public void testWorkerUtilizationAwareAssignmentConfigUsesDefaults() { + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig defaultWorkerUtilAwareAssignmentConfig = + getTestConfigsBuilder().leaseManagementConfig().workerUtilizationAwareAssignmentConfig(); + + final MultiLangDaemonConfiguration configuration = baseConfiguration(); + configuration.setVarianceBalancingFrequency( + defaultWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency() + 12345); + + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig resolvedWorkerUtilAwareAssignmentConfig = + resolvedConfiguration.leaseManagementConfig.workerUtilizationAwareAssignmentConfig(); + + assertNotEquals(defaultWorkerUtilAwareAssignmentConfig, resolvedWorkerUtilAwareAssignmentConfig); + + // apart from the single updated configuration, all other config values should be equal to the default + resolvedWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency( + defaultWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency()); + assertEquals(defaultWorkerUtilAwareAssignmentConfig, resolvedWorkerUtilAwareAssignmentConfig); + } + + @Test + public void 
testWorkerMetricsTableConfigBean() { + final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PROVISIONED; + + MultiLangDaemonConfiguration configuration = baseConfiguration(); + + configuration.setWorkerMetricsTableName("testTable"); + configuration.setWorkerMetricsBillingMode(testWorkerMetricsTableBillingMode); + configuration.setWorkerMetricsReadCapacity(123); + configuration.setWorkerMetricsWriteCapacity(123); + + MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig; + LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationConfig = + leaseManagementConfig.workerUtilizationAwareAssignmentConfig(); + LeaseManagementConfig.WorkerMetricsTableConfig workerMetricsConfig = + workerUtilizationConfig.workerMetricsTableConfig(); + + assertEquals(workerMetricsConfig.tableName(), "testTable"); + assertEquals(workerMetricsConfig.billingMode(), testWorkerMetricsTableBillingMode); + assertEquals(workerMetricsConfig.readCapacity(), 123); + assertEquals(workerMetricsConfig.writeCapacity(), 123); + } + + @Test + public void testWorkerMetricsTableConfigUsesDefaults() { + final LeaseManagementConfig.WorkerMetricsTableConfig defaultWorkerMetricsTableConfig = getTestConfigsBuilder() + .leaseManagementConfig() + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig(); + + final MultiLangDaemonConfiguration configuration = baseConfiguration(); + configuration.setWorkerMetricsBillingMode(Arrays.stream(BillingMode.values()) + .filter(billingMode -> billingMode != defaultWorkerMetricsTableConfig.billingMode()) + .findFirst() + .orElseThrow(NoSuchElementException::new)); + + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + + final 
LeaseManagementConfig.WorkerMetricsTableConfig resolvedWorkerMetricsTableConfig = resolvedConfiguration + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig(); + + assertNotEquals(defaultWorkerMetricsTableConfig, resolvedWorkerMetricsTableConfig); + + // apart from the single updated configuration, all other config values should be equal to the default + resolvedWorkerMetricsTableConfig.billingMode(defaultWorkerMetricsTableConfig.billingMode()); + assertEquals(defaultWorkerMetricsTableConfig, resolvedWorkerMetricsTableConfig); + } + + @Test + public void testCoordinatorStateTableConfigBean() { + final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PAY_PER_REQUEST; + + MultiLangDaemonConfiguration configuration = baseConfiguration(); + + configuration.setCoordinatorStateTableName("testTable"); + configuration.setCoordinatorStateBillingMode(testWorkerMetricsTableBillingMode); + configuration.setCoordinatorStateReadCapacity(123); + configuration.setCoordinatorStateWriteCapacity(123); + + MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + CoordinatorConfig coordinatorConfig = resolvedConfiguration.getCoordinatorConfig(); + CoordinatorConfig.CoordinatorStateTableConfig coordinatorStateConfig = + coordinatorConfig.coordinatorStateTableConfig(); + assertEquals(coordinatorStateConfig.tableName(), "testTable"); + assertEquals(coordinatorStateConfig.billingMode(), testWorkerMetricsTableBillingMode); + assertEquals(coordinatorStateConfig.readCapacity(), 123); + assertEquals(coordinatorStateConfig.writeCapacity(), 123); + } + + @Test + public void testCoordinatorStateTableConfigUsesDefaults() { + final CoordinatorConfig.CoordinatorStateTableConfig defaultCoordinatorStateTableConfig = + getTestConfigsBuilder().coordinatorConfig().coordinatorStateTableConfig(); + + final MultiLangDaemonConfiguration configuration = 
baseConfiguration(); + configuration.setCoordinatorStateWriteCapacity(defaultCoordinatorStateTableConfig.writeCapacity() + 12345); + + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + + final CoordinatorConfig.CoordinatorStateTableConfig resolvedCoordinatorStateTableConfig = + resolvedConfiguration.coordinatorConfig.coordinatorStateTableConfig(); + + assertNotEquals(defaultCoordinatorStateTableConfig, resolvedCoordinatorStateTableConfig); + + // apart from the single updated configuration, all other config values should be equal to the default + resolvedCoordinatorStateTableConfig.writeCapacity(defaultCoordinatorStateTableConfig.writeCapacity()); + assertEquals(defaultCoordinatorStateTableConfig, resolvedCoordinatorStateTableConfig); + } + @Test public void testSetLeaseTablePitrEnabledToTrue() { MultiLangDaemonConfiguration configuration = baseConfiguration(); @@ -266,4 +471,43 @@ public void testFanoutConfigSetConsumerName() { assertThat(fanOutConfig.consumerArn(), equalTo(consumerArn)); } + + @Test + public void testClientVersionConfig() { + final CoordinatorConfig.ClientVersionConfig testClientVersionConfig = + CoordinatorConfig.ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X; + + final MultiLangDaemonConfiguration configuration = baseConfiguration(); + configuration.setClientVersionConfig(testClientVersionConfig); + + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + configuration.resolvedConfiguration(shardRecordProcessorFactory); + + final CoordinatorConfig coordinatorConfig = resolvedConfiguration.coordinatorConfig; + + assertEquals(testClientVersionConfig, coordinatorConfig.clientVersionConfig()); + } + + @Test + public void testClientVersionConfigUsesDefault() { + final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration = + 
baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory); + + final CoordinatorConfig coordinatorConfig = resolvedConfiguration.coordinatorConfig; + + assertEquals( + getTestConfigsBuilder().coordinatorConfig().clientVersionConfig(), + coordinatorConfig.clientVersionConfig()); + } + + private ConfigsBuilder getTestConfigsBuilder() { + return new ConfigsBuilder( + DUMMY_STREAM_NAME, + DUMMY_APPLICATION_NAME, + Mockito.mock(KinesisAsyncClient.class), + Mockito.mock(DynamoDbAsyncClient.class), + Mockito.mock(CloudWatchAsyncClient.class), + "dummyWorkerIdentifier", + shardRecordProcessorFactory); + } } diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/PropertiesMappingE2ETest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/PropertiesMappingE2ETest.java new file mode 100644 index 000000000..53920bf7c --- /dev/null +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/PropertiesMappingE2ETest.java @@ -0,0 +1,251 @@ +package software.amazon.kinesis.multilang.config; + +import java.io.IOException; +import java.time.Duration; + +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig; +import software.amazon.kinesis.multilang.MultiLangDaemonConfig; +import software.amazon.kinesis.multilang.config.MultiLangDaemonConfiguration.ResolvedConfiguration; +import software.amazon.kinesis.processor.ShardRecordProcessor; +import software.amazon.kinesis.processor.ShardRecordProcessorFactory; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class PropertiesMappingE2ETest { + private static final String PROPERTIES_FILE = "multilang.properties"; + private static final 
String PROPERTIES_FILE_V3 = "multilangv3.properties"; + + @Test + public void testKclV3PropertiesMapping() throws IOException { + final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE); + + final ResolvedConfiguration kclV3Config = + config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory()); + + assertEquals( + ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X, + kclV3Config.coordinatorConfig.clientVersionConfig()); + + assertEquals( + "MultiLangTest-CoordinatorState-CustomName", + kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName()); + assertEquals( + BillingMode.PROVISIONED, + kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode()); + assertEquals( + 1000, + kclV3Config.coordinatorConfig.coordinatorStateTableConfig().readCapacity()); + assertEquals( + 500, kclV3Config.coordinatorConfig.coordinatorStateTableConfig().writeCapacity()); + + assertEquals( + 10000L, + kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis()); + assertFalse( + kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled()); + + assertEquals( + 5000L, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .inMemoryWorkerMetricsCaptureFrequencyMillis()); + assertEquals( + 60000L, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsReporterFreqInMillis()); + assertEquals( + 50, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .noOfPersistedMetricsPerWorkerMetrics()); + assertTrue(kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .disableWorkerMetrics()); + assertEquals( + 10000, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .maxThroughputPerHostKBps()); + assertEquals( + 90, + kclV3Config + .leaseManagementConfig + 
.workerUtilizationAwareAssignmentConfig() + .dampeningPercentage()); + assertEquals( + 5, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .reBalanceThresholdPercentage()); + assertFalse(kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .allowThroughputOvershoot()); + assertEquals( + Duration.ofHours(12), + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .staleWorkerMetricsEntryCleanupDuration()); + assertEquals( + 5, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .varianceBalancingFrequency()); + assertEquals( + 0.18D, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsEMAAlpha()); + + assertEquals( + "MultiLangTest-WorkerMetrics-CustomName", + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig() + .tableName()); + assertEquals( + BillingMode.PROVISIONED, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig() + .billingMode()); + assertEquals( + 250, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig() + .readCapacity()); + assertEquals( + 90, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig() + .writeCapacity()); + } + + @Test + public void testKclV3PropertiesMappingForDefaultValues() throws IOException { + final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE_V3); + + final ResolvedConfiguration kclV3Config = + config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory()); + + assertEquals(ClientVersionConfig.CLIENT_VERSION_CONFIG_3X, kclV3Config.coordinatorConfig.clientVersionConfig()); + + assertEquals( + "MultiLangTest-CoordinatorState", + 
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName()); + assertEquals( + BillingMode.PAY_PER_REQUEST, + kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode()); + + assertEquals( + 30_000L, + kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis()); + assertTrue( + kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled()); + + assertEquals( + 1000L, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .inMemoryWorkerMetricsCaptureFrequencyMillis()); + assertEquals( + 30000L, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsReporterFreqInMillis()); + assertEquals( + 10, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .noOfPersistedMetricsPerWorkerMetrics()); + assertFalse(kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .disableWorkerMetrics()); + assertEquals( + Double.MAX_VALUE, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .maxThroughputPerHostKBps()); + assertEquals( + 60, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .dampeningPercentage()); + assertEquals( + 10, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .reBalanceThresholdPercentage()); + assertTrue(kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .allowThroughputOvershoot()); + assertEquals( + Duration.ofDays(1), + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .staleWorkerMetricsEntryCleanupDuration()); + assertEquals( + 3, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .varianceBalancingFrequency()); + assertEquals( + 0.5D, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + 
.workerMetricsEMAAlpha()); + + assertEquals( + "MultiLangTest-WorkerMetricStats", + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig() + .tableName()); + assertEquals( + BillingMode.PAY_PER_REQUEST, + kclV3Config + .leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .workerMetricsTableConfig() + .billingMode()); + } + + private static class TestRecordProcessorFactory implements ShardRecordProcessorFactory { + @Override + public ShardRecordProcessor shardRecordProcessor() { + return null; + } + } +} diff --git a/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBeanTest.java b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBeanTest.java new file mode 100644 index 000000000..71ada01f1 --- /dev/null +++ b/amazon-kinesis-client-multilang/src/test/java/software/amazon/kinesis/multilang/config/WorkerUtilizationAwareAssignmentConfigBeanTest.java @@ -0,0 +1,68 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package software.amazon.kinesis.multilang.config; + +import java.util.Optional; + +import org.apache.commons.beanutils.BeanUtilsBean; +import org.apache.commons.beanutils.ConvertUtilsBean; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; +import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; +import software.amazon.kinesis.retrieval.polling.PollingConfig; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +@RunWith(MockitoJUnitRunner.class) +public class WorkerUtilizationAwareAssignmentConfigBeanTest { + + @Mock + private KinesisAsyncClient kinesisAsyncClient; + + @Test + public void testAllPropertiesTransit() { + PollingConfigBean pollingConfigBean = new PollingConfigBean(); + pollingConfigBean.setIdleTimeBetweenReadsInMillis(1000); + pollingConfigBean.setMaxGetRecordsThreadPool(20); + pollingConfigBean.setMaxRecords(5000); + pollingConfigBean.setRetryGetRecordsInSeconds(30); + + ConvertUtilsBean convertUtilsBean = new ConvertUtilsBean(); + BeanUtilsBean utilsBean = new BeanUtilsBean(convertUtilsBean); + + MultiLangDaemonConfiguration multiLangDaemonConfiguration = + new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean); + multiLangDaemonConfiguration.setStreamName("test-stream"); + + PollingConfig pollingConfig = pollingConfigBean.build(kinesisAsyncClient, multiLangDaemonConfiguration); + + assertThat(pollingConfig.kinesisClient(), equalTo(kinesisAsyncClient)); + assertThat(pollingConfig.streamName(), equalTo(multiLangDaemonConfiguration.getStreamName())); + assertThat( + pollingConfig.idleTimeBetweenReadsInMillis(), + equalTo(pollingConfigBean.getIdleTimeBetweenReadsInMillis())); + assertThat( + pollingConfig.maxGetRecordsThreadPool(), + equalTo(Optional.of(pollingConfigBean.getMaxGetRecordsThreadPool()))); + assertThat(pollingConfig.maxRecords(), equalTo(pollingConfigBean.getMaxRecords())); + assertThat( 
+ pollingConfig.retryGetRecordsInSeconds(), + equalTo(Optional.of(pollingConfigBean.getRetryGetRecordsInSeconds()))); + } +} diff --git a/amazon-kinesis-client-multilang/src/test/resources/multilang.properties b/amazon-kinesis-client-multilang/src/test/resources/multilang.properties index 34cb0c1a3..93e426218 100644 --- a/amazon-kinesis-client-multilang/src/test/resources/multilang.properties +++ b/amazon-kinesis-client-multilang/src/test/resources/multilang.properties @@ -17,10 +17,12 @@ streamName = kclpysample applicationName = MultiLangTest # Users can change the credentials provider the KCL will use to retrieve credentials. -# The DefaultAWSCredentialsProviderChain checks several other providers, which is +# Expected key name (case-sensitive): +# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch +# The DefaultCredentialsProvider checks several other providers, which is # described here: -# http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html -AWSCredentialsProvider = DefaultAWSCredentialsProviderChain +# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html +AwsCredentialsProvider = DefaultCredentialsProvider # Appended to the user agent of the KCL. Does not impact the functionality of the # KCL in any other way. @@ -91,3 +93,73 @@ validateSequenceNumberBeforeCheckpointing = true # active threads set to the provided value. If a non-positive integer or no # value is provided a CachedThreadPool is used. maxActiveThreads = -1 + +################### KclV3 configurations ################### +# Coordinator config +# Version the KCL needs to operate in. For more details check the KCLv3 migration +# documentation. 
Default is CLIENT_VERSION_CONFIG_3X +clientVersionConfig = CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X +# TODO: include table deletion protection and pitr config once it's added +# Configurations to control how the CoordinatorState DDB table is created +# Default name is applicationName-CoordinatorState in PAY_PER_REQUEST +coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName +coordinatorStateBillingMode = PROVISIONED +coordinatorStateReadCapacity = 1000 +coordinatorStateWriteCapacity = 500 + +# Graceful handoff config - tuning of the shutdown behavior during lease transfers +# default values are 30000 and true respectively +gracefulLeaseHandoffTimeoutMillis = 10000 +isGracefulLeaseHandoffEnabled = false + +# WorkerMetricStats table config - control how the DDB table is created +## Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST +# TODO: include table deletion protection and pitr config once it's added +workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName +workerMetricsBillingMode = PROVISIONED +workerMetricsReadCapacity = 250 +workerMetricsWriteCapacity = 90 + +# WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm +# +# frequency of capturing worker metrics in memory. Default is 1s +inMemoryWorkerMetricsCaptureFrequencyMillis = 5000 +# frequency of reporting worker metric stats to storage. Default is 30s +workerMetricsReporterFreqInMillis = 60000 +# No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10 +noOfPersistedMetricsPerWorkerMetrics = 50 +# Disable use of worker metrics to balance lease, default is false. +# If it is true, the algorithm balances lease based on worker's processing throughput. +disableWorkerMetrics = true +# Max throughput per host 10 MBps, to limit processing to the given value +# Default is unlimited. 
+maxThroughputPerHostKBps = 10000 +# Dampen the load that is rebalanced during lease re-balancing, default is 60% +dampeningPercentage = 90 +# Configures the allowed variance range for worker utilization. The upper +# limit is calculated as average * (1 + reBalanceThresholdPercentage/100). +# The lower limit is average * (1 - reBalanceThresholdPercentage/100). If +# any worker's utilization falls outside this range, lease re-balancing is +# triggered. The re-balancing algorithm aims to bring variance within the +# specified range. It also avoids thrashing by ensuring the utilization of +# the worker receiving the load after re-balancing doesn't exceed the fleet +# average. This might cause no re-balancing action even if the utilization is +# out of the variance range. The default value is 10, representing +/-10% +# variance from the average value. +reBalanceThresholdPercentage = 5 +# Whether at least one lease must be taken from a high utilization worker +# during re-balancing when there is no lease assigned to that worker whose +# throughput is less than or equal to the minimum throughput that needs to be +# moved away from that worker to bring the worker back into the allowed variance. +# Default is true. +allowThroughputOvershoot = false +# Lease assignment is performed every failoverTimeMillis but re-balance will +# be attempted only once in 5 times based on the below config. Default is 3. +varianceBalancingFrequency = 5 +# Alpha value used for calculating exponential moving average of worker's metricStats. +workerMetricsEMAAlpha = 0.18 +# Duration after which workerMetricStats entry from WorkerMetricStats table will +# be cleaned up. 
+# Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days) +# Refer to Duration.parse javadocs for more details +staleWorkerMetricsEntryCleanupDuration = PT12H diff --git a/amazon-kinesis-client-multilang/src/test/resources/multilangv3.properties b/amazon-kinesis-client-multilang/src/test/resources/multilangv3.properties new file mode 100644 index 000000000..690c7a1c3 --- /dev/null +++ b/amazon-kinesis-client-multilang/src/test/resources/multilangv3.properties @@ -0,0 +1,169 @@ +# The script that abides by the multi-language protocol. This script will +# be executed by the MultiLangDaemon, which will communicate with this script +# over STDIN and STDOUT according to the multi-language protocol. +executableName = sample_kclpy_app.py + +# The Stream arn: arn:aws:kinesis:::stream/ +# Important: streamArn takes precedence over streamName if both are set +streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample + +# The name of an Amazon Kinesis stream to process. +# Important: streamArn takes precedence over streamName if both are set +streamName = kclpysample + +# Used by the KCL as the name of this application. Will be used as the name +# of an Amazon DynamoDB table which will store the lease and checkpoint +# information for workers with this application name +applicationName = MultiLangTest + +# Users can change the credentials provider the KCL will use to retrieve credentials. +# Expected key name (case-sensitive): +# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch +# The DefaultCredentialsProvider checks several other providers, which is +# described here: +# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html +AwsCredentialsProvider = DefaultCredentialsProvider + +# Appended to the user agent of the KCL. Does not impact the functionality of the +# KCL in any other way. 
+processingLanguage = python/3.8 + +# Valid options are TRIM_HORIZON or LATEST. +# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax +initialPositionInStream = TRIM_HORIZON + +# To specify an initial timestamp from which to start processing records, please specify timestamp value for 'initialPositionInStreamExtended', +# and uncomment below line with right timestamp value. +# See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax +#initialPositionInStreamExtended = 1636609142 + +# The following properties are also available for configuring the KCL Worker that is created +# by the MultiLangDaemon. + +# The KCL defaults to us-east-1 +regionName = us-east-1 + +# Fail over time in milliseconds. A worker which does not renew its lease within this time interval +# will be regarded as having problems and its shards will be assigned to other workers. +# For applications that have a large number of shards, this may be set to a higher number to reduce +# the number of DynamoDB IOPS required for tracking leases +failoverTimeMillis = 10000 + +# A worker id that uniquely identifies this worker among all workers using the same applicationName +# If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself. +workerId = "workerId" + +# Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks. +shardSyncIntervalMillis = 60000 + +# Max records to fetch from Kinesis in a single GetRecords call. +maxRecords = 10000 + +# Idle time between record reads in milliseconds. +idleTimeBetweenReadsInMillis = 1000 + +# Enables applications flush/checkpoint (if they have some data "in progress", but don't get new data for a while) +callProcessRecordsEvenForEmptyRecordList = false + +# Interval in milliseconds between polling to check for parent shard completion. 
+# Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on +# completion of parent shards). +parentShardPollIntervalMillis = 10000 + +# Cleanup leases upon shards completion (don't wait until they expire in Kinesis). +# Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try +# to delete the ones we don't need any longer. +cleanupLeasesUponShardCompletion = true + +# Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures). +taskBackoffTimeMillis = 500 + +# Buffer metrics for at most this long before publishing to CloudWatch. +metricsBufferTimeMillis = 10000 + +# Buffer at most this many metrics before publishing to CloudWatch. +metricsMaxQueueSize = 10000 + +# KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls +# to RecordProcessorCheckpointer#checkpoint(String) by default. +validateSequenceNumberBeforeCheckpointing = true + +# The maximum number of active threads for the MultiLangDaemon to permit. +# If a value is provided then a FixedThreadPool is used with the maximum +# active threads set to the provided value. If a non-positive integer or no +# value is provided a CachedThreadPool is used. 
+maxActiveThreads = -1 + +################### KclV3 configurations ################### +# Coordinator config +clientVersionConfig = CLIENT_VERSION_CONFIG_3x + +## Let all other config be defaults +## TODO: include table deletion protection and pitr config once its added +## Configurations to control how the CoordinatorState DDB table is created +## Default name is applicationName-CoordinatorState in PAY_PER_REQUEST +#coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName +#coordinatorStateBillingMode = PROVISIONED +#coordinatorStateReadCapacity = 1000 +#coordinatorStateWriteCapacity = 500 +# +## Graceful handoff config - tuning of the shutdown behavior during lease transfers +## default values are 30000 and true respectively +#gracefulLeaseHandoffTimeoutMillis = 10000 +#isGracefulLeaseHandoffEnabled = false +# +## WorkerMetricStats table config - control how the DDB table is created +### Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST +## TODO: include table deletion protection and pitr config once its added +#workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName +#workerMetricsBillingMode = PROVISIONED +#workerMetricsReadCapacity = 250 +#workerMetricsWriteCapacity = 90 +# +## WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm +## +## frequency of capturing worker metrics in memory. Default is 1s +#inMemoryWorkerMetricsCaptureFrequencyMillis = 5000 +## frequency of reporting worker metric stats to storage. Default is 30s +#workerMetricsReporterFreqInMillis = 60000 +## No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10. +## This provides historic values that are used to compute the workers current +## utilization using an exponential-moving-average. +#noOfPersistedMetricsPerWorkerMetrics = 50 +## Disable use of worker metrics to balance lease, default is false. +## If it is true, the algorithm balances lease based on worker's processing throughput. 
+#disableWorkerMetrics = true +## Max throughput per host 10 MBps, to limit processing to the given value +## Default is unlimited. +#maxThroughputPerHostKBps = 10000 +## Dampen the load that is rebalanced during lease re-balancing, default is 60% +#dampeningPercentage = 90 +## Configures the allowed variance range for worker utilization. The upper +## limit is calculated as average * (1 + reBalanceThresholdPercentage/100). +## The lower limit is average * (1 - reBalanceThresholdPercentage/100). If +## any worker's utilization falls outside this range, lease re-balancing is +## triggered. The re-balancing algorithm aims to bring variance within the +## specified range. It also avoids thrashing by ensuring the utilization of +## the worker receiving the load after re-balancing doesn't exceed the fleet +## average. This might cause no re-balancing action even the utilization is +## out of the variance range. The default value is 10, representing +/-10% +## variance from the average value. +#reBalanceThresholdPercentage = 5 +## Whether at-least one lease must be taken from a high utilization worker +## during re-balancing when there is no lease assigned to that worker which has +## throughput is less than or equal to the minimum throughput that needs to be +## moved away from that worker to bring the worker back into the allowed variance. +## Default is true. +#allowThroughputOvershoot = false +## Lease assignment is performed every failoverTimeMillis but re-balance will +## be attempted only once in 5 times based on the below config. Default is 3. +#varianceBalancingFrequency = 5 +## Alpha value used for calculating exponential moving average of worker's metricStats. +## Default is 0.5, a higher alpha value will make re-balancing more sensitive +## to recent metricStats. +#workerMetricsEMAAlpha = 0.18 +## Duration after which workerMetricStats entry from WorkerMetricStats table will +## be cleaned up. Default is 1 day. 
+## Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days) +## Refer to Duration.parse javadocs for more details +#staleWorkerMetricsEntryCleanupDuration = PT12H diff --git a/amazon-kinesis-client/pom.xml b/amazon-kinesis-client/pom.xml index b2efcae0a..567e0cdd1 100644 --- a/amazon-kinesis-client/pom.xml +++ b/amazon-kinesis-client/pom.xml @@ -23,7 +23,7 @@ software.amazon.kinesis amazon-kinesis-client-pom - 2.6.1-SNAPSHOT + 3.0.0 amazon-kinesis-client @@ -68,6 +68,18 @@ dynamodb ${awssdk.version} + + + software.amazon.awssdk + dynamodb-enhanced + ${awssdk.version} + + + + com.amazonaws + dynamodb-lock-client + 1.3.0 + software.amazon.awssdk cloudwatch @@ -82,6 +94,12 @@ software.amazon.glue schema-registry-serde ${gsr.version} + + + com.amazonaws + aws-java-sdk-sts + + software.amazon.glue @@ -103,11 +121,23 @@ commons-lang3 3.14.0 + + + commons-collections + commons-collections + 3.2.2 + org.slf4j slf4j-api ${slf4j.version} + + + org.jetbrains + annotations + 26.0.1 + io.reactivex.rxjava3 @@ -123,35 +153,47 @@ + + + org.junit.jupiter + junit-jupiter-api + 5.11.3 + test + junit junit 4.13.2 test - + + + org.junit.jupiter + junit-jupiter-params + 5.11.3 + test + + org.mockito - mockito-all - 1.10.19 + mockito-junit-jupiter + 3.12.4 test - org.hamcrest hamcrest-all 1.3 test - - - - - - - - - + + + + com.amazonaws + DynamoDBLocal + 1.25.0 + test + ch.qos.logback logback-classic @@ -162,11 +204,11 @@ - - - - - + + + + + @@ -203,20 +245,20 @@ - - org.xolstice.maven.plugins - protobuf-maven-plugin - 0.6.1 - - - - compile - - - - - com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} - + + org.xolstice.maven.plugins + protobuf-maven-plugin + 0.6.1 + + + + compile + + + + + com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} + org.apache.maven.plugins diff --git a/amazon-kinesis-client/scripts/KclMigrationTool.py b/amazon-kinesis-client/scripts/KclMigrationTool.py new file mode 100644 index 
000000000..f5c3149f9 --- /dev/null +++ b/amazon-kinesis-client/scripts/KclMigrationTool.py @@ -0,0 +1,610 @@ +""" +Copyright 2024 Amazon.com, Inc. or its affiliates. +Licensed under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import argparse +import time + +from enum import Enum +import boto3 +from botocore.config import Config +from botocore.exceptions import ClientError + +# DynamoDB table suffixes +DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX = "-CoordinatorState" +DEFAULT_WORKER_METRICS_TABLE_SUFFIX = "-WorkerMetricStats" + +# DynamoDB attribute names and values +CLIENT_VERSION_ATTR = 'cv' +TIMESTAMP_ATTR = 'mts' +MODIFIED_BY_ATTR = 'mb' +HISTORY_ATTR = 'h' +MIGRATION_KEY = "Migration3.0" + +# GSI constants +GSI_NAME = 'LeaseOwnerToLeaseKeyIndex' +GSI_DELETION_WAIT_TIME_SECONDS = 120 + +config = Config( + # TODO: parameterize + region_name = 'us-east-1', + retries = { + 'max_attempts': 10, + 'mode': 'standard' + } +) + +# TODO: validate where these values came from. None of the originals seem to work. 
+class KclClientVersion(Enum): + VERSION_2X = "CLIENT_VERSION_2x" + UPGRADE_FROM_2X = "CLIENT_VERSION_UPGRADE_FROM_2x" + VERSION_3X_WITH_ROLLBACK = "CLIENT_VERSION_3x_WITH_ROLLBACK" + VERSION_3X = "CLIENT_VERSION_3x" + + def __str__(self): + return self.value + + +def get_time_in_millis(): + return str(round(time.time() * 1000)) + + +def is_valid_version(version, mode): + """ + Validate if the given version is valid for the specified mode + + :param version: The KCL client version to validate + :param mode: Either 'rollback' or 'rollforward' + :return: True if the version is valid for the given mode, False otherwise + """ + if mode == 'rollback': + if version == KclClientVersion.VERSION_2X.value: + print("Your KCL application already runs in a mode compatible with KCL 2.x. You can deploy the code with the previous KCL version if you still experience an issue.") + return True + if version in [KclClientVersion.UPGRADE_FROM_2X.value, + KclClientVersion.VERSION_3X_WITH_ROLLBACK.value]: + return True + if version == KclClientVersion.VERSION_3X.value: + print("Cannot roll back the KCL application." + " It is not in a state that supports rollback.") + return False + print("Migration to KCL 3.0 not in progress or application_name / coordinator_state_table_name is incorrect." + " Please double check and run again with correct arguments.") + return False + + if mode == 'rollforward': + if version == KclClientVersion.VERSION_2X.value: + return True + if version in [KclClientVersion.UPGRADE_FROM_2X.value, + KclClientVersion.VERSION_3X_WITH_ROLLBACK.value]: + print("Cannot roll-forward application. It is not in a rolled back state.") + return False + if version == KclClientVersion.VERSION_3X.value: + print("Cannot roll-forward the KCL application." + " Application has already migrated.") + return False + print("Cannot roll-forward because migration to KCL 3.0 is not in progress or application_name" + " / coordinator_state_table_name is incorrect. 
Please double check and run again with correct arguments.") + return False + print(f"Invalid mode: {mode}. Mode must be either 'rollback' or 'rollforward'.") + return False + + +def handle_get_item_client_error(e, operation, table_name): + """ + Handle ClientError exceptions raised by get_item on given DynamoDB table + + :param e: The ClientError exception object + :param operation: Rollback or Roll-forward for logging the errors + :param table_name: The name of the DynamoDB table where the error occurred + """ + error_code = e.response['Error']['Code'] + error_message = e.response['Error']['Message'] + print(f"{operation} could not be performed.") + if error_code == 'ProvisionedThroughputExceededException': + print(f"Throughput exceeded even after retries: {error_message}") + else: + print(f"Unexpected client error occurred: {error_code} - {error_message}") + print("Please resolve the issue and run the KclMigrationTool again.") + + +def table_exists(dynamodb_client, table_name): + """ + Check if a DynamoDB table exists. 
+ + :param dynamodb_client: Boto3 DynamoDB client + :param table_name: Name of the DynamoDB table to check + :return: True if the table exists, False otherwise + """ + try: + dynamodb_client.describe_table(TableName=table_name) + return True + except ClientError as e: + if e.response['Error']['Code'] == 'ResourceNotFoundException': + print(f"Table '{table_name}' does not exist.") + return False + print(f"An error occurred while checking table '{table_name}': {e}.") + return False + + +def validate_tables(dynamodb_client, operation, coordinator_state_table_name, lease_table_name=None): + """ + Validate the existence of DynamoDB tables required for KCL operations + + :param dynamodb_client: A boto3 DynamoDB client object + :param operation: Rollback or Roll-forward for logging + :param coordinator_state_table_name: Name of the coordinator state table + :param lease_table_name: Name of the DynamoDB KCL lease table (optional) + :return: True if all required tables exist, False otherwise + """ + if lease_table_name and not table_exists(dynamodb_client, lease_table_name): + print( + f"{operation} failed. Could not find a KCL Application DDB lease table " + f"with name {lease_table_name}. Please pass in the correct application_name " + "and/or lease_table_name that matches your KCL application configuration." + ) + return False + + if not table_exists(dynamodb_client, coordinator_state_table_name): + print( + f"{operation} failed. Could not find a coordinator state table " + f"{coordinator_state_table_name}. Please pass in the correct application_name or" + f" coordinator_state_table_name that matches your KCL application configuration." + ) + return False + + return True + + +def add_current_state_to_history(item, max_history=10): + """ + Adds the current state of a DynamoDB item to its history attribute. + Creates a new history entry from the current value and maintains a capped history list. 
+ + :param item: DynamoDB item to add history to + :param max_history: Maximum number of history entries to maintain (default: 10) + :return: Updated history attribute as a DynamoDB-formatted dictionary + """ + # Extract current values + current_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown') + current_modified_by = item.get(MODIFIED_BY_ATTR, {}).get('S', 'Unknown') + current_time_in_millis = ( + item.get(TIMESTAMP_ATTR, {}).get('N', get_time_in_millis()) + ) + + # Create new history entry + new_entry = { + 'M': { + CLIENT_VERSION_ATTR: {'S': current_version}, + MODIFIED_BY_ATTR: {'S': current_modified_by}, + TIMESTAMP_ATTR: {'N': current_time_in_millis} + } + } + + # Get existing history or create new if doesn't exist + history_dict = item.get(f'{HISTORY_ATTR}', {'L': []}) + history_list = history_dict['L'] + + # Add new entry to the beginning of the list, capping at max_history + history_list.insert(0, new_entry) + history_list = history_list[:max_history] + + return history_dict + + +def get_current_state(dynamodb_client, table_name): + """ + Retrieve the current state from the DynamoDB table and prepare history update. + Fetches the current item from the specified DynamoDB table, + extracts the initial client version, and creates a new history entry. 
+ + :param dynamodb_client: Boto3 DynamoDB client + :param table_name: Name of the DynamoDB table to query + :return: A tuple containing: + - initial_version (str): The current client version, or 'Unknown' if not found + - new_history (dict): Updated history including the current state + """ + response = dynamodb_client.get_item( + TableName=table_name, + Key={'key': {'S': MIGRATION_KEY}} + ) + item = response.get('Item', {}) + initial_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown') + new_history = add_current_state_to_history(item) + return initial_version, new_history + + +def rollback_client_version(dynamodb_client, table_name, history): + """ + Update the client version in the coordinator state table to initiate rollback. + + :param dynamodb_client: Boto3 DynamoDB client + :param table_name: Name of the coordinator state DDB table + :param history: Updated history attribute as a DynamoDB-formatted dictionary + :return: A tuple containing: + - success (bool): True if client version was successfully updated, False otherwise + - previous_version (str): The version that was replaced, or None if update failed + """ + try: + print(f"Rolling back client version in table '{table_name}'...") + update_response = dynamodb_client.update_item( + TableName=table_name, + Key={'key': {'S': MIGRATION_KEY}}, + UpdateExpression=( + f"SET {CLIENT_VERSION_ATTR} = :rollback_client_version, " + f"{TIMESTAMP_ATTR} = :updated_at, " + f"{MODIFIED_BY_ATTR} = :modifier, " + f"{HISTORY_ATTR} = :history" + ), + ConditionExpression=( + f"{CLIENT_VERSION_ATTR} IN (" + ":upgrade_from_2x_client_version, " + ":3x_with_rollback_client_version)" + ), + ExpressionAttributeValues={ + ':rollback_client_version': {'S': KclClientVersion.VERSION_2X.value}, + ':updated_at': {'N': get_time_in_millis()}, + ':modifier': {'S': 'KclMigrationTool-rollback'}, + ':history': history, + ':upgrade_from_2x_client_version': ( + {'S': KclClientVersion.UPGRADE_FROM_2X.value} + ), + 
':3x_with_rollback_client_version': ( + {'S': KclClientVersion.VERSION_3X_WITH_ROLLBACK.value} + ), + }, + ReturnValues='UPDATED_OLD' + ) + replaced_item = update_response.get('Attributes', {}) + replaced_version = replaced_item.get('cv', {}).get('S', '') + return True, replaced_version + except ClientError as e: + if e.response['Error']['Code'] == 'ConditionalCheckFailedException': + print("Unable to rollback, as application is not in a state that allows rollback." + "Ensure that the given application_name or coordinator_state_table_name is correct and" + " you have followed all prior migration steps.") + else: + print(f"An unexpected error occurred while rolling back: {str(e)}" + "Please resolve and run this migration script again.") + return False, None + + +def rollfoward_client_version(dynamodb_client, table_name, history): + """ + Update the client version in the coordinator state table to initiate roll-forward + conditionally if application is currently in rolled back state. + + :param dynamodb_client: Boto3 DynamoDB client + :param table_name: Name of the coordinator state DDB table + :param history: Updated history attribute as a DynamoDB-formatted dictionary + :return: True if client version was successfully updated, False otherwise + """ + try: + # Conditionally update client version + dynamodb_client.update_item( + TableName=table_name, + Key={'key': {'S': MIGRATION_KEY}}, + UpdateExpression= ( + f"SET {CLIENT_VERSION_ATTR} = :rollforward_version, " + f"{TIMESTAMP_ATTR} = :updated_at, " + f"{MODIFIED_BY_ATTR} = :modifier, " + f"{HISTORY_ATTR} = :new_history" + ), + ConditionExpression=f"{CLIENT_VERSION_ATTR} = :kcl_2x_version", + ExpressionAttributeValues={ + ':rollforward_version': {'S': KclClientVersion.UPGRADE_FROM_2X.value}, + ':updated_at': {'N': get_time_in_millis()}, + ':modifier': {'S': 'KclMigrationTool-rollforward'}, + ':new_history': history, + ':kcl_2x_version': {'S': KclClientVersion.VERSION_2X.value}, + } + ) + print("Roll-forward has been 
initiated. KCL application will monitor for 3.0 readiness and" + " automatically switch to 3.0 functionality when readiness criteria have been met.") + except ClientError as e: + if e.response['Error']['Code'] == 'ConditionalCheckFailedException': + print("Unable to roll-forward because application is not in rolled back state." + " Ensure that the given application_name or coordinator_state_table_name is correct" + " and you have followed all prior migration steps.") + else: + print(f"Unable to roll-forward due to error: {str(e)}. " + "Please resolve and run this migration script again.") + except Exception as e: + print(f"Unable to roll-forward due to error: {str(e)}. " + "Please resolve and run this migration script again.") + + +def delete_gsi_if_exists(dynamodb_client, table_name): + """ + Deletes GSI on given lease table if it exists. + + :param dynamodb_client: Boto3 DynamoDB client + :param table_name: Name of lease table to remove GSI from + """ + try: + gsi_present = False + response = dynamodb_client.describe_table(TableName=table_name) + if 'GlobalSecondaryIndexes' in response['Table']: + gsi_list = response['Table']['GlobalSecondaryIndexes'] + for gsi in gsi_list: + if gsi['IndexName'] == GSI_NAME: + gsi_present = True + break + + if not gsi_present: + print(f"GSI {GSI_NAME} is not present on lease table {table_name}. It may already be successfully" + " deleted. Or if lease table name is incorrect, please re-run the KclMigrationTool with correct" + " application_name or lease_table_name.") + return + except ClientError as e: + if e.response['Error']['Code'] == 'ResourceNotFoundException': + print(f"Lease table {table_name} does not exist, please check application_name or lease_table_name" + " configuration and try again.") + return + else: + print(f"An unexpected error occurred while checking if GSI {GSI_NAME} exists" + f" on lease table {table_name}: {str(e)}. 
Please rectify the error and try again.") + return + + print(f"Deleting GSI '{GSI_NAME}' from table '{table_name}'...") + try: + dynamodb_client.update_table( + TableName=table_name, + GlobalSecondaryIndexUpdates=[ + { + 'Delete': { + 'IndexName': GSI_NAME + } + } + ] + ) + except ClientError as e: + if e.response['Error']['Code'] == 'ResourceNotFoundException': + print(f"{GSI_NAME} not found or table '{table_name}' not found.") + elif e.response['Error']['Code'] == 'ResourceInUseException': + print(f"Unable to delete GSI: '{table_name}' is currently being modified.") + except Exception as e: + print(f"An unexpected error occurred while deleting GSI {GSI_NAME} on lease table {table_name}: {str(e)}." + " Please manually confirm the GSI is removed from the lease table, or" + " resolve the error and rerun the migration script.") + + +def delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name): + """ + Deletes worker metrics table based on application name, if it exists. + + :param dynamodb_client: Boto3 DynamoDB client + :param worker_metrics_table_name: Name of the DynamoDB worker metrics table + """ + try: + dynamodb_client.describe_table(TableName=worker_metrics_table_name) + except ClientError as e: + if e.response['Error']['Code'] == 'ResourceNotFoundException': + print(f"Worker metrics table {worker_metrics_table_name} does not exist." + " It may already be successfully deleted. Please check that the application_name" + " or worker_metrics_table_name is correct. If not, correct this and rerun the migration script.") + return + else: + print(f"An unexpected error occurred when checking if {worker_metrics_table_name} table exists: {str(e)}." 
+ " Please manually confirm the table is deleted, or resolve the error" + " and rerun the migration script.") + return + + print(f"Deleting worker metrics table {worker_metrics_table_name}...") + try: + dynamodb_client.delete_table(TableName=worker_metrics_table_name) + except ClientError as e: + if e.response['Error']['Code'] == 'AccessDeniedException': + print(f"No permissions to delete table {worker_metrics_table_name}. Please manually delete it if you" + " want to avoid any charges until you are ready to rollforward with migration.") + else: + print(f"An unexpected client error occurred while deleting worker metrics table: {str(e)}." + " Please manually confirm the table is deleted, or resolve the error" + " and rerun the migration script.") + except Exception as e: + print(f"An unexpected error occurred while deleting worker metrics table: {str(e)}." + " Please manually confirm the table is deleted, or resolve the error" + " and rerun the migration script.") + + +def perform_rollback(dynamodb_client, lease_table_name, coordinator_state_table_name, worker_metrics_table_name): + """ + Perform KCL 3.0 migration rollback by updating MigrationState for the KCL application. + Rolls client version back, removes GSI from lease table, deletes worker metrics table. 
+ + :param dynamodb_client: Boto3 DynamoDB client + :param lease_table_name: Name of the DynamoDB KCL lease table + :param coordinator_state_table_name: Name of the DynamoDB coordinator state table + :param worker_metrics_table_name: Name of the DynamoDB worker metrics table + """ + if not validate_tables(dynamodb_client, "Rollback", coordinator_state_table_name, lease_table_name): + return + + try: + initial_version, new_history = get_current_state(dynamodb_client, + coordinator_state_table_name) + except ClientError as e: + handle_get_item_client_error(e, "Rollback", coordinator_state_table_name) + return + + if not is_valid_version(version=initial_version, mode='rollback'): + return + + # 1. Rollback client version + if initial_version != KclClientVersion.VERSION_2X.value: + rollback_succeeded, initial_version = rollback_client_version( + dynamodb_client, coordinator_state_table_name, new_history + ) + if not rollback_succeeded: + return + + print(f"Waiting for {GSI_DELETION_WAIT_TIME_SECONDS} seconds before cleaning up KCL 3.0 resources after rollback...") + time.sleep(GSI_DELETION_WAIT_TIME_SECONDS) + + # 2. Delete the GSI + delete_gsi_if_exists(dynamodb_client, lease_table_name) + + # 3. Delete worker metrics table + delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name) + + # Log success + if initial_version == KclClientVersion.UPGRADE_FROM_2X.value: + print("\nRollback completed. Your application was running 2x compatible functionality.") + print("Please rollback to your previous application binaries by deploying the code with your previous KCL version.") + elif initial_version == KclClientVersion.VERSION_3X_WITH_ROLLBACK.value: + print("\nRollback completed. 
Your KCL Application was running 3x functionality and will rollback to 2x compatible functionality.") + print("If you don't see mitigation after a short period of time," + " please rollback to your previous application binaries by deploying the code with your previous KCL version.") + elif initial_version == KclClientVersion.VERSION_2X.value: + print("\nApplication was already rolled back. Any KCLv3 resources that could be deleted were cleaned up" + " to avoid charges until the application can be rolled forward with migration.") + + +def perform_rollforward(dynamodb_client, coordinator_state_table_name): + """ + Perform KCL 3.0 migration roll-forward by updating MigrationState for the KCL application + + :param dynamodb_client: Boto3 DynamoDB client + :param coordinator_state_table_name: Name of the DynamoDB table + """ + if not validate_tables(dynamodb_client, "Roll-forward", coordinator_state_table_name): + return + + try: + initial_version, new_history = get_current_state(dynamodb_client, + coordinator_state_table_name) + except ClientError as e: + handle_get_item_client_error(e, "Roll-forward", coordinator_state_table_name) + return + + if not is_valid_version(version=initial_version, mode='rollforward'): + return + + rollfoward_client_version(dynamodb_client, coordinator_state_table_name, new_history) + + +def run_kcl_migration(mode, lease_table_name, coordinator_state_table_name, worker_metrics_table_name): + """ + Update the MigrationState in CoordinatorState DDB Table + + :param mode: Either 'rollback' or 'rollforward' + :param lease_table_name: Name of the DynamoDB KCL lease table + :param coordinator_state_table_name: Name of the DynamoDB coordinator state table + :param worker_metrics_table_name: Name of the DynamoDB worker metrics table + """ + dynamodb_client = boto3.client('dynamodb', config=config) + + if mode == "rollback": + perform_rollback( + dynamodb_client, + lease_table_name, + coordinator_state_table_name, + worker_metrics_table_name + ) + 
elif mode == "rollforward": + perform_rollforward(dynamodb_client, coordinator_state_table_name) + else: + print(f"Invalid mode: {mode}. Please use 'rollback' or 'rollforward'.") + + +def validate_args(args): + if args.mode == 'rollforward': + if not (args.application_name or args.coordinator_state_table_name): + raise ValueError( + "For rollforward mode, either application_name or " + "coordinator_state_table_name must be provided." + ) + else: + if args.application_name: + return + + if not (args.lease_table_name and + args.coordinator_state_table_name and + args.worker_metrics_table_name): + raise ValueError( + "For rollback mode, either application_name or all three table names " + "(lease_table_name, coordinator_state_table_name, and " + "worker_metrics_table_name) must be provided." + ) + +def process_table_names(args): + """ + Process command line arguments to determine table names based on mode. + Args: + args: Parsed command line arguments + Returns: + tuple: (mode, lease_table_name, coordinator_state_table_name, worker_metrics_table_name) + """ + mode_input = args.mode + application_name_input = args.application_name + + coordinator_state_table_name_input = (args.coordinator_state_table_name or + application_name_input + DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX) + lease_table_name_input = None + worker_metrics_table_name_input = None + + if mode_input == "rollback": + lease_table_name_input = args.lease_table_name or application_name_input + worker_metrics_table_name_input = (args.worker_metrics_table_name or + application_name_input + DEFAULT_WORKER_METRICS_TABLE_SUFFIX) + + return (mode_input, + lease_table_name_input, + coordinator_state_table_name_input, + worker_metrics_table_name_input) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description= + """ + KCL Migration Tool + This tool facilitates the migration and rollback processes for Amazon KCLv3 applications. + + Before running this tool: + 1. 
Ensure you have the necessary AWS permissions configured to access and modify the following: + - KCL application DynamoDB tables (lease table and coordinator state table) + + 2. Verify that your AWS credentials are properly set up in your environment or AWS config file. + + 3. Confirm that you have the correct KCL application name and lease table name (if configured in KCL). + + Usage: + This tool supports two main operations: rollforward (upgrade) and rollback. + For detailed usage instructions, use the -h or --help option. + """, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--mode", choices=['rollback', 'rollforward'], required=True, + help="Mode of operation: rollback or rollforward") + parser.add_argument("--application_name", + help="Name of the KCL application. This must match the application name " + "used in the KCL Library configurations.") + parser.add_argument("--lease_table_name", + help="Name of the DynamoDB lease table (defaults to applicationName)." + " If LeaseTable name was specified for the application as part of " + "the KCL configurations, the same name must be passed here.") + parser.add_argument("--coordinator_state_table_name", + help="Name of the DynamoDB coordinator state table " + "(defaults to applicationName-CoordinatorState)." + " If coordinator state table name was specified for the application " + "as part of the KCL configurations, the same name must be passed here.") + parser.add_argument("--worker_metrics_table_name", + help="Name of the DynamoDB worker metrics table " + "(defaults to applicationName-WorkerMetricStats)." 
+ " If worker metrics table name was specified for the application " + "as part of the KCL configurations, the same name must be passed here.") + + args = parser.parse_args() + validate_args(args) + run_kcl_migration(*process_table_names(args)) diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java index 2838d62dc..fcaec1977 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/ConfigsBuilder.java @@ -256,7 +256,8 @@ public CoordinatorConfig coordinatorConfig() { * @return LeaseManagementConfig */ public LeaseManagementConfig leaseManagementConfig() { - return new LeaseManagementConfig(tableName(), dynamoDBClient(), kinesisClient(), workerIdentifier()); + return new LeaseManagementConfig( + tableName(), applicationName(), dynamoDBClient(), kinesisClient(), workerIdentifier()); } /** diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/DdbTableConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/DdbTableConfig.java new file mode 100644 index 000000000..4507d9616 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/DdbTableConfig.java @@ -0,0 +1,76 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.common; + +import java.util.Collection; +import java.util.Collections; + +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.awssdk.services.dynamodb.model.Tag; + +/** + * Configurations of a DDB table created by KCL for its internal operations. + */ +@Data +@Accessors(fluent = true) +@NoArgsConstructor +public class DdbTableConfig { + + protected DdbTableConfig(final String applicationName, final String tableSuffix) { + this.tableName = applicationName + "-" + tableSuffix; + } + + /** + * name to use for the DDB table. If null, it will default to + * applicationName-tableSuffix. If multiple KCL applications + * run in the same account, a unique tableName must be provided. + */ + private String tableName; + + /** + * Billing mode used to create the DDB table. + */ + private BillingMode billingMode = BillingMode.PAY_PER_REQUEST; + + /** + * read capacity to provision during DDB table creation, + * if billing mode is PROVISIONED. + */ + private long readCapacity; + + /** + * write capacity to provision during DDB table creation, + * if billing mode is PROVISIONED. + */ + private long writeCapacity; + + /** + * Flag to enable Point in Time Recovery on the DDB table. + */ + private boolean pointInTimeRecoveryEnabled = false; + + /** + * Flag to enable deletion protection on the DDB table. + */ + private boolean deletionProtectionEnabled = false; + + /** + * Tags to add to the DDB table. 
+ */ + private Collection tags = Collections.emptyList(); +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java index 3c104d8da..5615ffc52 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/FutureUtils.java @@ -15,10 +15,13 @@ package software.amazon.kinesis.common; import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.function.Supplier; public class FutureUtils { @@ -31,4 +34,15 @@ public static T resolveOrCancelFuture(Future future, Duration timeout) throw te; } } + + public static T unwrappingFuture(final Supplier> supplier) { + try { + return supplier.get().join(); + } catch (CompletionException e) { + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException) e.getCause(); + } + throw e; + } + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/TableConstants.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/StackTraceUtils.java similarity index 51% rename from amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/TableConstants.java rename to amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/StackTraceUtils.java index 14cb0eb5c..cffd2d6f3 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/TableConstants.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/common/StackTraceUtils.java @@ -1,5 +1,5 @@ /* - * Copyright 2019 Amazon.com, Inc. or its affiliates. 
+ * Copyright 2024 Amazon.com, Inc. or its affiliates. * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at @@ -12,18 +12,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package software.amazon.kinesis.common; -package software.amazon.kinesis.leases.dynamodb; +public class StackTraceUtils { + public static String getPrintableStackTrace(final StackTraceElement[] stackTrace) { + final StringBuilder stackTraceString = new StringBuilder(); -import lombok.AccessLevel; -import lombok.NoArgsConstructor; + for (final StackTraceElement traceElement : stackTrace) { + stackTraceString.append("\tat ").append(traceElement).append("\n"); + } -/** - * This class is just a holder for initial lease table IOPs units. This class will be removed in a future release. - */ -@Deprecated -@NoArgsConstructor(access = AccessLevel.PRIVATE) -public class TableConstants { - public static final long DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY = 10L; - public static final long DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY = 10L; + return stackTraceString.toString(); + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java index e1835228c..163462593 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorConfig.java @@ -18,6 +18,7 @@ import lombok.Data; import lombok.NonNull; import lombok.experimental.Accessors; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.leases.NoOpShardPrioritization; import software.amazon.kinesis.leases.ShardPrioritization; @@ -27,6 +28,14 @@ @Data @Accessors(fluent 
= true) public class CoordinatorConfig { + + private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1; + + public CoordinatorConfig(final String applicationName) { + this.applicationName = applicationName; + this.coordinatorStateTableConfig = new CoordinatorStateTableConfig(applicationName); + } + /** * Application name used by checkpointer to checkpoint. * @@ -96,4 +105,53 @@ public class CoordinatorConfig { *

Default value: 1000L

*/ private long schedulerInitializationBackoffTimeMillis = 1000L; + + /** + * Version the KCL needs to operate in. For more details check the KCLv3 migration + * documentation. + */ + public enum ClientVersionConfig { + /** + * For an application that was operating with previous KCLv2.x, during + * upgrade to KCLv3.x, a migration process is needed due to the incompatible + * changes between the 2 versions. During the migration process, application + * must use ClientVersion=CLIENT_VERSION_COMPATIBLE_WITH_2x so that it runs in + * a compatible mode until all workers in the cluster have upgraded to the version + * running 3.x version (which is determined based on workers emitting WorkerMetricStats) + * Once all known workers are in 3.x mode, the library auto toggles to 3.x mode; + * but prior to that it runs in a mode compatible with 2.x workers. + * This version also allows rolling back to the compatible mode from the + * auto-toggled 3.x mode. + */ + CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X, + /** + * A new application operating with KCLv3.x will use this value. Also, an application + * that has successfully upgraded to 3.x version and no longer needs the ability + * for a rollback to a 2.x compatible version, will use this value. In this version, + * KCL will operate with new algorithms introduced in 3.x which is not compatible + * with prior versions. And once in this version, rollback to 2.x is not supported. + */ + CLIENT_VERSION_CONFIG_3X, + } + + /** + * Client version KCL must operate in, by default it operates in 3.x version which is not + * compatible with prior versions. 
+ */ + private ClientVersionConfig clientVersionConfig = ClientVersionConfig.CLIENT_VERSION_CONFIG_3X; + + public static class CoordinatorStateTableConfig extends DdbTableConfig { + private CoordinatorStateTableConfig(final String applicationName) { + super(applicationName, "CoordinatorState"); + } + } + + /** + * Configuration to control how the CoordinatorState DDB table is created, such as table name, + * billing mode, provisioned capacity. If no table name is specified, the table name will + * default to applicationName-CoordinatorState. If no billing more is chosen, default is + * On-Demand. + */ + @NonNull + private final CoordinatorStateTableConfig coordinatorStateTableConfig; } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorState.java new file mode 100644 index 000000000..65de6504a --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorState.java @@ -0,0 +1,52 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator; + +import java.util.Map; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +/** + * DataModel for CoordinatorState, this data model is used to store various state information required + * for coordination across the KCL worker fleet. Therefore, the model follows a flexible schema. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor(access = AccessLevel.PRIVATE) +@Slf4j +@KinesisClientInternalApi +public class CoordinatorState { + public static final String COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME = "key"; + + /** + * Key value for the item in the CoordinatorState table used for leader + * election among the KCL workers. The attributes relevant to this item + * is dictated by the DDB Lock client implementation that is used to + * provide mutual exclusion. + */ + public static final String LEADER_HASH_KEY = "Leader"; + + private String key; + + private Map attributes; +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorStateDAO.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorStateDAO.java new file mode 100644 index 000000000..36aefd0f1 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/CoordinatorStateDAO.java @@ -0,0 +1,425 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions; +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions.AmazonDynamoDBLockClientOptionsBuilder; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.MapUtils; +import software.amazon.awssdk.core.waiters.WaiterResponse; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.DynamoDbClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeAction; +import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException; +import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DynamoDbException; +import 
software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue; +import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement; +import software.amazon.awssdk.services.dynamodb.model.KeyType; +import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput; +import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType; +import software.amazon.awssdk.services.dynamodb.model.ScanRequest; +import software.amazon.awssdk.services.dynamodb.model.ScanResponse; +import software.amazon.awssdk.services.dynamodb.model.TableDescription; +import software.amazon.awssdk.services.dynamodb.model.TableStatus; +import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest; +import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter; +import software.amazon.awssdk.utils.CollectionUtils; +import software.amazon.kinesis.common.FutureUtils; +import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig; +import software.amazon.kinesis.coordinator.migration.MigrationState; +import software.amazon.kinesis.leases.DynamoUtils; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.utils.DdbUtil; + +import static java.util.Objects.nonNull; +import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture; +import static 
software.amazon.kinesis.coordinator.CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME; + +/** + * Data Access Object to abstract accessing {@link CoordinatorState} from + * the CoordinatorState DDB table. + */ +@Slf4j +public class CoordinatorStateDAO { + private final DynamoDbAsyncClient dynamoDbAsyncClient; + private final DynamoDbClient dynamoDbSyncClient; + + private final CoordinatorStateTableConfig config; + + public CoordinatorStateDAO( + final DynamoDbAsyncClient dynamoDbAsyncClient, final CoordinatorStateTableConfig config) { + this.dynamoDbAsyncClient = dynamoDbAsyncClient; + this.config = config; + this.dynamoDbSyncClient = createDelegateClient(); + } + + public void initialize() throws DependencyException { + createTableIfNotExists(); + } + + private DynamoDbClient createDelegateClient() { + return new DynamoDbAsyncToSyncClientAdapter(dynamoDbAsyncClient); + } + + public AmazonDynamoDBLockClientOptionsBuilder getDDBLockClientOptionsBuilder() { + return AmazonDynamoDBLockClientOptions.builder(dynamoDbSyncClient, config.tableName()) + .withPartitionKeyName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME); + } + + /** + * List all the {@link CoordinatorState} from the DDB table synchronously + * + * @throws DependencyException if DynamoDB scan fails in an unexpected way + * @throws InvalidStateException if ddb table does not exist + * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity + * + * @return list of state + */ + public List listCoordinatorState() + throws ProvisionedThroughputException, DependencyException, InvalidStateException { + log.debug("Listing coordinatorState"); + + final ScanRequest request = + ScanRequest.builder().tableName(config.tableName()).build(); + + try { + ScanResponse response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(request)); + final List stateList = new ArrayList<>(); + while (Objects.nonNull(response)) { + log.debug("Scan response {}", response); + + 
response.items().stream().map(this::fromDynamoRecord).forEach(stateList::add); + if (!CollectionUtils.isNullOrEmpty(response.lastEvaluatedKey())) { + final ScanRequest continuationRequest = request.toBuilder() + .exclusiveStartKey(response.lastEvaluatedKey()) + .build(); + log.debug("Scan request {}", continuationRequest); + response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(continuationRequest)); + } else { + log.debug("Scan finished"); + response = null; + } + } + return stateList; + } catch (final ProvisionedThroughputExceededException e) { + log.warn( + "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs" + + " on the table.", + config.tableName()); + throw new ProvisionedThroughputException(e); + } catch (final ResourceNotFoundException e) { + throw new InvalidStateException( + String.format("Cannot list coordinatorState, because table %s does not exist", config.tableName())); + } catch (final DynamoDbException e) { + throw new DependencyException(e); + } + } + + /** + * Create a new {@link CoordinatorState} if it does not exist. 
+ * @param state the state to create + * @return true if state was created, false if it already exists + * + * @throws DependencyException if DynamoDB put fails in an unexpected way + * @throws InvalidStateException if lease table does not exist + * @throws ProvisionedThroughputException if DynamoDB put fails due to lack of capacity + */ + public boolean createCoordinatorStateIfNotExists(final CoordinatorState state) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + log.debug("Creating coordinatorState {}", state); + + final PutItemRequest request = PutItemRequest.builder() + .tableName(config.tableName()) + .item(toDynamoRecord(state)) + .expected(getDynamoNonExistentExpectation()) + .build(); + + try { + FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.putItem(request)); + } catch (final ConditionalCheckFailedException e) { + log.info("Not creating coordinator state because the key already exists"); + return false; + } catch (final ProvisionedThroughputExceededException e) { + log.warn( + "Provisioned throughput on {} has exceeded. 
It is recommended to increase the IOPs" + + " on the table.", + config.tableName()); + throw new ProvisionedThroughputException(e); + } catch (final ResourceNotFoundException e) { + throw new InvalidStateException(String.format( + "Cannot create coordinatorState %s, because table %s does not exist", state, config.tableName())); + } catch (final DynamoDbException e) { + throw new DependencyException(e); + } + + log.info("Created CoordinatorState: {}", state); + return true; + } + + /** + * @param key Get the CoordinatorState for this key + * + * @throws InvalidStateException if ddb table does not exist + * @throws ProvisionedThroughputException if DynamoDB get fails due to lack of capacity + * @throws DependencyException if DynamoDB get fails in an unexpected way + * + * @return state for the specified key, or null if one doesn't exist + */ + public CoordinatorState getCoordinatorState(@NonNull final String key) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + log.debug("Getting coordinatorState with key {}", key); + + final GetItemRequest request = GetItemRequest.builder() + .tableName(config.tableName()) + .key(getCoordinatorStateKey(key)) + .consistentRead(true) + .build(); + + try { + final GetItemResponse result = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.getItem(request)); + + final Map dynamoRecord = result.item(); + if (CollectionUtils.isNullOrEmpty(dynamoRecord)) { + log.debug("No coordinatorState found with key {}, returning null.", key); + return null; + } + return fromDynamoRecord(dynamoRecord); + } catch (final ProvisionedThroughputExceededException e) { + log.warn( + "Provisioned throughput on {} has exceeded. 
It is recommended to increase the IOPs" + + " on the table.", + config.tableName()); + throw new ProvisionedThroughputException(e); + } catch (final ResourceNotFoundException e) { + throw new InvalidStateException(String.format( + "Cannot get coordinatorState for key %s, because table %s does not exist", + key, config.tableName())); + } catch (final DynamoDbException e) { + throw new DependencyException(e); + } + } + + /** + * Update fields of the given coordinator state in DynamoDB. Conditional on the provided expectation. + * + * @return true if update succeeded, false otherwise when expectations are not met + * + * @throws InvalidStateException if table does not exist + * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity + * @throws DependencyException if DynamoDB update fails in an unexpected way + */ + public boolean updateCoordinatorStateWithExpectation( + @NonNull final CoordinatorState state, final Map expectations) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + final Map expectationMap = getDynamoExistentExpectation(state.getKey()); + expectationMap.putAll(MapUtils.emptyIfNull(expectations)); + + final Map updateMap = getDynamoCoordinatorStateUpdate(state); + + final UpdateItemRequest request = UpdateItemRequest.builder() + .tableName(config.tableName()) + .key(getCoordinatorStateKey(state.getKey())) + .expected(expectationMap) + .attributeUpdates(updateMap) + .build(); + + try { + FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.updateItem(request)); + } catch (final ConditionalCheckFailedException e) { + log.debug("CoordinatorState update {} failed because conditions were not met", state); + return false; + } catch (final ProvisionedThroughputExceededException e) { + log.warn( + "Provisioned throughput on {} has exceeded. 
It is recommended to increase the IOPs" + + " on the table.", + config.tableName()); + throw new ProvisionedThroughputException(e); + } catch (final ResourceNotFoundException e) { + throw new InvalidStateException(String.format( + "Cannot update coordinatorState for key %s, because table %s does not exist", + state.getKey(), config.tableName())); + } catch (final DynamoDbException e) { + throw new DependencyException(e); + } + + log.info("Coordinator state updated {}", state); + return true; + } + + private void createTableIfNotExists() throws DependencyException { + TableDescription tableDescription = getTableDescription(); + if (tableDescription == null) { + final CreateTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.createTable(getRequest())); + tableDescription = response.tableDescription(); + log.info("DDB Table: {} created", config.tableName()); + } else { + log.info("Skipping DDB table {} creation as it already exists", config.tableName()); + } + + if (tableDescription.tableStatus() != TableStatus.ACTIVE) { + log.info("Waiting for DDB Table: {} to become active", config.tableName()); + try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) { + final WaiterResponse response = + unwrappingFuture(() -> waiter.waitUntilTableExists( + r -> r.tableName(config.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10)))); + response.matched() + .response() + .orElseThrow(() -> new DependencyException(new IllegalStateException( + "Creating CoordinatorState table timed out", + response.matched().exception().orElse(null)))); + } + unwrappingFuture(() -> DdbUtil.pitrEnabler(config, dynamoDbAsyncClient)); + } + } + + private CreateTableRequest getRequest() { + final CreateTableRequest.Builder requestBuilder = CreateTableRequest.builder() + .tableName(config.tableName()) + .keySchema(KeySchemaElement.builder() + .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME) + .keyType(KeyType.HASH) + .build()) + 
.attributeDefinitions(AttributeDefinition.builder() + .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME) + .attributeType(ScalarAttributeType.S) + .build()) + .deletionProtectionEnabled(config.deletionProtectionEnabled()); + + if (nonNull(config.tags()) && !config.tags().isEmpty()) { + requestBuilder.tags(config.tags()); + } + + switch (config.billingMode()) { + case PAY_PER_REQUEST: + requestBuilder.billingMode(BillingMode.PAY_PER_REQUEST); + break; + case PROVISIONED: + requestBuilder.billingMode(BillingMode.PROVISIONED); + + final ProvisionedThroughput throughput = ProvisionedThroughput.builder() + .readCapacityUnits(config.readCapacity()) + .writeCapacityUnits(config.writeCapacity()) + .build(); + requestBuilder.provisionedThroughput(throughput); + break; + } + return requestBuilder.build(); + } + + private Map getCoordinatorStateKey(@NonNull final String key) { + return Collections.singletonMap( + COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(key)); + } + + private CoordinatorState fromDynamoRecord(final Map dynamoRecord) { + final HashMap attributes = new HashMap<>(dynamoRecord); + final String keyValue = + DynamoUtils.safeGetString(attributes.remove(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)); + + final MigrationState migrationState = MigrationState.deserialize(keyValue, attributes); + if (migrationState != null) { + log.debug("Retrieved MigrationState {}", migrationState); + return migrationState; + } + + final CoordinatorState c = + CoordinatorState.builder().key(keyValue).attributes(attributes).build(); + log.debug("Retrieved coordinatorState {}", c); + + return c; + } + + private Map toDynamoRecord(final CoordinatorState state) { + final Map result = new HashMap<>(); + result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(state.getKey())); + if (state instanceof MigrationState) { + result.putAll(((MigrationState) state).serialize()); + } + if 
(!CollectionUtils.isNullOrEmpty(state.getAttributes())) { + result.putAll(state.getAttributes()); + } + return result; + } + + private Map getDynamoNonExistentExpectation() { + final Map result = new HashMap<>(); + + final ExpectedAttributeValue expectedAV = + ExpectedAttributeValue.builder().exists(false).build(); + result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV); + + return result; + } + + private Map getDynamoExistentExpectation(final String keyValue) { + final Map result = new HashMap<>(); + + final ExpectedAttributeValue expectedAV = ExpectedAttributeValue.builder() + .value(AttributeValue.fromS(keyValue)) + .build(); + result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV); + + return result; + } + + private Map getDynamoCoordinatorStateUpdate(final CoordinatorState state) { + final HashMap updates = new HashMap<>(); + if (state instanceof MigrationState) { + updates.putAll(((MigrationState) state).getDynamoUpdate()); + } + state.getAttributes() + .forEach((attribute, value) -> updates.put( + attribute, + AttributeValueUpdate.builder() + .value(value) + .action(AttributeAction.PUT) + .build())); + return updates; + } + + private TableDescription getTableDescription() { + try { + final DescribeTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.describeTable( + DescribeTableRequest.builder().tableName(config.tableName()).build())); + return response.table(); + } catch (final ResourceNotFoundException e) { + return null; + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java index 4c7f25daa..4562b8f71 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java +++ 
b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDecider.java @@ -28,12 +28,17 @@ import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; import software.amazon.awssdk.utils.CollectionUtils; import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.exceptions.DependencyException; import software.amazon.kinesis.leases.exceptions.InvalidStateException; import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; /** * An implementation of the {@code LeaderDecider} to elect leader(s) based on workerId. @@ -46,7 +51,7 @@ * This ensures redundancy for shard-sync during host failures. */ @Slf4j -class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider { +public class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider { // Fixed seed so that the shuffle order is preserved across workers static final int DETERMINISTIC_SHUFFLE_SEED = 1947; @@ -59,6 +64,7 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider { private final LeaseRefresher leaseRefresher; private final int numPeriodicShardSyncWorkers; private final ScheduledExecutorService leaderElectionThreadPool; + private final MetricsFactory metricsFactory; private volatile Set leaders; @@ -67,11 +73,17 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider { * @param leaderElectionThreadPool Thread-pool to be used for leaderElection. * @param numPeriodicShardSyncWorkers Number of leaders that will be elected to perform periodic shard syncs. 
*/ - DeterministicShuffleShardSyncLeaderDecider( + public DeterministicShuffleShardSyncLeaderDecider( LeaseRefresher leaseRefresher, ScheduledExecutorService leaderElectionThreadPool, - int numPeriodicShardSyncWorkers) { - this(leaseRefresher, leaderElectionThreadPool, numPeriodicShardSyncWorkers, new ReentrantReadWriteLock()); + int numPeriodicShardSyncWorkers, + MetricsFactory metricsFactory) { + this( + leaseRefresher, + leaderElectionThreadPool, + numPeriodicShardSyncWorkers, + new ReentrantReadWriteLock(), + metricsFactory); } /** @@ -84,11 +96,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider { LeaseRefresher leaseRefresher, ScheduledExecutorService leaderElectionThreadPool, int numPeriodicShardSyncWorkers, - ReadWriteLock readWriteLock) { + ReadWriteLock readWriteLock, + MetricsFactory metricsFactory) { this.leaseRefresher = leaseRefresher; this.leaderElectionThreadPool = leaderElectionThreadPool; this.numPeriodicShardSyncWorkers = numPeriodicShardSyncWorkers; this.readWriteLock = readWriteLock; + this.metricsFactory = metricsFactory; } /* @@ -146,8 +160,13 @@ public synchronized Boolean isLeader(String workerId) { ELECTION_SCHEDULING_INTERVAL_MILLIS, TimeUnit.MILLISECONDS); } - - return executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId)); + final boolean response = executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId)); + final MetricsScope metricsScope = + MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER); + metricsScope.addData( + METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 
1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED); + MetricsUtil.endScope(metricsScope); + return response; } @Override diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializer.java new file mode 100644 index 000000000..c4aecdda2 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializer.java @@ -0,0 +1,403 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator; + +import java.time.Duration; +import java.util.Objects; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.function.BiFunction; +import java.util.function.Supplier; + +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Getter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode; +import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager; +import software.amazon.kinesis.coordinator.migration.ClientVersion; +import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider; +import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsReporter; + +import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT; +import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT; +import static software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager.DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD; + +/** + * This class is responsible for 
initializing the KCL components that support + * seamless upgrade from v2.x to v3.x. + * During specific versions, it also dynamically switches the functionality + * to be either vanilla 3.x or 2.x compatible. + * + * It is responsible for creating: + * 1. LeaderDecider + * 2. LAM + * 3. WorkerMetricStatsReporter + * + * It manages initializing the following components at initialization time + * 1. workerMetricsDAO and workerMetricsManager + * 2. leaderDecider + * 3. MigrationAdaptiveLeaseAssignmentModeProvider + * + * It updates the following components dynamically: + * 1. starts/stops LAM + * 2. starts/stops WorkerMetricStatsReporter + * 3. updates LeaseAssignmentMode to either DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT or WORKER_UTILIZATION_AWARE_ASSIGNMENT + * 4. creates GSI (deletion is done by KclMigrationTool) + * 5. creates WorkerMetricStats table (deletion is done by KclMigrationTool) + * 6. updates LeaderDecider to either DeterministicShuffleShardSyncLeaderDecider or DynamoDBLockBasedLeaderDecider + */ +@Slf4j +@KinesisClientInternalApi +@ThreadSafe +@Accessors(fluent = true) +public final class DynamicMigrationComponentsInitializer { + private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L; + + @Getter + private final MetricsFactory metricsFactory; + + @Getter + private final LeaseRefresher leaseRefresher; + + private final CoordinatorStateDAO coordinatorStateDAO; + private final ScheduledExecutorService workerMetricsThreadPool; + + @Getter + private final WorkerMetricStatsDAO workerMetricsDAO; + + private final WorkerMetricStatsManager workerMetricsManager; + private final ScheduledExecutorService lamThreadPool; + private final BiFunction lamCreator; + private final Supplier adaptiveLeaderDeciderCreator; + private final Supplier deterministicLeaderDeciderCreator; + private final Supplier ddbLockBasedLeaderDeciderCreator; + + @Getter + private final String workerIdentifier; + + private final WorkerUtilizationAwareAssignmentConfig 
workerUtilizationAwareAssignmentConfig; + + @Getter + private final long workerMetricsExpirySeconds; + + private final MigrationAdaptiveLeaseAssignmentModeProvider leaseModeChangeConsumer; + + @Getter + private LeaderDecider leaderDecider; + + private LeaseAssignmentManager leaseAssignmentManager; + private ScheduledFuture workerMetricsReporterFuture; + private LeaseAssignmentMode currentAssignmentMode; + private boolean dualMode; + private boolean initialized; + + @Builder(access = AccessLevel.PACKAGE) + DynamicMigrationComponentsInitializer( + final MetricsFactory metricsFactory, + final LeaseRefresher leaseRefresher, + final CoordinatorStateDAO coordinatorStateDAO, + final ScheduledExecutorService workerMetricsThreadPool, + final WorkerMetricStatsDAO workerMetricsDAO, + final WorkerMetricStatsManager workerMetricsManager, + final ScheduledExecutorService lamThreadPool, + final BiFunction lamCreator, + final Supplier adaptiveLeaderDeciderCreator, + final Supplier deterministicLeaderDeciderCreator, + final Supplier ddbLockBasedLeaderDeciderCreator, + final String workerIdentifier, + final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig, + final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) { + this.metricsFactory = metricsFactory; + this.leaseRefresher = leaseRefresher; + this.coordinatorStateDAO = coordinatorStateDAO; + this.workerIdentifier = workerIdentifier; + this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig; + this.workerMetricsExpirySeconds = Duration.ofMillis(DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD + * workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis()) + .getSeconds(); + this.workerMetricsManager = workerMetricsManager; + this.workerMetricsDAO = workerMetricsDAO; + this.workerMetricsThreadPool = workerMetricsThreadPool; + this.lamThreadPool = lamThreadPool; + this.lamCreator = lamCreator; + this.adaptiveLeaderDeciderCreator = 
adaptiveLeaderDeciderCreator; + this.deterministicLeaderDeciderCreator = deterministicLeaderDeciderCreator; + this.ddbLockBasedLeaderDeciderCreator = ddbLockBasedLeaderDeciderCreator; + this.leaseModeChangeConsumer = leaseAssignmentModeProvider; + } + + public void initialize(final ClientVersion migrationStateMachineStartingClientVersion) throws DependencyException { + if (initialized) { + log.info("Already initialized, nothing to do"); + return; + } + + // always collect metrics so that when we flip to start reporting we will have accurate historical data. + log.info("Start collection of WorkerMetricStats"); + workerMetricsManager.startManager(); + if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3X) { + initializeComponentsFor3x(); + } else { + initializeComponentsForMigration(migrationStateMachineStartingClientVersion); + } + log.info("Initialized dual mode {} current assignment mode {}", dualMode, currentAssignmentMode); + + log.info("Creating LAM"); + leaseAssignmentManager = lamCreator.apply(lamThreadPool, leaderDecider); + log.info("Initializing {}", leaseModeChangeConsumer.getClass().getSimpleName()); + leaseModeChangeConsumer.initialize(dualMode, currentAssignmentMode); + initialized = true; + } + + private void initializeComponentsFor3x() { + log.info("Initializing for 3x functionality"); + dualMode = false; + currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT; + log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode); + leaderDecider = ddbLockBasedLeaderDeciderCreator.get(); + log.info("Initializing {}", leaderDecider.getClass().getSimpleName()); + leaderDecider.initialize(); + } + + private void initializeComponentsForMigration(final ClientVersion migrationStateMachineStartingClientVersion) { + log.info("Initializing for migration to 3x"); + dualMode = true; + final LeaderDecider initialLeaderDecider; + if (migrationStateMachineStartingClientVersion == 
ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) { + currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT; + initialLeaderDecider = ddbLockBasedLeaderDeciderCreator.get(); + } else { + currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT; + initialLeaderDecider = deterministicLeaderDeciderCreator.get(); + } + log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode); + + final MigrationAdaptiveLeaderDecider adaptiveLeaderDecider = adaptiveLeaderDeciderCreator.get(); + log.info( + "Initializing MigrationAdaptiveLeaderDecider with {}", + initialLeaderDecider.getClass().getSimpleName()); + adaptiveLeaderDecider.updateLeaderDecider(initialLeaderDecider); + this.leaderDecider = adaptiveLeaderDecider; + } + + void shutdown() { + log.info("Shutting down components"); + if (initialized) { + log.info("Stopping LAM, LeaderDecider, workerMetrics reporting and collection"); + leaseAssignmentManager.stop(); + // leader decider is shut down later when scheduler is doing a final shutdown + // since scheduler still accesses the leader decider while shutting down + stopWorkerMetricsReporter(); + workerMetricsManager.stopManager(); + } + + // lam does not manage lifecycle of its threadpool to easily stop/start dynamically. + // once migration code is obsolete (i.e. 
all 3x functionality is the baseline and no + // migration is needed), it can be moved inside lam + log.info("Shutting down lamThreadPool and workerMetrics reporter thread pool"); + lamThreadPool.shutdown(); + workerMetricsThreadPool.shutdown(); + try { + if (!lamThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + lamThreadPool.shutdownNow(); + } + } catch (final InterruptedException e) { + log.warn("Interrupted while waiting for shutdown of LeaseAssignmentManager ThreadPool", e); + lamThreadPool.shutdownNow(); + } + + try { + if (!workerMetricsThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + workerMetricsThreadPool.shutdownNow(); + } + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + log.warn("Interrupted while waiting for shutdown of WorkerMetricStatsManager ThreadPool", e); + workerMetricsThreadPool.shutdownNow(); + } + } + + private void startWorkerMetricsReporting() throws DependencyException { + if (workerMetricsReporterFuture != null) { + log.info("Worker metrics reporting is already running..."); + return; + } + log.info("Initializing WorkerMetricStats"); + this.workerMetricsDAO.initialize(); + log.info("Starting worker metrics reporter"); + // Start with a delay for workerStatsManager to capture some values and start reporting. 
+ workerMetricsReporterFuture = workerMetricsThreadPool.scheduleAtFixedRate( + new WorkerMetricStatsReporter(metricsFactory, workerIdentifier, workerMetricsManager, workerMetricsDAO), + workerUtilizationAwareAssignmentConfig.inMemoryWorkerMetricsCaptureFrequencyMillis() * 2L, + workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis(), + TimeUnit.MILLISECONDS); + } + + private void stopWorkerMetricsReporter() { + log.info("Stopping worker metrics reporter"); + if (workerMetricsReporterFuture != null) { + workerMetricsReporterFuture.cancel(false); + workerMetricsReporterFuture = null; + } + } + + /** + * Create LeaseOwnerToLeaseKey GSI for the lease table + * @param blockingWait whether to wait for the GSI creation or not, if false, the gsi creation will be initiated + * but this call will not block for its creation + * @throws DependencyException If DDB fails unexpectedly when creating the GSI + */ + private void createGsi(final boolean blockingWait) throws DependencyException { + log.info("Creating Lease table GSI if it does not exist"); + // KCLv3.0 always starts with GSI available + leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists(); + + if (blockingWait) { + log.info("Waiting for Lease table GSI creation"); + final long secondsBetweenPolls = 10L; + final long timeoutSeconds = 600L; + final boolean isIndexActive = + leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(secondsBetweenPolls, timeoutSeconds); + + if (!isIndexActive) { + throw new DependencyException( + new IllegalStateException("Creating LeaseOwnerToLeaseKeyIndex on Lease table timed out")); + } + } + } + + /** + * Initialize KCL with components and configuration to support upgrade from 2x. This can happen + * at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X. + * Or Dynamically during roll-forward from ClientVersion.CLIENT_VERSION_2X. 
+ */ + public synchronized void initializeClientVersionForUpgradeFrom2x(final ClientVersion fromClientVersion) + throws DependencyException { + log.info("Initializing KCL components for upgrade from 2x from {}", fromClientVersion); + + createGsi(false); + startWorkerMetricsReporting(); + // LAM is not started until the dynamic flip to 3xWithRollback + } + + /** + * Initialize KCL with components and configuration to run vanilla 3x functionality. This can happen + * at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X, or dynamically + * during a new deployment when existing workers are in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK + */ + public synchronized void initializeClientVersionFor3x(final ClientVersion fromClientVersion) + throws DependencyException { + log.info("Initializing KCL components for 3x from {}", fromClientVersion); + + log.info("Initializing LeaseAssignmentManager, DDB-lock-based leader decider, WorkerMetricStats manager" + + " and creating the Lease table GSI if it does not exist"); + if (fromClientVersion == ClientVersion.CLIENT_VERSION_INIT) { + // gsi may already exist and be active for migrated application. + createGsi(true); + startWorkerMetricsReporting(); + log.info("Starting LAM"); + leaseAssignmentManager.start(); + } + // nothing to do when transitioning from CLIENT_VERSION_3X_WITH_ROLLBACK. + } + + /** + * Initialize KCL with components and configuration to run 2x compatible functionality + * while allowing roll-forward. This can happen at KCL Worker startup when MigrationStateMachine + * starts in ClientVersion.CLIENT_VERSION_2X (after a rollback) + * Or Dynamically during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK. 
+ */ + public synchronized void initializeClientVersionFor2x(final ClientVersion fromClientVersion) { + log.info("Initializing KCL components for rollback to 2x from {}", fromClientVersion); + + if (fromClientVersion != ClientVersion.CLIENT_VERSION_INIT) { + // dynamic rollback + stopWorkerMetricsReporter(); + // Migration Tool will delete the lease table LeaseOwner GSI + // and WorkerMetricStats table + } + + if (fromClientVersion == ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) { + // we are rolling back after flip + currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT; + notifyLeaseAssignmentModeChange(); + log.info("Stopping LAM"); + leaseAssignmentManager.stop(); + final LeaderDecider leaderDecider = deterministicLeaderDeciderCreator.get(); + if (this.leaderDecider instanceof MigrationAdaptiveLeaderDecider) { + log.info( + "Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName()); + ((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider); + } else { + throw new IllegalStateException(String.format("Unexpected leader decider %s", this.leaderDecider)); + } + } + } + + /** + * Initialize KCL with components and configuration to run vanilla 3x functionality + * while allowing roll-back to 2x functionality. This can happen at KCL Worker startup + * when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK (after the flip) + * Or Dynamically during flip from CLIENT_VERSION_UPGRADE_FROM_2X. 
+ */ + public synchronized void initializeClientVersionFor3xWithRollback(final ClientVersion fromClientVersion) + throws DependencyException { + log.info("Initializing KCL components for 3x with rollback from {}", fromClientVersion); + + if (fromClientVersion == ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X) { + // dynamic flip + currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT; + notifyLeaseAssignmentModeChange(); + final LeaderDecider leaderDecider = ddbLockBasedLeaderDeciderCreator.get(); + log.info("Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName()); + ((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider); + } else { + startWorkerMetricsReporting(); + } + + log.info("Starting LAM"); + leaseAssignmentManager.start(); + } + + /** + * Synchronously invoke the consumer to change the lease assignment mode. + */ + private void notifyLeaseAssignmentModeChange() { + if (dualMode) { + log.info("Notifying {} of {}", leaseModeChangeConsumer, currentAssignmentMode); + if (Objects.nonNull(leaseModeChangeConsumer)) { + try { + leaseModeChangeConsumer.updateLeaseAssignmentMode(currentAssignmentMode); + } catch (final Exception e) { + log.warn("LeaseAssignmentMode change consumer threw exception", e); + } + } + } else { + throw new IllegalStateException("Unexpected assignment mode change"); + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamoDbAsyncToSyncClientAdapter.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamoDbAsyncToSyncClientAdapter.java new file mode 100644 index 000000000..9fd185a56 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/DynamoDbAsyncToSyncClientAdapter.java @@ -0,0 +1,144 @@ +package software.amazon.kinesis.coordinator; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.function.Supplier; + 
+import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.DynamoDbClient; +import software.amazon.awssdk.services.dynamodb.model.BatchGetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.BatchGetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemRequest; +import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemResponse; +import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest; +import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.PutItemResponse; +import software.amazon.awssdk.services.dynamodb.model.QueryRequest; +import software.amazon.awssdk.services.dynamodb.model.QueryResponse; +import software.amazon.awssdk.services.dynamodb.model.ScanRequest; +import software.amazon.awssdk.services.dynamodb.model.ScanResponse; +import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest; +import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse; +import software.amazon.awssdk.services.dynamodb.paginators.BatchGetItemIterable; +import software.amazon.awssdk.services.dynamodb.paginators.QueryIterable; +import software.amazon.awssdk.services.dynamodb.paginators.ScanIterable; + +/** 
+ * DDB Lock client depends on DynamoDbClient and KCL only has DynamoDbAsyncClient configured. + * This wrapper delegates APIs from sync client to async client internally so that it can + * be used with the DDB Lock client. + */ +public class DynamoDbAsyncToSyncClientAdapter implements DynamoDbClient { + private final DynamoDbAsyncClient asyncClient; + + public DynamoDbAsyncToSyncClientAdapter(final DynamoDbAsyncClient asyncClient) { + this.asyncClient = asyncClient; + } + + @Override + public String serviceName() { + return asyncClient.serviceName(); + } + + @Override + public void close() { + asyncClient.close(); + } + + private T handleException(final Supplier> task) { + try { + return task.get().join(); + } catch (final CompletionException e) { + rethrow(e.getCause()); + return null; + } + } + + @Override + public CreateTableResponse createTable(final CreateTableRequest request) { + return handleException(() -> asyncClient.createTable(request)); + } + + @Override + public DescribeTableResponse describeTable(final DescribeTableRequest request) { + return handleException(() -> asyncClient.describeTable(request)); + } + + @Override + public DeleteTableResponse deleteTable(final DeleteTableRequest request) { + return handleException(() -> asyncClient.deleteTable(request)); + } + + @Override + public DeleteItemResponse deleteItem(final DeleteItemRequest request) { + return handleException(() -> asyncClient.deleteItem(request)); + } + + @Override + public GetItemResponse getItem(final GetItemRequest request) { + return handleException(() -> asyncClient.getItem(request)); + } + + @Override + public PutItemResponse putItem(final PutItemRequest request) { + return handleException(() -> asyncClient.putItem(request)); + } + + @Override + public UpdateItemResponse updateItem(final UpdateItemRequest request) { + return handleException(() -> asyncClient.updateItem(request)); + } + + @Override + public QueryResponse query(final QueryRequest request) { + return 
handleException(() -> asyncClient.query(request)); + } + + @Override + public ScanResponse scan(final ScanRequest request) { + return handleException(() -> asyncClient.scan(request)); + } + + @Override + public QueryIterable queryPaginator(final QueryRequest request) { + return new QueryIterable(this, request); + } + + @Override + public ScanIterable scanPaginator(final ScanRequest request) { + return new ScanIterable(this, request); + } + + @Override + public BatchGetItemResponse batchGetItem(final BatchGetItemRequest request) { + return handleException(() -> asyncClient.batchGetItem(request)); + } + + @Override + public BatchWriteItemResponse batchWriteItem(final BatchWriteItemRequest request) { + return handleException(() -> asyncClient.batchWriteItem(request)); + } + + @Override + public BatchGetItemIterable batchGetItemPaginator(final BatchGetItemRequest request) { + return new BatchGetItemIterable(this, request); + } + + private static void rethrow(final Throwable e) { + castAndThrow(e); + } + + @SuppressWarnings("unchecked") + private static void castAndThrow(final Throwable e) throws T { + throw (T) e; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java index 140791af6..fc774cf4c 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/LeaderDecider.java @@ -21,6 +21,8 @@ * worker is one of the leaders designated to execute shard-sync and then acts accordingly. 
*/ public interface LeaderDecider { + String METRIC_OPERATION_LEADER_DECIDER = "LeaderDecider"; + String METRIC_OPERATION_LEADER_DECIDER_IS_LEADER = METRIC_OPERATION_LEADER_DECIDER + ":IsLeader"; /** * Method invoked to check the given workerId corresponds to one of the workers @@ -36,4 +38,32 @@ public interface LeaderDecider { * being used in the LeaderDecider implementation. */ void shutdown(); + + /** + * Performs initialization tasks for decider if any. + */ + default void initialize() { + // No-op by default + } + + /** + * Returns if any ACTIVE leader exists that is elected by the current implementation. + * Note: Some implementation (like DeterministicShuffleShardSyncLeaderDecider) will always have a leader and will + * return true always. + */ + default boolean isAnyLeaderElected() { + return true; + } + + /** + * If the current worker is the leader, then releases the leadership else does nothing. + * This might not be relevant for some implementations, for e.g. DeterministicShuffleShardSyncLeaderDecider does + * not have mechanism to release leadership. + * + * Current worker if leader releases leadership, it's possible that the current worker assume leadership sometime + * later again in future elections. + */ + default void releaseLeadershipIfHeld() { + // No-op by default + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/MigrationAdaptiveLeaseAssignmentModeProvider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/MigrationAdaptiveLeaseAssignmentModeProvider.java new file mode 100644 index 000000000..ad4124bda --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/MigrationAdaptiveLeaseAssignmentModeProvider.java @@ -0,0 +1,126 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator; + +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +/** + * Provides the lease assignment mode KCL must operate in during migration + * from 2.x to 3.x. + * KCL v2.x lease assignment is based on distributed-worker-stealing algorithm + * which balances lease count across workers. + * KCL v3.x lease assignment is based on a centralized-lease-assignment algorithm + * which balances resource utilization metrics(e.g. CPU utilization) across workers. + * + * For a new application starting in KCL v3.x, there is no migration needed, + * so KCL will initialize with the lease assignment mode accordingly, and it will + * not change dynamically. + * + * During upgrade from 2.x to 3.x, KCL library needs an ability to + * start in v2.x assignment mode but dynamically change to v3.x assignment. + * In this case, both 2.x and 3.x lease assignment will be running but one + * of them will be a no-op based on the mode. + * + * The methods and internal state is guarded for concurrent access to allow + * both lease assignment algorithms to access the state concurrently while + * it could be dynamically updated. + */ +@KinesisClientInternalApi +@Slf4j +@ThreadSafe +@NoArgsConstructor +public final class MigrationAdaptiveLeaseAssignmentModeProvider { + + public enum LeaseAssignmentMode { + /** + * This is the 2.x assignment mode. 
+ * This mode assigns leases based on the number of leases. + * This mode involves each worker independently determining how many leases to pick or how many leases to steal + * from other workers. + */ + DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT, + + /** + * This is the 3.x assignment mode. + * This mode uses each worker's resource utilization to perform lease assignment. + * Assignment is done by a single worker (elected leader), which looks at WorkerMetricStats for each worker to + * determine lease assignment. + * + * This mode primarily does + * 1. Starts WorkerMetricStatsManager on the worker which starts publishing WorkerMetricStats + * 2. Starts the LeaseDiscoverer + * 3. Creates if not already available the LeaseOwnerToLeaseKey GSI on the lease table and validates that it is + * ACTIVE. + */ + WORKER_UTILIZATION_AWARE_ASSIGNMENT; + } + + private LeaseAssignmentMode currentMode; + private boolean initialized = false; + private boolean dynamicModeChangeSupportNeeded; + + /** + * Specify whether both lease assignment algorithms should be initialized to + * support dynamically changing lease mode. + * @return true if lease assignment mode can change dynamically + * false otherwise. 
+ */ + public synchronized boolean dynamicModeChangeSupportNeeded() { + return dynamicModeChangeSupportNeeded; + } + + /** + * Provide the current lease assignment mode in which KCL should perform lease assignment + * @return the current lease assignment mode + */ + public synchronized LeaseAssignmentMode getLeaseAssignmentMode() { + if (!initialized) { + throw new IllegalStateException("AssignmentMode is not initialized"); + } + return currentMode; + } + + synchronized void initialize(final boolean dynamicModeChangeSupportNeeded, final LeaseAssignmentMode mode) { + if (!initialized) { + log.info("Initializing dynamicModeChangeSupportNeeded {} mode {}", dynamicModeChangeSupportNeeded, mode); + this.dynamicModeChangeSupportNeeded = dynamicModeChangeSupportNeeded; + this.currentMode = mode; + this.initialized = true; + return; + } + log.info( + "Already initialized dynamicModeChangeSupportNeeded {} mode {}. Ignoring new values {}, {}", + this.dynamicModeChangeSupportNeeded, + this.currentMode, + dynamicModeChangeSupportNeeded, + mode); + } + + synchronized void updateLeaseAssignmentMode(final LeaseAssignmentMode mode) { + if (!initialized) { + throw new IllegalStateException("Cannot change mode before initializing"); + } + if (dynamicModeChangeSupportNeeded) { + log.info("Changing Lease assignment mode from {} to {}", currentMode, mode); + this.currentMode = mode; + return; + } + throw new IllegalStateException(String.format( + "Lease assignment mode already initialized to %s cannot" + " change to %s", this.currentMode, mode)); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java index f0133ec85..192cf560d 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java +++ 
b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManager.java @@ -87,7 +87,7 @@ class PeriodicShardSyncManager { private final Map hashRangeHoleTrackerMap = new HashMap<>(); private final String workerId; - private final LeaderDecider leaderDecider; + private LeaderDecider leaderDecider; private final LeaseRefresher leaseRefresher; private final Map currentStreamConfigMap; private final Function shardSyncTaskManagerProvider; @@ -105,7 +105,6 @@ class PeriodicShardSyncManager { PeriodicShardSyncManager( String workerId, - LeaderDecider leaderDecider, LeaseRefresher leaseRefresher, Map currentStreamConfigMap, Function shardSyncTaskManagerProvider, @@ -117,7 +116,6 @@ class PeriodicShardSyncManager { AtomicBoolean leaderSynced) { this( workerId, - leaderDecider, leaseRefresher, currentStreamConfigMap, shardSyncTaskManagerProvider, @@ -132,7 +130,6 @@ class PeriodicShardSyncManager { PeriodicShardSyncManager( String workerId, - LeaderDecider leaderDecider, LeaseRefresher leaseRefresher, Map currentStreamConfigMap, Function shardSyncTaskManagerProvider, @@ -144,9 +141,7 @@ class PeriodicShardSyncManager { int leasesRecoveryAuditorInconsistencyConfidenceThreshold, AtomicBoolean leaderSynced) { Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager."); - Validate.notNull(leaderDecider, "LeaderDecider is required to initialize PeriodicShardSyncManager."); this.workerId = workerId; - this.leaderDecider = leaderDecider; this.leaseRefresher = leaseRefresher; this.currentStreamConfigMap = currentStreamConfigMap; this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider; @@ -160,7 +155,9 @@ class PeriodicShardSyncManager { this.leaderSynced = leaderSynced; } - public synchronized TaskResult start() { + public synchronized TaskResult start(final LeaderDecider leaderDecider) { + Validate.notNull(leaderDecider, "LeaderDecider is required to start PeriodicShardSyncManager."); + this.leaderDecider = 
leaderDecider; if (!isRunning) { final Runnable periodicShardSyncer = () -> { try { @@ -435,7 +432,7 @@ private List fillWithHashRangesIfRequired(StreamIdentifier streamIdentifi leaseRefresher.updateLeaseWithMetaInfo(lease, UpdateField.HASH_KEY_RANGE); } catch (Exception e) { log.warn( - "Unable to update hash range key information for lease {} of stream {}." + "Unable to update hash range key information for lease {} of stream {}. " + "This may result in explicit lease sync.", lease.leaseKey(), streamIdentifier); diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java index 3d2b6c416..1a9dfe87e 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/Scheduler.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Random; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.CompletableFuture; @@ -44,6 +45,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import io.reactivex.rxjava3.plugins.RxJavaPlugins; import lombok.AccessLevel; import lombok.Getter; @@ -55,15 +57,23 @@ import software.amazon.awssdk.arns.Arn; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.utils.Validate; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; import software.amazon.kinesis.checkpoint.CheckpointConfig; import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; import software.amazon.kinesis.common.StreamConfig; import software.amazon.kinesis.common.StreamIdentifier; +import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager; +import 
software.amazon.kinesis.coordinator.migration.MigrationStateMachine; +import software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl; +import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider; +import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider; import software.amazon.kinesis.leases.HierarchicalShardSyncer; import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseCleanupManager; import software.amazon.kinesis.leases.LeaseCoordinator; import software.amazon.kinesis.leases.LeaseManagementConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig; +import software.amazon.kinesis.leases.LeaseManagementFactory; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseSerializer; import software.amazon.kinesis.leases.MultiStreamLease; @@ -98,6 +108,9 @@ import software.amazon.kinesis.retrieval.RecordsPublisher; import software.amazon.kinesis.retrieval.RetrievalConfig; import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder; +import software.amazon.kinesis.worker.WorkerMetricsSelector; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager; import static software.amazon.kinesis.common.ArnUtil.constructStreamArn; import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrategy.StreamsLeasesDeletionType; @@ -106,12 +119,14 @@ /** * */ -@Getter +@Getter(AccessLevel.PRIVATE) @Accessors(fluent = true) @Slf4j +@KinesisClientInternalApi public class Scheduler implements Runnable { private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1; + private static final long LEASE_TABLE_CHECK_FREQUENCY_MILLIS = 3 * 1000L; private static final long MIN_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 1000L; private static final long MAX_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 30 * 1000L; @@ -133,7 
+148,9 @@ public class Scheduler implements Runnable { private final ProcessorConfig processorConfig; private final RetrievalConfig retrievalConfig; + @Getter(AccessLevel.PACKAGE) private final String applicationName; + private final int maxInitializationAttempts; private final Checkpointer checkpoint; private final long shardConsumerDispatchPollIntervalMillis; @@ -156,7 +173,10 @@ public class Scheduler implements Runnable { private final long failoverTimeMillis; private final long taskBackoffTimeMillis; private final boolean isMultiStreamMode; + + @Getter(AccessLevel.PACKAGE) private final Map currentStreamConfigMap = new StreamConfigMap(); + private final StreamTracker streamTracker; private final FormerStreamsLeasesDeletionStrategy formerStreamsLeasesDeletionStrategy; private final long listShardsBackoffTimeMillis; @@ -167,19 +187,30 @@ public class Scheduler implements Runnable { private final AggregatorUtil aggregatorUtil; private final Function hierarchicalShardSyncerProvider; private final long schedulerInitializationBackoffTimeMillis; - private final LeaderDecider leaderDecider; + private LeaderDecider leaderDecider; + + @Getter(AccessLevel.PACKAGE) private final Map staleStreamDeletionMap = new HashMap<>(); + private final LeaseCleanupManager leaseCleanupManager; private final SchemaRegistryDecoder schemaRegistryDecoder; + @Getter(AccessLevel.PACKAGE) private final DeletedStreamListProvider deletedStreamListProvider; + private final MigrationStateMachine migrationStateMachine; + private final DynamicMigrationComponentsInitializer migrationComponentsInitializer; + private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider; + // Holds consumers for shards the worker is currently tracking. Key is shard // info, value is ShardConsumer. 
+ @Getter(AccessLevel.PACKAGE) private final ConcurrentMap shardInfoShardConsumerMap = new ConcurrentHashMap<>(); private volatile boolean shutdown; private volatile long shutdownStartTimeMillis; + + @Getter(AccessLevel.PACKAGE) private volatile boolean shutdownComplete = false; private final Object lock = new Object(); @@ -187,8 +218,6 @@ public class Scheduler implements Runnable { private final Stopwatch streamSyncWatch = Stopwatch.createUnstarted(); private boolean leasesSyncedOnAppInit = false; - - @Getter(AccessLevel.NONE) private final AtomicBoolean leaderSynced = new AtomicBoolean(false); /** @@ -200,7 +229,6 @@ public class Scheduler implements Runnable { * CountDownLatch used by the GracefulShutdownCoordinator. Reaching zero means that * the scheduler's finalShutdown() call has completed. */ - @Getter(AccessLevel.NONE) private final CountDownLatch finalShutdownLatch = new CountDownLatch(1); @VisibleForTesting @@ -259,11 +287,32 @@ protected Scheduler( // Determine leaseSerializer based on availability of MultiStreamTracker. final LeaseSerializer leaseSerializer = isMultiStreamMode ? 
new DynamoDBMultiStreamLeaseSerializer() : new DynamoDBLeaseSerializer(); - this.leaseCoordinator = this.leaseManagementConfig - .leaseManagementFactory(leaseSerializer, isMultiStreamMode) - .createLeaseCoordinator(this.metricsFactory); + + final LeaseManagementFactory leaseManagementFactory = + this.leaseManagementConfig.leaseManagementFactory(leaseSerializer, isMultiStreamMode); + this.leaseCoordinator = + leaseManagementFactory.createLeaseCoordinator(this.metricsFactory, shardInfoShardConsumerMap); this.leaseRefresher = this.leaseCoordinator.leaseRefresher(); + final CoordinatorStateDAO coordinatorStateDAO = new CoordinatorStateDAO( + leaseManagementConfig.dynamoDBClient(), coordinatorConfig().coordinatorStateTableConfig()); + this.leaseAssignmentModeProvider = new MigrationAdaptiveLeaseAssignmentModeProvider(); + this.migrationComponentsInitializer = createDynamicMigrationComponentsInitializer(coordinatorStateDAO); + this.migrationStateMachine = new MigrationStateMachineImpl( + metricsFactory, + System::currentTimeMillis, + coordinatorStateDAO, + Executors.newScheduledThreadPool( + 2, + new ThreadFactoryBuilder() + .setNameFormat("MigrationStateMachine-%04d") + .build()), + coordinatorConfig.clientVersionConfig(), + new Random(), + this.migrationComponentsInitializer, + leaseManagementConfig.workerIdentifier(), + Duration.ofMinutes(10).getSeconds()); + // // TODO: Figure out what to do with lease manage <=> checkpoint relationship // @@ -280,9 +329,8 @@ protected Scheduler( this.diagnosticEventFactory = diagnosticEventFactory; this.diagnosticEventHandler = new DiagnosticEventLogger(); this.deletedStreamListProvider = new DeletedStreamListProvider(); - this.shardSyncTaskManagerProvider = streamConfig -> this.leaseManagementConfig - .leaseManagementFactory(leaseSerializer, isMultiStreamMode) - .createShardSyncTaskManager(this.metricsFactory, streamConfig, this.deletedStreamListProvider); + this.shardSyncTaskManagerProvider = streamConfig -> 
leaseManagementFactory.createShardSyncTaskManager( + this.metricsFactory, streamConfig, this.deletedStreamListProvider); this.shardPrioritization = this.coordinatorConfig.shardPrioritization(); this.cleanupLeasesUponShardCompletion = this.leaseManagementConfig.cleanupLeasesUponShardCompletion(); this.skipShardSyncAtWorkerInitializationIfLeasesExist = @@ -299,8 +347,6 @@ protected Scheduler( this.workerStateChangeListener = this.coordinatorConfig.coordinatorFactory().createWorkerStateChangeListener(); } - this.leaderDecider = new DeterministicShuffleShardSyncLeaderDecider( - leaseRefresher, Executors.newSingleThreadScheduledExecutor(), PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT); this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis(); this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis(); this.listShardsBackoffTimeMillis = this.retrievalConfig.listShardsBackoffTimeInMillis(); @@ -315,7 +361,6 @@ protected Scheduler( this.coordinatorConfig.schedulerInitializationBackoffTimeMillis(); this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager( leaseManagementConfig.workerIdentifier(), - leaderDecider, leaseRefresher, currentStreamConfigMap, shardSyncTaskManagerProvider, @@ -325,14 +370,69 @@ protected Scheduler( leaseManagementConfig.leasesRecoveryAuditorExecutionFrequencyMillis(), leaseManagementConfig.leasesRecoveryAuditorInconsistencyConfidenceThreshold(), leaderSynced); - this.leaseCleanupManager = this.leaseManagementConfig - .leaseManagementFactory(leaseSerializer, isMultiStreamMode) - .createLeaseCleanupManager(metricsFactory); + this.leaseCleanupManager = leaseManagementFactory.createLeaseCleanupManager(metricsFactory); this.schemaRegistryDecoder = this.retrievalConfig.glueSchemaRegistryDeserializer() == null ? 
null : new SchemaRegistryDecoder(this.retrievalConfig.glueSchemaRegistryDeserializer()); } + /** + * Depends on LeaseCoordinator and LeaseRefresher to be created first + */ + private DynamicMigrationComponentsInitializer createDynamicMigrationComponentsInitializer( + final CoordinatorStateDAO coordinatorStateDAO) { + selectWorkerMetricsIfAvailable(leaseManagementConfig.workerUtilizationAwareAssignmentConfig()); + + final WorkerMetricStatsManager workerMetricsManager = new WorkerMetricStatsManager( + leaseManagementConfig.workerUtilizationAwareAssignmentConfig().noOfPersistedMetricsPerWorkerMetrics(), + leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricList(), + metricsFactory, + leaseManagementConfig + .workerUtilizationAwareAssignmentConfig() + .inMemoryWorkerMetricsCaptureFrequencyMillis()); + + final WorkerMetricStatsDAO workerMetricsDAO = new WorkerMetricStatsDAO( + leaseManagementConfig.dynamoDBClient(), + leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig(), + leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsReporterFreqInMillis()); + + return DynamicMigrationComponentsInitializer.builder() + .metricsFactory(metricsFactory) + .leaseRefresher(leaseRefresher) + .coordinatorStateDAO(coordinatorStateDAO) + .workerMetricsThreadPool(Executors.newScheduledThreadPool( + 1, + new ThreadFactoryBuilder() + .setNameFormat("worker-metrics-reporter") + .build())) + .workerMetricsDAO(workerMetricsDAO) + .workerMetricsManager(workerMetricsManager) + .lamThreadPool(Executors.newScheduledThreadPool( + 1, + new ThreadFactoryBuilder().setNameFormat("lam-thread").build())) + .lamCreator((lamThreadPool, leaderDecider) -> new LeaseAssignmentManager( + leaseRefresher, + workerMetricsDAO, + leaderDecider, + leaseManagementConfig.workerUtilizationAwareAssignmentConfig(), + leaseCoordinator.workerIdentifier(), + leaseManagementConfig.failoverTimeMillis(), + metricsFactory, + lamThreadPool, + 
System::nanoTime, + leaseManagementConfig.maxLeasesForWorker(), + leaseManagementConfig.gracefulLeaseHandoffConfig())) + .adaptiveLeaderDeciderCreator(() -> new MigrationAdaptiveLeaderDecider(metricsFactory)) + .deterministicLeaderDeciderCreator(() -> new DeterministicShuffleShardSyncLeaderDecider( + leaseRefresher, Executors.newSingleThreadScheduledExecutor(), 1, metricsFactory)) + .ddbLockBasedLeaderDeciderCreator(() -> DynamoDBLockBasedLeaderDecider.create( + coordinatorStateDAO, leaseCoordinator.workerIdentifier(), metricsFactory)) + .workerIdentifier(leaseCoordinator.workerIdentifier()) + .workerUtilizationAwareAssignmentConfig(leaseManagementConfig.workerUtilizationAwareAssignmentConfig()) + .leaseAssignmentModeProvider(leaseAssignmentModeProvider) + .build(); + } + /** * Start consuming data from the stream, and pass it to the application record processors. */ @@ -342,13 +442,19 @@ public void run() { return; } + final MetricsScope metricsScope = + MetricsUtil.createMetricsWithOperation(metricsFactory, "Scheduler:Initialize"); + boolean success = false; try { initialize(); + success = true; log.info("Initialization complete. Starting worker loop."); } catch (RuntimeException e) { log.error("Unable to initialize after {} attempts. 
Shutting down.", maxInitializationAttempts, e); workerStateChangeListener.onAllInitializationAttemptsFailed(e); shutdown(); + } finally { + MetricsUtil.addSuccess(metricsScope, "Initialize", success, MetricsLevel.SUMMARY); } while (!shouldShutdown()) { runProcessLoop(); @@ -363,14 +469,13 @@ void initialize() { synchronized (lock) { registerErrorHandlerForUndeliverableAsyncTaskExceptions(); workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.INITIALIZING); + boolean isDone = false; Exception lastException = null; - for (int i = 0; (!isDone) && (i < maxInitializationAttempts); i++) { try { log.info("Initializing LeaseCoordinator attempt {}", (i + 1)); leaseCoordinator.initialize(); - if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) { if (shouldInitiateLeaseSync()) { log.info( @@ -382,21 +487,29 @@ void initialize() { log.info("Skipping shard sync per configuration setting (and lease table is not empty)"); } + // Initialize the state machine after lease table has been initialized + // Migration state machine creates and waits for GSI if necessary, + // it must be initialized before starting leaseCoordinator, which runs LeaseDiscoverer + // and that requires GSI to be present and active. (migrationStateMachine.initialize is idempotent) + migrationStateMachine.initialize(); + leaderDecider = migrationComponentsInitializer.leaderDecider(); + leaseCleanupManager.start(); // If we reach this point, then we either skipped the lease sync or did not have any exception // for any of the shard sync in the previous attempt. + if (!leaseCoordinator.isRunning()) { log.info("Starting LeaseCoordinator"); - leaseCoordinator.start(); + leaseCoordinator.start(leaseAssignmentModeProvider); } else { log.info("LeaseCoordinator is already running. 
No need to start it."); } log.info("Scheduling periodicShardSync"); - leaderElectedPeriodicShardSyncManager.start(); + leaderElectedPeriodicShardSyncManager.start(leaderDecider); streamSyncWatch.start(); isDone = true; - } catch (Exception e) { + } catch (final Exception e) { log.error("Caught exception when initializing LeaseCoordinator", e); lastException = e; } @@ -863,7 +976,7 @@ Callable createWorkerShutdownCallable() { leaseCoordinator, lease, notificationCompleteLatch, shutdownCompleteLatch); ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo); - if (consumer != null) { + if (consumer != null && !consumer.isShutdown()) { consumer.gracefulShutdown(shutdownNotification); } else { // @@ -912,6 +1025,8 @@ public void shutdown() { shutdown = true; shutdownStartTimeMillis = System.currentTimeMillis(); + migrationStateMachine.shutdown(); + migrationComponentsInitializer.shutdown(); // Stop lease coordinator, so leases are not renewed or stolen from other workers. // Lost leases will force Worker to begin shutdown process for all shard consumers in // Worker.run(). @@ -1228,4 +1343,23 @@ private void resetInfoLogging() { public Future requestShutdown() { return null; } + + /** + * If WorkerMetricStats list is empty and the disable flag is false, select WorkerMetricStats automatically. + */ + private void selectWorkerMetricsIfAvailable( + final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig) { + try { + if (workerUtilizationAwareAssignmentConfig.workerMetricList().isEmpty() + && !workerUtilizationAwareAssignmentConfig.disableWorkerMetrics()) { + workerUtilizationAwareAssignmentConfig.workerMetricList( + WorkerMetricsSelector.create().getDefaultWorkerMetrics()); + } + } catch (final Exception e) { + log.warn( + "Exception encountered during WorkerMetricStats selection. 
If this is persistent please try setting the " + + "WorkerMetricStats explicitly.", + e); + } + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentDecider.java new file mode 100644 index 000000000..07d7af95e --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentDecider.java @@ -0,0 +1,21 @@ +package software.amazon.kinesis.coordinator.assignment; + +import java.util.List; + +import software.amazon.kinesis.leases.Lease; + +public interface LeaseAssignmentDecider { + + /** + * Assigns expiredOrUnAssignedLeases to the available workers. + */ + void assignExpiredOrUnassignedLeases(final List expiredOrUnAssignedLeases); + + /** + * Balances the leases between workers in the fleet. + * Implementation can choose to balance leases based on lease count or throughput or to bring the variance in + * resource utilization to a minimum. + * Check documentation on implementation class to see how it balances the leases. 
+ */ + void balanceWorkerVariance(); +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManager.java new file mode 100644 index 000000000..6f3d47ab1 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManager.java @@ -0,0 +1,719 @@ +package software.amazon.kinesis.coordinator.assignment; + +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections.CollectionUtils; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseManagementConfig; +import software.amazon.kinesis.leases.LeaseRefresher; +import 
software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; +import software.amazon.kinesis.metrics.NullMetricsScope; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStats; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; + +import static java.util.Objects.isNull; +import static java.util.Objects.nonNull; + +/** + * Performs the LeaseAssignment for the application. This starts by loading the leases and workerMetrics from the + * storage and then starts by assignment (in-memory) of expired and/or unassigned leases after which it tries to perform + * balancing of load among the workers by re-assign leases. + * In the end, performs actual assignment by writing to storage. + */ +@Slf4j +@RequiredArgsConstructor +@KinesisClientInternalApi +public final class LeaseAssignmentManager { + + /** + * Default number of continuous failure execution after which leadership is released. + */ + private static final int DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER = 3; + + /** + * Default multiplier for LAM frequency with respect to leaseDurationMillis (lease failover millis). + * If leaseDurationMillis is 10000 millis, default LAM frequency is 20000 millis. + */ + private static final int DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER = 2; + + /** + * Default parallelism factor for scaling lease table. + */ + private static final int DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR = 10; + + private static final String FORCE_LEADER_RELEASE_METRIC_NAME = "ForceLeaderRelease"; + + /** + * Default retry attempt for loading leases and workers before giving up. 
+ */ + private static final int DDB_LOAD_RETRY_ATTEMPT = 1; + + /** + * Internal thread pool used to perform assignment operations in parallel by calling storage. + */ + private static final ExecutorService LEASE_ASSIGNMENT_CALL_THREAD_POOL = + Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + + private static final String METRICS_LEASE_ASSIGNMENT_MANAGER = "LeaseAssignmentManager"; + private static final String METRICS_INCOMPLETE_EXPIRED_LEASES_ASSIGNMENT = + "LeaseAssignmentManager.IncompleteExpiredLeasesAssignment"; + public static final int DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD = 2; + + private final LeaseRefresher leaseRefresher; + private final WorkerMetricStatsDAO workerMetricsDAO; + private final LeaderDecider leaderDecider; + private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config; + private final String currentWorkerId; + private final Long leaseDurationMillis; + private final MetricsFactory metricsFactory; + private final ScheduledExecutorService executorService; + private final Supplier nanoTimeProvider; + private final int maxLeasesForWorker; + private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig; + private boolean tookOverLeadershipInThisRun = false; + private final Map prevRunLeasesState = new HashMap<>(); + + private Future managerFuture; + + private int noOfContinuousFailedAttempts = 0; + private int lamRunCounter = 0; + + public synchronized void start() { + if (isNull(managerFuture)) { + // LAM can be dynamically started/stopped and restarted during MigrationStateMachine execution + // so reset the flag to refresh the state before processing during a restart of LAM.
+ tookOverLeadershipInThisRun = false; + managerFuture = executorService.scheduleWithFixedDelay( + this::performAssignment, + 0L, + leaseDurationMillis * DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER, + TimeUnit.MILLISECONDS); + log.info("Started LeaseAssignmentManager"); + return; + } + log.info("LeaseAssignmentManager already running..."); + } + + public synchronized void stop() { + if (nonNull(managerFuture)) { + log.info("Completed shutdown of LeaseAssignmentManager"); + managerFuture.cancel(true); + managerFuture = null; + return; + } + log.info("LeaseAssignmentManager is not running..."); + } + + /** + * Creates the MetricsScope for given {@param operation} by calling metricsFactory and falls back to + * NullMetricsScope if failed to create MetricsScope. + * @param operation Operation name for MetricsScope + * @return instance of MetricsScope + */ + private MetricsScope createMetricsScope(final String operation) { + try { + return MetricsUtil.createMetricsWithOperation(metricsFactory, operation); + } catch (final Exception e) { + log.error("Failed to create metrics scope defaulting to no metrics.", e); + return new NullMetricsScope(); + } + } + + private void performAssignment() { + + final MetricsScope metricsScope = createMetricsScope(METRICS_LEASE_ASSIGNMENT_MANAGER); + final long startTime = System.currentTimeMillis(); + boolean success = false; + + try { + + // If the current worker is not leader, then do nothing as assignment is executed on leader. + if (!leaderDecider.isLeader(currentWorkerId)) { + log.info("Current worker {} is not a leader, ignore", currentWorkerId); + this.tookOverLeadershipInThisRun = false; + success = true; + return; + } + + if (!this.tookOverLeadershipInThisRun) { + // This means that there was leader change, perform cleanup of state as this is leader switch. 
+ this.tookOverLeadershipInThisRun = true; + this.lamRunCounter = 0; + prepareAfterLeaderSwitch(); + } + log.info("Current worker {} is a leader, performing assignment", currentWorkerId); + + final InMemoryStorageView inMemoryStorageView = new InMemoryStorageView(); + + final long loadStartTime = System.currentTimeMillis(); + inMemoryStorageView.loadInMemoryStorageView(metricsScope); + MetricsUtil.addLatency(metricsScope, "LeaseAndWorkerMetricsLoad", loadStartTime, MetricsLevel.DETAILED); + + publishLeaseAndWorkerCountMetrics(metricsScope, inMemoryStorageView); + final LeaseAssignmentDecider leaseAssignmentDecider = new VarianceBasedLeaseAssignmentDecider( + inMemoryStorageView, + config.dampeningPercentage(), + config.reBalanceThresholdPercentage(), + config.allowThroughputOvershoot()); + + updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout( + inMemoryStorageView.getLeaseList(), inMemoryStorageView.getLeaseTableScanTime()); + + // This does not include the leases from the worker that has expired (based on WorkerMetricStats's + // lastUpdateTime) + // but the lease is not expired (based on the leaseCounter on lease). + // If a worker has died, the lease will be expired and assigned in next iteration. + final List expiredOrUnAssignedLeases = inMemoryStorageView.getLeaseList().stream() + .filter(lease -> lease.isExpired( + TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis), + inMemoryStorageView.getLeaseTableScanTime())) + // marking them for direct reassignment. 
+ .map(l -> l.isExpiredOrUnassigned(true)) + .collect(Collectors.toList()); + + log.info("Total expiredOrUnassignedLeases count : {}", expiredOrUnAssignedLeases.size()); + metricsScope.addData( + "ExpiredLeases", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY); + + final long expiredAndUnassignedLeaseAssignmentStartTime = System.currentTimeMillis(); + leaseAssignmentDecider.assignExpiredOrUnassignedLeases(expiredOrUnAssignedLeases); + MetricsUtil.addLatency( + metricsScope, + "AssignExpiredOrUnassignedLeases", + expiredAndUnassignedLeaseAssignmentStartTime, + MetricsLevel.DETAILED); + + if (!expiredOrUnAssignedLeases.isEmpty()) { + // When expiredOrUnAssignedLeases is not empty, that means + // that we were not able to assign all expired or unassigned leases and hit the maxThroughput + // per worker for all workers. + log.warn("Not able to assign all expiredOrUnAssignedLeases"); + metricsScope.addData( + "LeaseSpillover", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY); + } + + if (shouldRunVarianceBalancing()) { + final long balanceWorkerVarianceStartTime = System.currentTimeMillis(); + final int totalNewAssignmentBeforeWorkerVarianceBalancing = + inMemoryStorageView.leaseToNewAssignedWorkerMap.size(); + leaseAssignmentDecider.balanceWorkerVariance(); + MetricsUtil.addLatency( + metricsScope, "BalanceWorkerVariance", balanceWorkerVarianceStartTime, MetricsLevel.DETAILED); + metricsScope.addData( + "NumOfLeasesReassignment", + inMemoryStorageView.leaseToNewAssignedWorkerMap.size() + - totalNewAssignmentBeforeWorkerVarianceBalancing, + StandardUnit.COUNT, + MetricsLevel.SUMMARY); + } + + if (inMemoryStorageView.leaseToNewAssignedWorkerMap.isEmpty()) { + log.info("No new lease assignment performed in this iteration"); + } + + parallelyAssignLeases(inMemoryStorageView, metricsScope); + printPerWorkerLeases(inMemoryStorageView); + deleteStaleWorkerMetricsEntries(inMemoryStorageView, metricsScope); + success = 
true; + noOfContinuousFailedAttempts = 0; + } catch (final Exception e) { + log.error("LeaseAssignmentManager failed to perform lease assignment.", e); + noOfContinuousFailedAttempts++; + if (noOfContinuousFailedAttempts >= DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER) { + log.error( + "Failed to perform assignment {} times in a row, releasing leadership from worker : {}", + DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER, + currentWorkerId); + MetricsUtil.addCount(metricsScope, FORCE_LEADER_RELEASE_METRIC_NAME, 1, MetricsLevel.SUMMARY); + leaderDecider.releaseLeadershipIfHeld(); + } + } finally { + MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY); + MetricsUtil.endScope(metricsScope); + } + } + + private boolean shouldRunVarianceBalancing() { + final boolean response = this.lamRunCounter == 0; + /* + To avoid letting lamRunCounter grow large, keep it within [0,varianceBalancingFrequency). + If varianceBalancingFrequency is 5, lamRunCounter will stay within 0 to 4 and the method returns true when + lamRunCounter is 0.
+ */ + this.lamRunCounter = (this.lamRunCounter + 1) % config.varianceBalancingFrequency(); + return response; + } + + /** + * Deletes the WorkerMetricStats entries which are stale (i.e., not updated for a long time; see + * {@link LeaseAssignmentManager#isWorkerMetricsEntryStale} for the condition used to evaluate staleness) + */ + private void deleteStaleWorkerMetricsEntries( + final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) { + final long startTime = System.currentTimeMillis(); + try { + final List staleWorkerMetricsList = inMemoryStorageView.getWorkerMetricsList().stream() + .filter(this::isWorkerMetricsEntryStale) + .collect(Collectors.toList()); + MetricsUtil.addCount( + metricsScope, "TotalStaleWorkerMetricsEntry", staleWorkerMetricsList.size(), MetricsLevel.DETAILED); + log.info("Number of stale workerMetrics entries : {}", staleWorkerMetricsList.size()); + log.info("Stale workerMetrics list : {}", staleWorkerMetricsList); + + final List> completableFutures = staleWorkerMetricsList.stream() + .map(workerMetrics -> CompletableFuture.supplyAsync( + () -> workerMetricsDAO.deleteMetrics(workerMetrics), LEASE_ASSIGNMENT_CALL_THREAD_POOL)) + .collect(Collectors.toList()); + + CompletableFuture.allOf(completableFutures.toArray(new CompletableFuture[0])) + .join(); + } finally { + MetricsUtil.addLatency(metricsScope, "StaleWorkerMetricsCleanup", startTime, MetricsLevel.DETAILED); + } + } + + /** + * WorkerMetricStats entry is considered stale if the lastUpdateTime of the workerMetrics is older than + * workerMetricsStalenessThreshold * workerMetricsReporterFreqInMillis.
+ */ + private boolean isWorkerMetricsEntryStale(final WorkerMetricStats workerMetrics) { + return Duration.between(Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now()) + .toMillis() + > config.staleWorkerMetricsEntryCleanupDuration().toMillis(); + } + + private void printPerWorkerLeases(final InMemoryStorageView storageView) { + storageView.getActiveWorkerIdSet().forEach(activeWorkerId -> { + log.info( + "Worker : {} and total leases : {} and totalThroughput : {}", + activeWorkerId, + Optional.ofNullable(storageView.getWorkerToLeasesMap().get(activeWorkerId)) + .orElse(Collections.EMPTY_SET) + .size(), + storageView.getWorkerToTotalAssignedThroughputMap().get(activeWorkerId)); + }); + } + + private void parallelyAssignLeases(final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) { + final AtomicInteger failedAssignmentCounter = new AtomicInteger(0); + final long startTime = System.currentTimeMillis(); + boolean success = false; + try { + CompletableFuture.allOf(inMemoryStorageView.getLeaseToNewAssignedWorkerMap().entrySet().stream() + // ignore leases that are heartbeating and pending graceful shutdown checkpoint. 
+ .filter(entry -> !entry.getKey().blockedOnPendingCheckpoint(getNanoTimeMillis())) + .map(entry -> CompletableFuture.supplyAsync( + () -> { + try { + final Lease lease = entry.getKey(); + if (gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled() + && lease.isEligibleForGracefulShutdown()) { + return handleGracefulLeaseHandoff( + lease, entry.getValue(), failedAssignmentCounter); + } else { + return handleRegularLeaseAssignment( + lease, entry.getValue(), failedAssignmentCounter); + } + } catch (Exception e) { + throw new CompletionException(e); + } + }, + LEASE_ASSIGNMENT_CALL_THREAD_POOL)) + .toArray(CompletableFuture[]::new)) + .join(); + success = true; + } finally { + MetricsUtil.addCount( + metricsScope, "FailedAssignmentCount", failedAssignmentCounter.get(), MetricsLevel.DETAILED); + MetricsUtil.addSuccessAndLatency( + metricsScope, "ParallelyAssignLeases", success, startTime, MetricsLevel.DETAILED); + } + } + + private boolean handleGracefulLeaseHandoff(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter) + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final boolean response = leaseRefresher.initiateGracefulLeaseHandoff(lease, newOwner); + if (response) { + // new handoff assignment. add the timeout. + lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis()); + } else { + failedAssignmentCounter.incrementAndGet(); + } + return response; + } + + private boolean handleRegularLeaseAssignment(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter) + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final boolean response = leaseRefresher.assignLease(lease, newOwner); + if (response) { + // Successful assignment updates the leaseCounter, update the nanoTime for counter update. 
+ lease.lastCounterIncrementNanos(nanoTimeProvider.get()); + } else { + failedAssignmentCounter.incrementAndGet(); + } + return response; + } + + private void publishLeaseAndWorkerCountMetrics( + final MetricsScope metricsScope, final InMemoryStorageView inMemoryStorageView) { + // Names of the metrics are kept in sync with what is published in LeaseTaker. + metricsScope.addData( + "TotalLeases", inMemoryStorageView.leaseList.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY); + metricsScope.addData( + "NumWorkers", inMemoryStorageView.activeWorkerMetrics.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY); + } + + // Method updates all new leases with currentTime if the counter is updated since last run else keeps whatever + // was prev and update the prevRunLeasesState + private void updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout( + final List leaseList, final Long scanTime) { + for (final Lease lease : leaseList) { + final Lease prevLease = prevRunLeasesState.get(lease.leaseKey()); + + // make sure lease shutdown timeouts are tracked. + if (lease.shutdownRequested()) { + // previous and current leases might have same next and checkpoint owners but there is no + // guarantee that the latest shutdown is the same shutdown in the previous lease for example + // some other leaders change the lease states while this worker waiting for it's LAM run. + // This is the best effort to prevent marking the incorrect timeout. 
+ if (isNull(prevLease) || !prevLease.shutdownRequested() || !isSameOwners(lease, prevLease)) { + // Add new value if previous is null, previous lease is not shutdown pending or the owners + // don't match + lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis()); + } else { + lease.checkpointOwnerTimeoutTimestampMillis(prevLease.checkpointOwnerTimeoutTimestampMillis()); + } + } + + if (isNull(prevLease)) { + lease.lastCounterIncrementNanos( + isNull(lease.actualOwner()) + // This is an unassigned lease, mark as 0L that puts this in first in assignment order + ? 0L + : scanTime); + } else { + lease.lastCounterIncrementNanos( + lease.leaseCounter() > prevLease.leaseCounter() + ? scanTime + : prevLease.lastCounterIncrementNanos()); + } + } + prevRunLeasesState.clear(); + prevRunLeasesState.putAll(leaseList.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity()))); + } + + private void prepareAfterLeaderSwitch() { + prevRunLeasesState.clear(); + noOfContinuousFailedAttempts = 0; + } + + /** + * In memory view of the leases and workerMetrics. + * This class supports queries (e.g., leases assigned to worker or total throughout assigned to worker). + */ + @Getter + class InMemoryStorageView { + + // This is in-memory view of the workerToLeaseMapping, this is updated in-memory before actual + // changes to storage. + private final Map> workerToLeasesMap = new HashMap<>(); + /** + * This is computed initially after the loading leases and then updated when the + * {@link InMemoryStorageView#performLeaseAssignment} is called. + */ + private final Map workerToTotalAssignedThroughputMap = new HashMap<>(); + /** + * Captures the new assignment done during the lifecycle of single run. + */ + private final Map leaseToNewAssignedWorkerMap = new HashMap<>(); + + /** + * List of all leases in the application. 
+ */ + private List leaseList; + /** + * List of workers which are active (i.e., updated metric stats before the threshold ref) + * {@link this#computeWorkerExpiryThresholdInSecond}) + */ + private List activeWorkerMetrics; + /** + * List of all workerMetrics entries from storage. + */ + private List workerMetricsList; + /** + * List of active workers ids. + */ + private Set activeWorkerIdSet; + /** + * Wall time in nanoseconds when the lease table scan was completed. + */ + private long leaseTableScanTime = 0L; + /** + * Average throughput for all workers. + */ + private double targetAverageThroughput; + + /** + * Update {@ref inMemoryWorkerToLeasesMapping} with the change in ownership and update newLeaseAssignmentMap + * + * @param lease lease changing assignment + * @param newOwner new owner of the lease + */ + public void performLeaseAssignment(final Lease lease, final String newOwner) { + final String existingOwner = lease.actualOwner(); + workerToLeasesMap.get(existingOwner).remove(lease); + workerToLeasesMap + .computeIfAbsent(newOwner, owner -> new HashSet<>()) + .add(lease); + updateWorkerThroughput(newOwner, lease.throughputKBps()); + // Remove the same lease throughput from oldOwner + updateWorkerThroughput(existingOwner, -lease.throughputKBps()); + leaseToNewAssignedWorkerMap.put(lease, newOwner); + } + + /** + * Scans the LeaseTable and WorkerMetricStats in parallel and load the data and populate datastructures used + * in lease assignment. 
+ */ + public void loadInMemoryStorageView(final MetricsScope metricsScope) throws Exception { + final CompletableFuture, List>> leaseListFuture = loadLeaseListAsync(); + + final CompletableFuture> workerMetricsFuture = loadWorkerMetricStats(); + + final List workerMetricsFromStorage = workerMetricsFuture.join(); + + final List listOfWorkerIdOfInvalidWorkerMetricsEntry = workerMetricsFromStorage.stream() + .filter(workerMetrics -> !workerMetrics.isValidWorkerMetric()) + .map(WorkerMetricStats::getWorkerId) + .collect(Collectors.toList()); + log.warn("List of workerIds with invalid entries : {}", listOfWorkerIdOfInvalidWorkerMetricsEntry); + if (!listOfWorkerIdOfInvalidWorkerMetricsEntry.isEmpty()) { + metricsScope.addData( + "NumWorkersWithInvalidEntry", + listOfWorkerIdOfInvalidWorkerMetricsEntry.size(), + StandardUnit.COUNT, + MetricsLevel.SUMMARY); + } + + // Valid entries are considered further, for validity of entry refer WorkerMetricStats#isValidWorkerMetrics + this.workerMetricsList = workerMetricsFromStorage.stream() + .filter(WorkerMetricStats::isValidWorkerMetric) + .collect(Collectors.toList()); + + log.info("Total WorkerMetricStats available : {}", workerMetricsList.size()); + final long workerExpiryThreshold = computeWorkerExpiryThresholdInSecond(); + + final long countOfWorkersWithFailingWorkerMetric = workerMetricsList.stream() + .filter(WorkerMetricStats::isAnyWorkerMetricFailing) + .count(); + if (countOfWorkersWithFailingWorkerMetric != 0) { + metricsScope.addData( + "NumWorkersWithFailingWorkerMetric", + countOfWorkersWithFailingWorkerMetric, + StandardUnit.COUNT, + MetricsLevel.SUMMARY); + } + + final Map.Entry, List> leaseListResponse = leaseListFuture.join(); + this.leaseList = leaseListResponse.getKey(); + log.warn("Leases that failed deserialization : {}", leaseListResponse.getValue()); + if (!leaseListResponse.getValue().isEmpty()) { + MetricsUtil.addCount( + metricsScope, + "LeaseDeserializationFailureCount", + 
leaseListResponse.getValue().size(), + MetricsLevel.SUMMARY); + } + this.leaseTableScanTime = nanoTimeProvider.get(); + log.info("Total Leases available : {}", leaseList.size()); + + final double averageLeaseThroughput = leaseList.stream() + .filter(lease -> nonNull(lease.throughputKBps())) + .mapToDouble(Lease::throughputKBps) + .average() + // If none of the leases has any value, that means its app + // startup time and thus assigns 0 in that case to start with. + .orElse(0D); + /* + * If a workerMetrics has a metric (i.e. has -1 value in last index which denotes failure), + * skip it from activeWorkerMetrics and no new action on it will be done + * (new assignment etc.) until the metric has non -1 value in last index. This is to avoid performing action + * with the stale data on worker. + */ + this.activeWorkerMetrics = workerMetricsList.stream() + .filter(workerMetrics -> workerMetrics.getLastUpdateTime() >= workerExpiryThreshold + && !workerMetrics.isAnyWorkerMetricFailing()) + .collect(Collectors.toList()); + log.info("activeWorkerMetrics : {}", activeWorkerMetrics.size()); + targetAverageThroughput = + averageLeaseThroughput * leaseList.size() / Math.max(1, activeWorkerMetrics.size()); + leaseList.forEach(lease -> { + if (isNull(lease.throughputKBps())) { + // If the lease is unassigned, it will not have any throughput value, use average throughput + // as good enough value to start with. 
+ lease.throughputKBps(averageLeaseThroughput); + } + workerToLeasesMap + .computeIfAbsent(lease.actualOwner(), workerId -> new HashSet<>()) + .add(lease); + updateWorkerThroughput(lease.actualOwner(), lease.throughputKBps()); + }); + + this.activeWorkerIdSet = new HashSet<>(); + // Calculate initial ratio + this.activeWorkerMetrics.forEach(workerMetrics -> { + activeWorkerIdSet.add(workerMetrics.getWorkerId()); + workerMetrics.setEmaAlpha(config.workerMetricsEMAAlpha()); + if (workerMetrics.isUsingDefaultWorkerMetric()) { + setOperatingRangeAndWorkerMetricsDataForDefaultWorker( + workerMetrics, + getTotalAssignedThroughput(workerMetrics.getWorkerId()) / targetAverageThroughput); + } + }); + } + + private void updateWorkerThroughput(final String workerId, final double leaseThroughput) { + double value = workerToTotalAssignedThroughputMap.computeIfAbsent(workerId, worker -> (double) 0L); + workerToTotalAssignedThroughputMap.put(workerId, value + leaseThroughput); + } + + private void setOperatingRangeAndWorkerMetricsDataForDefaultWorker( + final WorkerMetricStats workerMetrics, final Double ratio) { + // for workers with default WorkerMetricStats, the operating range ceiling of 100 represents the + // target throughput. This way, with either heterogeneous or homogeneous fleets + // of explicit WorkerMetricStats and default WorkerMetricStats applications, load will be evenly + // distributed. + log.info( + "Worker [{}] is using default WorkerMetricStats, setting initial utilization ratio to [{}].", + workerMetrics.getWorkerId(), + ratio); + workerMetrics.setOperatingRange(ImmutableMap.of("T", ImmutableList.of(100L))); + workerMetrics.setMetricStats(ImmutableMap.of("T", ImmutableList.of(ratio * 100, ratio * 100))); + } + + /** + * Calculates the value threshold in seconds for a worker to be considered as active. + * If a worker has not updated the WorkerMetricStats entry within this threshold, the worker is not considered + * as active. 
+ * + * @return wall time in seconds + */ + private long computeWorkerExpiryThresholdInSecond() { + final long timeInSeconds = Duration.ofMillis(System.currentTimeMillis() + - DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD + * config.workerMetricsReporterFreqInMillis()) + .getSeconds(); + log.info("WorkerMetricStats expiry time in seconds : {}", timeInSeconds); + return timeInSeconds; + } + + /** + * Looks at inMemoryWorkerToLeasesMapping for lease assignment and figures out if there is room considering + * any new assignment that would have happened. + */ + public boolean isWorkerTotalThroughputLessThanMaxThroughput(final String workerId) { + return getTotalAssignedThroughput(workerId) <= config.maxThroughputPerHostKBps(); + } + + /** + * Looks at inMemoryWorkerToLeasesMapping for lease assignment of a worker and returns true if the worker has + * no leases assigned or less than maxNumberOfLeasesPerHost else false. + */ + public boolean isWorkerAssignedLeasesLessThanMaxLeases(final String workerId) { + final Set assignedLeases = workerToLeasesMap.get(workerId); + if (CollectionUtils.isEmpty(assignedLeases)) { + // There are no leases assigned to the worker, that means its less than maxNumberOfLeasesPerHost. 
+ return true; + } else { + return assignedLeases.size() < maxLeasesForWorker; + } + } + + public Double getTotalAssignedThroughput(final String workerId) { + return workerToTotalAssignedThroughputMap.getOrDefault(workerId, 0D); + } + + private CompletableFuture> loadWorkerMetricStats() { + return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricsDAO::getAllWorkerMetricStats)); + } + + private CompletableFuture, List>> loadLeaseListAsync() { + return CompletableFuture.supplyAsync(() -> loadWithRetry(() -> leaseRefresher.listLeasesParallely( + LEASE_ASSIGNMENT_CALL_THREAD_POOL, DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR))); + } + + private T loadWithRetry(final Callable loadFunction) { + int retryAttempt = 0; + while (true) { + try { + return loadFunction.call(); + } catch (final Exception e) { + if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) { + log.warn( + "Failed to load : {}, retrying", + loadFunction.getClass().getName(), + e); + retryAttempt++; + } else { + throw new CompletionException(e); + } + } + } + } + } + + private long getCheckpointOwnerTimeoutTimestampMillis() { + // this is a future timestamp in millis that the graceful lease handoff shutdown can be considered + // expired. LeaseDurationMillis is used here to account for how long it might take for the + // lease owner to receive the shutdown signal before executing shutdown. + return getNanoTimeMillis() + + gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis() + + leaseDurationMillis; + } + + private long getNanoTimeMillis() { + // this is not a wall clock time. But if we stick with using this time provider for calculating the elapsed + // time it should be okay to use in checkpoint expiration calculation. 
+ return TimeUnit.NANOSECONDS.toMillis(nanoTimeProvider.get()); + } + + private static boolean isSameOwners(Lease currentLease, Lease previousLease) { + return Objects.equals(currentLease.leaseOwner(), previousLease.leaseOwner()) + && Objects.equals(currentLease.checkpointOwner(), previousLease.checkpointOwner()); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/VarianceBasedLeaseAssignmentDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/VarianceBasedLeaseAssignmentDecider.java new file mode 100644 index 000000000..aee56242c --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/assignment/VarianceBasedLeaseAssignmentDecider.java @@ -0,0 +1,348 @@ +package software.amazon.kinesis.coordinator.assignment; + +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStats; + +import static java.util.Objects.isNull; +import static java.util.Objects.nonNull; + +/** + * VarianceBasedLeaseAssignmentDecider + * This implementation of LeaseAssignmentDecider performs lease assignment by considering the WorkerMetricStats values of workers + * with respect to fleet level average of that WorkerMetricStats. + * Rebalanced leases are assigned to workers which has maximum capacity to in terms of throughput to reach fleet level + * across the WorkerMetricStats value. 
In case of multiple WorkerMetricStats, the capacity to reach fleet level average is determined by outlier + * WorkerMetricStats. + * To minimize the variance, the algorithm picks the fleet level average of the WorkerMetricStats for workers as a + * pivot point and uses it to determine workers to take leases from and then assign to other workers. + * The threshold for considering a worker for re-balance is configurable via + * {@code reBalanceThreshold}. During reassignments the {@code dampeningPercentageValue} is used to achieve + * critical dampening. + */ +@Slf4j +@KinesisClientInternalApi +public final class VarianceBasedLeaseAssignmentDecider implements LeaseAssignmentDecider { + private final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView; + private final int dampeningPercentageValue; + private final int reBalanceThreshold; + private final boolean allowThroughputOvershoot; + private final Map workerMetricsToFleetLevelAverageMap = new HashMap<>(); + private final PriorityQueue assignableWorkerSortedByAvailableCapacity; + private int targetLeasePerWorker; + + public VarianceBasedLeaseAssignmentDecider( + final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView, + final int dampeningPercentageValue, + final int reBalanceThreshold, + final boolean allowThroughputOvershoot) { + this.inMemoryStorageView = inMemoryStorageView; + this.dampeningPercentageValue = dampeningPercentageValue; + this.reBalanceThreshold = reBalanceThreshold; + this.allowThroughputOvershoot = allowThroughputOvershoot; + initialize(); + final Comparator comparator = Comparator.comparingDouble( + workerMetrics -> workerMetrics.computePercentageToReachAverage(workerMetricsToFleetLevelAverageMap)); + this.assignableWorkerSortedByAvailableCapacity = new PriorityQueue<>(comparator.reversed()); + this.assignableWorkerSortedByAvailableCapacity.addAll( + getAvailableWorkersForAssignment(inMemoryStorageView.getActiveWorkerMetrics())); + } + + private void initialize() { + 
final Map workerMetricsNameToAverage = inMemoryStorageView.getActiveWorkerMetrics().stream() + .flatMap(workerMetrics -> workerMetrics.getMetricStats().keySet().stream() + .map(workerMetricsName -> + new SimpleEntry<>(workerMetricsName, workerMetrics.getMetricStat(workerMetricsName)))) + .collect(Collectors.groupingBy( + SimpleEntry::getKey, HashMap::new, Collectors.averagingDouble(SimpleEntry::getValue))); + + workerMetricsToFleetLevelAverageMap.putAll(workerMetricsNameToAverage); + + final int totalWorkers = + Math.max(inMemoryStorageView.getActiveWorkerMetrics().size(), 1); + this.targetLeasePerWorker = Math.max(inMemoryStorageView.getLeaseList().size() / totalWorkers, 1); + } + + private List getAvailableWorkersForAssignment(final List workerMetricsList) { + // Workers with WorkerMetricStats running hot are also available for assignment as the goal is to balance + // utilization + // always (e.g., if all workers have hot WorkerMetricStats, balance the variance between them too) + return workerMetricsList.stream() + .filter(workerMetrics -> inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput( + workerMetrics.getWorkerId()) + && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId())) + .collect(Collectors.toList()); + } + + @Override + public void assignExpiredOrUnassignedLeases(final List expiredOrUnAssignedLeases) { + // Sort the expiredOrUnAssignedLeases using lastCounterIncrementNanos such that leases expired first are + // picked first. + // Unassigned leases have lastCounterIncrementNanos as zero and thus assigned first. 
+ Collections.sort(expiredOrUnAssignedLeases, Comparator.comparing(Lease::lastCounterIncrementNanos)); + final Set assignedLeases = new HashSet<>(); + for (final Lease lease : expiredOrUnAssignedLeases) { + final WorkerMetricStats workerToAssignLease = assignableWorkerSortedByAvailableCapacity.poll(); + if (nonNull(workerToAssignLease)) { + assignLease(lease, workerToAssignLease); + assignedLeases.add(lease); + } else { + log.info("No worker available to assign lease {}", lease.leaseKey()); + break; + } + } + expiredOrUnAssignedLeases.removeAll(assignedLeases); + } + + private List getWorkersToTakeLeasesFromIfRequired( + final List currentWorkerMetrics, + final String workerMetricsName, + final double workerMetricsValueAvg) { + final List workerIdsAboveAverage = new ArrayList<>(); + + final double upperLimit = workerMetricsValueAvg * (1.0D + (double) reBalanceThreshold / 100); + final double lowerLimit = workerMetricsValueAvg * (1.0D - (double) reBalanceThreshold / 100); + + WorkerMetricStats mostLoadedWorker = null; + + log.info("Range for re-balance upper threshold {} and lower threshold {}", upperLimit, lowerLimit); + + boolean shouldTriggerReBalance = false; + for (final WorkerMetricStats workerMetrics : currentWorkerMetrics) { + final double currentWorkerMetricsValue = workerMetrics.getMetricStat(workerMetricsName); + final boolean isCurrentWorkerMetricsAboveOperatingRange = + workerMetrics.isWorkerMetricAboveOperatingRange(workerMetricsName); + /* + If there is any worker, whose WorkerMetricStats value is between +/- reBalanceThreshold % of workerMetricsValueAvg or if + worker's WorkerMetricStats value is above operating range trigger re-balance + */ + if (currentWorkerMetricsValue > upperLimit + || currentWorkerMetricsValue < lowerLimit + || isCurrentWorkerMetricsAboveOperatingRange) { + shouldTriggerReBalance = true; + } + // Perform re-balance on the worker if its above upperLimit or if current WorkerMetricStats is above + // operating range. 
+ if (currentWorkerMetricsValue >= upperLimit || isCurrentWorkerMetricsAboveOperatingRange) { + workerIdsAboveAverage.add(workerMetrics); + } + if (mostLoadedWorker == null + || mostLoadedWorker.getMetricStat(workerMetricsName) < currentWorkerMetricsValue) { + mostLoadedWorker = workerMetrics; + } + } + + /* + If workerIdsAboveAverage is empty that means there is no worker with WorkerMetricStats value above upperLimit so pick + the worker with higher CPU. This can happen when there is worker with WorkerMetricStats value below lowerLimit but + all other workers are within upperLimit. + */ + if (workerIdsAboveAverage.isEmpty()) { + workerIdsAboveAverage.add(mostLoadedWorker); + } + + return shouldTriggerReBalance ? workerIdsAboveAverage : Collections.emptyList(); + } + + /** + * Performs the balancing of the throughput assigned to workers based on the WorkerMetricsValues of worker with respect + * to fleet level average. + * Each WorkerMetricStats is treated independently to determine workers for re-balance computed (computed based on + * reBalanceThreshold) are determined. + * The magnitude of throughput to take is determined by how much worker is away from the average of that WorkerMetricStats + * across fleet and in case of multiple WorkerMetricStats, the one with maximum magnitude of throughput is considered. 
+ */ + @Override + public void balanceWorkerVariance() { + final List activeWorkerMetrics = inMemoryStorageView.getActiveWorkerMetrics(); + + log.info("WorkerMetricStats to corresponding fleet level average : {}", workerMetricsToFleetLevelAverageMap); + log.info("Active WorkerMetricStats : {}", activeWorkerMetrics); + + final Map workerIdToThroughputToTakeMap = new HashMap<>(); + String largestOutlierWorkerMetricsName = ""; + double maxThroughputTake = -1.0D; + + for (final Map.Entry workerMetricsToFleetLevelAverageEntry : + workerMetricsToFleetLevelAverageMap.entrySet()) { + final String workerMetricsName = workerMetricsToFleetLevelAverageEntry.getKey(); + + // Filter workers that does not have current WorkerMetricStats. This is possible if application is adding a + // new WorkerMetricStats and currently in phase of deployment. + final List currentWorkerMetrics = activeWorkerMetrics.stream() + .filter(workerMetrics -> workerMetrics.containsMetricStat(workerMetricsName)) + .collect(Collectors.toList()); + + final double fleetAverageForWorkerMetrics = workerMetricsToFleetLevelAverageEntry.getValue(); + + final List workerToTakeLeasesFrom = getWorkersToTakeLeasesFromIfRequired( + currentWorkerMetrics, workerMetricsName, fleetAverageForWorkerMetrics); + + final Map workerIdToThroughputToTakeForCurrentWorkerMetrics = new HashMap<>(); + double totalThroughputToTakeForCurrentWorkerMetrics = 0D; + for (final WorkerMetricStats workerToTakeLease : workerToTakeLeasesFrom) { + final double workerMetricsValueForWorker = workerToTakeLease.getMetricStat(workerMetricsName); + // Load to take based on the difference compared to the fleet level average + final double loadPercentageToTake = + (workerMetricsValueForWorker - fleetAverageForWorkerMetrics) / workerMetricsValueForWorker; + // Dampen the load based on dampeningPercentageValue + final double dampenedLoadPercentageToTake = + loadPercentageToTake * ((double) dampeningPercentageValue / 100); + final double throughputToTake = 
+ inMemoryStorageView.getTotalAssignedThroughput(workerToTakeLease.getWorkerId()) + * dampenedLoadPercentageToTake; + log.info( + "For worker : {} taking throughput : {} after dampening based on WorkerMetricStats : {}", + workerToTakeLease.getWorkerId(), + throughputToTake, + workerMetricsName); + totalThroughputToTakeForCurrentWorkerMetrics += throughputToTake; + workerIdToThroughputToTakeForCurrentWorkerMetrics.put( + workerToTakeLease.getWorkerId(), throughputToTake); + } + + /* + If totalThroughputToTakeForCurrentWorkerMetrics is more than maxThroughputTake that means this WorkerMetricStats is more + outlier so consider this for reBalancing + */ + if (maxThroughputTake < totalThroughputToTakeForCurrentWorkerMetrics) { + largestOutlierWorkerMetricsName = workerMetricsName; + workerIdToThroughputToTakeMap.clear(); + workerIdToThroughputToTakeMap.putAll(workerIdToThroughputToTakeForCurrentWorkerMetrics); + maxThroughputTake = totalThroughputToTakeForCurrentWorkerMetrics; + } + } + + log.info( + "Largest outlier WorkerMetricStats is : {} and total of {} throughput will be rebalanced", + largestOutlierWorkerMetricsName, + maxThroughputTake); + log.info("Workers to throughput taken from them is : {}", workerIdToThroughputToTakeMap); + + final List> sortedWorkerIdToThroughputToTakeEntries = + new ArrayList<>(workerIdToThroughputToTakeMap.entrySet()); + // sort entries by values. 
+ Collections.sort(sortedWorkerIdToThroughputToTakeEntries, (e1, e2) -> e2.getValue() + .compareTo(e1.getValue())); + + for (final Map.Entry workerIdToThroughputToTakeEntry : + sortedWorkerIdToThroughputToTakeEntries) { + final String workerId = workerIdToThroughputToTakeEntry.getKey(); + + final double throughputToTake = workerIdToThroughputToTakeEntry.getValue(); + + final Queue leasesToTake = getLeasesToTake(workerId, throughputToTake); + + log.info( + "Leases taken from worker : {} are : {}", + workerId, + leasesToTake.stream().map(Lease::leaseKey).collect(Collectors.toSet())); + + for (final Lease lease : leasesToTake) { + final WorkerMetricStats workerToAssign = assignableWorkerSortedByAvailableCapacity.poll(); + if (nonNull(workerToAssign) + && workerToAssign.willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange( + workerMetricsToFleetLevelAverageMap, + inMemoryStorageView.getTargetAverageThroughput(), + lease.throughputKBps(), + targetLeasePerWorker)) { + log.info("No worker to assign anymore in this iteration due to hitting average values"); + break; + } + if (nonNull(workerToAssign)) { + assignLease(lease, workerToAssign); + } + } + } + + printWorkerToUtilizationLog(inMemoryStorageView.getActiveWorkerMetrics()); + } + + private Queue getLeasesToTake(final String workerId, final double throughputToTake) { + final Set existingLeases = + inMemoryStorageView.getWorkerToLeasesMap().get(workerId); + + if (isNull(existingLeases) || existingLeases.isEmpty()) { + return new ArrayDeque<>(); + } + + if (inMemoryStorageView.getTotalAssignedThroughput(workerId) == 0D) { + // This is the case where throughput of this worker is zero and have 1 or more leases assigned. + // Its not possible to determine leases to take based on throughput so simply take 1 lease and move on. 
+ return new ArrayDeque<>(new ArrayList<>(existingLeases).subList(0, 1)); + } + + return getLeasesCombiningToThroughput(workerId, throughputToTake); + } + + private void assignLease(final Lease lease, final WorkerMetricStats workerMetrics) { + if (nonNull(lease.actualOwner()) && lease.actualOwner().equals(workerMetrics.getWorkerId())) { + // if a new owner and current owner are same then no assignment to do + // put back the worker as well as no assignment is done + assignableWorkerSortedByAvailableCapacity.add(workerMetrics); + return; + } + workerMetrics.extrapolateMetricStatValuesForAddedThroughput( + workerMetricsToFleetLevelAverageMap, + inMemoryStorageView.getTargetAverageThroughput(), + lease.throughputKBps(), + targetLeasePerWorker); + log.info("Assigning lease : {} to worker : {}", lease.leaseKey(), workerMetrics.getWorkerId()); + inMemoryStorageView.performLeaseAssignment(lease, workerMetrics.getWorkerId()); + if (inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(workerMetrics.getWorkerId()) + && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId())) { + assignableWorkerSortedByAvailableCapacity.add(workerMetrics); + } + } + + private void printWorkerToUtilizationLog(final List activeWorkerMetrics) { + activeWorkerMetrics.forEach(workerMetrics -> log.info( + "WorkerId : {} and average WorkerMetricStats data : {}", + workerMetrics.getWorkerId(), + workerMetrics.getMetricStatsMap())); + } + + private Queue getLeasesCombiningToThroughput(final String workerId, final double throughputToGet) { + final List assignedLeases = + new ArrayList<>(inMemoryStorageView.getWorkerToLeasesMap().get(workerId)); + if (assignedLeases.isEmpty()) { + // This is possible if the worker is having high utilization but does not have any leases assigned to it + return new ArrayDeque<>(); + } + // Shuffle leases to randomize what leases gets picked. 
+ Collections.shuffle(assignedLeases); + final Queue response = new ArrayDeque<>(); + double remainingThroughputToGet = throughputToGet; + for (final Lease lease : assignedLeases) { + // if adding this lease makes throughput to take go below zero avoid taking this lease. + if (remainingThroughputToGet - lease.throughputKBps() <= 0) { + continue; + } + remainingThroughputToGet -= lease.throughputKBps(); + response.add(lease); + } + + // If allowThroughputOvershoot is set to true, take a minimum throughput lease + if (allowThroughputOvershoot && response.isEmpty()) { + assignedLeases.stream() + .min(Comparator.comparingDouble(Lease::throughputKBps)) + .ifPresent(response::add); + } + return response; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersion.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersion.java new file mode 100644 index 000000000..ccbd90858 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersion.java @@ -0,0 +1,58 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * Reorganizing the values is not backward compatible, also if versions are removed, the corresponding + * enum value cannot be reused without backward compatibility considerations. + */ +public enum ClientVersion { + /** + * This is a transient start state version used during initialization of the Migration State Machine. + */ + CLIENT_VERSION_INIT, + /** + * This version is used during the upgrade of an application from KCLv2.x to KCLv3.x, in this version + * KCL workers will emit WorkerMetricStats and run KCLv2.x algorithms for leader election and lease + * assignment. KCL will also monitor for upgrade to KCLv3.x readiness of the worker fleet. + */ + CLIENT_VERSION_UPGRADE_FROM_2X, + /** + * This version is used during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK, + * which can only be initiated using a KCL migration tool, when customer wants to revert to KCLv2.x functionality. + * In this version, KCL will not emit WorkerMetricStats and run KCLv2.x algorithms for leader election + * and lease assignment. In this version, KCL will monitor for roll-forward scenario where + * client version is updated to CLIENT_VERSION_UPGRADE_FROM_2X using the migration tool. + */ + CLIENT_VERSION_2X, + /** + * When workers are operating in CLIENT_VERSION_UPGRADE_FROM_2X and when worker fleet is determined to be + * KCLv3.x ready (when lease table GSI is active and worker-metrics are being emitted by all lease owners) + * then the leader will initiate the switch to KCLv3.x algorithms for leader election and lease assignment, + * by using this version and persisting it in the {@link MigrationState} that allows all worker hosts + * to also flip to KCLv3.x functionality. In this KCL will also monitor for rollback to detect when the + * customer updates version to CLIENT_VERSION_2X using migration tool, so that it instantly flips back + * to CLIENT_VERSION_2X. 
+ */ + CLIENT_VERSION_3X_WITH_ROLLBACK, + /** + * A new application starting KCLv3.x or an upgraded application from KCLv2.x after upgrade is successful + * can use this version to default all KCLv3.x algorithms without any monitor to rollback. + */ + CLIENT_VERSION_3X; +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitor.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitor.java new file mode 100644 index 000000000..29777fa3b --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitor.java @@ -0,0 +1,161 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator.migration; + +import java.time.Duration; +import java.util.Random; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION; + +/** + * Change monitor for MigrationState.clientVersion to notify a callback if the value + * changes from a given value. This monitor will be run to monitor + * rollback, roll-forward and also upgrade to 3.x scenarios. Look at {@link ClientVersion} + * for more details. + * + * Since all KCL workers will be running the monitor, the monitor poll interval uses + * a random jitter to stagger the reads to ddb. + * + * The class is thread-safe and will invoke callback on a separate thread. + */ +@Slf4j +@RequiredArgsConstructor +@ThreadSafe +public class ClientVersionChangeMonitor implements Runnable { + + /** + * Interface of a callback to invoke when monitor condition is true. 
+ */ + public interface ClientVersionChangeCallback { + void accept(final MigrationState currentMigrationState) throws InvalidStateException, DependencyException; + } + + private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis(); + private static final double JITTER_FACTOR = 0.1; + + private final MetricsFactory metricsFactory; + private final CoordinatorStateDAO coordinatorStateDAO; + private final ScheduledExecutorService stateMachineThreadPool; + private final ClientVersionChangeCallback callback; + private final ClientVersion expectedVersion; + private final Random random; + private long monitorIntervalMillis; + + private ScheduledFuture scheduledFuture; + + public synchronized void startMonitor() { + if (scheduledFuture == null) { + final long jitter = (long) (random.nextDouble() * MONITOR_INTERVAL_MILLIS * JITTER_FACTOR); + monitorIntervalMillis = MONITOR_INTERVAL_MILLIS + jitter; + log.info( + "Monitoring for MigrationState client version change from {} every {}ms", + expectedVersion, + monitorIntervalMillis); + scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay( + this, monitorIntervalMillis, monitorIntervalMillis, TimeUnit.MILLISECONDS); + } + } + + @Override + public String toString() { + return new StringBuilder(getClass().getSimpleName()) + .append("[") + .append(expectedVersion) + .append("]") + .toString(); + } + + /** + * Cancel the monitor explicitly before the condition is met, e.g. when the worker is going down. + * Note on synchronization: callback of this monitor is invoked while holding the lock on this monitor object. + * If cancel is called from within the same lock context that callback uses, then it can lead to + * deadlock. Ensure the synchronization context between the callback and the caller of cancel is not shared. 
+ */ + public synchronized void cancel() { + if (scheduledFuture != null) { + log.info("Cancelling {}", this); + scheduledFuture.cancel(false); + } else { + log.info("Monitor {} is not running", this); + } + } + + @Override + public synchronized void run() { + try { + if (scheduledFuture == null) { + log.debug("Monitor has been cancelled, not running..."); + return; + } + + final MigrationState migrationState = + (MigrationState) coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY); + if (migrationState != null) { + if (migrationState.getClientVersion() != expectedVersion) { + log.info("MigrationState client version has changed {}, invoking monitor callback", migrationState); + callback.accept(migrationState); + log.info("Callback successful, monitoring cancelling itself."); + // stop further monitoring + scheduledFuture.cancel(false); + scheduledFuture = null; + } else { + emitMetrics(); + log.debug("No change detected {}", this); + } + } + } catch (final Exception e) { + log.warn( + "Exception occurred when monitoring for client version change from {}, will retry in {}", + expectedVersion, + monitorIntervalMillis, + e); + } + } + + private void emitMetrics() { + final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION); + try { + switch (expectedVersion) { + case CLIENT_VERSION_3X_WITH_ROLLBACK: + scope.addData("CurrentState:3xWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + break; + case CLIENT_VERSION_2X: + case CLIENT_VERSION_UPGRADE_FROM_2X: + scope.addData("CurrentState:2xCompatibleWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + break; + default: + throw new IllegalStateException(String.format("Unexpected version %s", expectedVersion.name())); + } + } finally { + MetricsUtil.endScope(scope); + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion2xState.java 
b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion2xState.java new file mode 100644 index 000000000..45d29a413 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion2xState.java @@ -0,0 +1,159 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import java.util.Random; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledExecutorService; + +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X; +import static 
software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION; + +/** + * State for CLIENT_VERSION_2X. In this state, the only allowed valid transition is + * the roll-forward scenario which can only be performed using the KCL Migration tool. + * So when the state machine enters this state, a monitor is started to detect the + * roll-forward scenario. + */ +@KinesisClientInternalApi +@RequiredArgsConstructor +@Slf4j +@ThreadSafe +public class MigrationClientVersion2xState implements MigrationClientVersionState { + private final MigrationStateMachine stateMachine; + private final CoordinatorStateDAO coordinatorStateDAO; + private final ScheduledExecutorService stateMachineThreadPool; + private final DynamicMigrationComponentsInitializer initializer; + private final Random random; + + private ClientVersionChangeMonitor rollForwardMonitor; + private boolean entered = false; + private boolean left = false; + + @Override + public ClientVersion clientVersion() { + return CLIENT_VERSION_2X; + } + + @Override + public synchronized void enter(final ClientVersion fromClientVersion) { + if (!entered) { + log.info("Entering {} from {}", this, fromClientVersion); + initializer.initializeClientVersionFor2x(fromClientVersion); + + log.info("Starting roll-forward monitor"); + rollForwardMonitor = new ClientVersionChangeMonitor( + initializer.metricsFactory(), + coordinatorStateDAO, + stateMachineThreadPool, + this::onClientVersionChange, + clientVersion(), + random); + rollForwardMonitor.startMonitor(); + entered = true; + } else { + log.info("Not entering {}", left ? 
"already exited state" : "already entered state"); + } + } + + @Override + public synchronized void leave() { + if (entered && !left) { + log.info("Leaving {}", this); + cancelRollForwardMonitor(); + entered = false; left = true; + } else { + log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active"); + } + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } + + /** + * Callback handler to handle client version changes in MigrationState in DDB. + * @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_2X + * @throws InvalidStateException during transition to the next state based on the new ClientVersion + * or if the new state in DDB is unexpected. + */ + private synchronized void onClientVersionChange(@NonNull final MigrationState newState) + throws InvalidStateException, DependencyException { + if (!entered || left) { + log.warn("Received client version change notification on inactive state {}", this); + return; + } + final MetricsScope scope = + MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION); + try { + if (newState.getClientVersion() == CLIENT_VERSION_UPGRADE_FROM_2X) { + log.info( + "A roll-forward has been initiated for the application. Transition to {}", + CLIENT_VERSION_UPGRADE_FROM_2X); + // If this succeeds, the monitor will cancel itself. + stateMachine.transitionTo(CLIENT_VERSION_UPGRADE_FROM_2X, newState); + } else { + // This should not happen, so throw an exception that allows the monitor to continue monitoring + // changes, this allows KCL to operate in the current state and keep monitoring until a valid + // state transition is possible. + // However, there could be a split brain here, new workers will use DDB value as source of truth, + // so we could also write back CLIENT_VERSION_2X to DDB to ensure all workers have consistent + // behavior. 
+ // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope, + // so keeping it simple and not writing back to DDB, the error log below would help capture + // any strange behavior if this happens. + log.error( + "Migration state has invalid client version {}. Transition from {} is not supported", + newState, + CLIENT_VERSION_2X); + throw new InvalidStateException(String.format("Unexpected new state %s", newState)); + } + } catch (final InvalidStateException | DependencyException e) { + scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + throw e; + } finally { + MetricsUtil.endScope(scope); + } + } + + private void cancelRollForwardMonitor() { + if (rollForwardMonitor != null) { + final ClientVersionChangeMonitor localRollForwardMonitor = rollForwardMonitor; + CompletableFuture.supplyAsync(() -> { + log.info("Cancelling roll-forward monitor"); + localRollForwardMonitor.cancel(); + return null; + }); + rollForwardMonitor = null; + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xState.java new file mode 100644 index 000000000..1e8573111 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xState.java @@ -0,0 +1,70 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer; +import software.amazon.kinesis.leases.exceptions.DependencyException; + +/** + * State for CLIENT_VERSION_3X which enables KCL to run 3.x algorithms on new KCLv3.x application + * or successfully upgraded application which upgraded from v2.x. This is a terminal state of the + * state machine and no rollbacks are supported in this state. + */ +@KinesisClientInternalApi +@RequiredArgsConstructor +@Slf4j +@ThreadSafe +public class MigrationClientVersion3xState implements MigrationClientVersionState { + private final MigrationStateMachine stateMachine; + private final DynamicMigrationComponentsInitializer initializer; + private boolean entered = false; + private boolean left = false; + + @Override + public ClientVersion clientVersion() { + return ClientVersion.CLIENT_VERSION_3X; + } + + @Override + public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException { + if (!entered) { + log.info("Entering {} from {}", this, fromClientVersion); + initializer.initializeClientVersionFor3x(fromClientVersion); + entered = true; + } else { + log.info("Not entering {}", left ? "already exited state" : "already entered state"); + } + } + + @Override + public void leave() { + if (entered && !left) { + log.info("Leaving {}", this); + entered = false; + left = true; + } else { + log.info("Cannot leave {}", entered ? 
"already exited state" : "because state is not active"); + } + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xWithRollbackState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xWithRollbackState.java new file mode 100644 index 000000000..6235c5a93 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersion3xWithRollbackState.java @@ -0,0 +1,156 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator.migration; + +import java.util.Random; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledExecutorService; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X; +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION; + +/** + * State for CLIENT_VERSION_3X_WITH_ROLLBACK which enables KCL to run its 3.x compliant algorithms + * during the upgrade process after all KCL workers in the fleet are 3.x compliant. Since this + * is an instant switch from CLIENT_VERSION_UPGRADE_FROM_2X, it also supports rollback if customers + * see regression to allow for instant rollbacks as well. This would be achieved by customers + * running a KCL migration tool to update MigrationState in DDB. So this state monitors for + * rollback triggers and performs state transitions accordingly. 
+ */ +@Slf4j +@KinesisClientInternalApi +@RequiredArgsConstructor +@ThreadSafe +public class MigrationClientVersion3xWithRollbackState implements MigrationClientVersionState { + + private final MigrationStateMachine stateMachine; + private final CoordinatorStateDAO coordinatorStateDAO; + private final ScheduledExecutorService stateMachineThreadPool; + private final DynamicMigrationComponentsInitializer initializer; + private final Random random; + + private ClientVersionChangeMonitor rollbackMonitor; + private boolean entered; + private boolean left; + + @Override + public ClientVersion clientVersion() { + return ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK; + } + + @Override + public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException { + if (!entered) { + log.info("Entering {} from {}", this, fromClientVersion); + initializer.initializeClientVersionFor3xWithRollback(fromClientVersion); + // we need to run the rollback monitor + log.info("Starting rollback monitor"); + rollbackMonitor = new ClientVersionChangeMonitor( + initializer.metricsFactory(), + coordinatorStateDAO, + stateMachineThreadPool, + this::onClientVersionChange, + clientVersion(), + random); + rollbackMonitor.startMonitor(); + entered = true; + } else { + log.info("Not entering {}", left ? "already exited state" : "already entered state"); + } + } + + @Override + public void leave() { + if (entered && !left) { + log.info("Leaving {}", this); + cancelRollbackMonitor(); + entered = false; + left = true; + } else { + log.info("Cannot leave {}", entered ? 
"already exited state" : "because state is not active"); + } + } + + private synchronized void onClientVersionChange(final MigrationState newState) + throws InvalidStateException, DependencyException { + if (!entered || left) { + log.warn("Received client version change notification on inactive state {}", this); + return; + } + final MetricsScope scope = + MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION); + try { + switch (newState.getClientVersion()) { + case CLIENT_VERSION_2X: + log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X); + stateMachine.transitionTo(ClientVersion.CLIENT_VERSION_2X, newState); + break; + case CLIENT_VERSION_3X: + log.info("Customer has switched to 3.x after successful upgrade, state machine will move to a " + + "terminal state and stop monitoring. Rollbacks will no longer be supported anymore"); + stateMachine.transitionTo(CLIENT_VERSION_3X, newState); + // This worker will still be running the migrationAdaptive components in 3.x mode which will + // no longer dynamically switch back to 2.x mode, however to directly run 3.x component without + // adaption to migration (i.e. move to CLIENT_VERSION_3X state), it requires this worker to go + // through the current deployment which initiated the switch to 3.x mode. + break; + default: + // This should not happen, so throw an exception that allows the monitor to continue monitoring + // changes, this allows KCL to operate in the current state and keep monitoring until a valid + // state transition is possible. + // However, there could be a split brain here, new workers will use DDB value as source of truth, + // so we could also write back CLIENT_VERSION_3X_WITH_ROLLBACK to DDB to ensure all workers have + // consistent behavior. 
+ // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope, + // so keeping it simple and not writing back to DDB, the error log below would help capture + // any strange behavior if this happens. + log.error("Migration state has invalid client version {}", newState); + throw new InvalidStateException(String.format("Unexpected new state %s", newState)); + } + } catch (final InvalidStateException | DependencyException e) { + scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + throw e; + } finally { + MetricsUtil.endScope(scope); + } + } + + private void cancelRollbackMonitor() { + if (rollbackMonitor != null) { + final ClientVersionChangeMonitor localRollbackMonitor = rollbackMonitor; + CompletableFuture.supplyAsync(() -> { + log.info("Cancelling rollback monitor"); + localRollbackMonitor.cancel(); + return null; + }); + rollbackMonitor = null; + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionState.java new file mode 100644 index 000000000..c1d8507ed --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionState.java @@ -0,0 +1,47 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator.migration; + +import software.amazon.kinesis.leases.exceptions.DependencyException; + +/** + * Interface of a state implementation for the MigrationStateMachine + */ +public interface MigrationClientVersionState { + + /** + * The associated clientVersion this state corresponds to + * @return ClientVersion that this state implements the logic for. + */ + ClientVersion clientVersion(); + + /** + * Enter the state and perform the business logic of being in this state + * which includes performing any monitoring that allows the next state + * transition and also initializing the KCL based on the ClientVersion. + * @param fromClientVersion from previous state if any specific action must + * be taken based on the state from which this state + * is being entered from. + * @throws DependencyException if DDB fails in unexpected ways for those states + * that create the GSI + */ + void enter(ClientVersion fromClientVersion) throws DependencyException; + + /** + * Invoked after the transition to another state has occurred + * to allow printing any helpful logs or performing cleanup. + */ + void leave(); +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionStateInitializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionStateInitializer.java new file mode 100644 index 000000000..970bd6ede --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionStateInitializer.java @@ -0,0 +1,263 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import java.util.AbstractMap.SimpleEntry; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.Callable; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig; +import software.amazon.kinesis.coordinator.CoordinatorState; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; + +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X; +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X; +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK; +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X; +import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY; + +/** + * Initializer to determine start state of the state machine which identifies the + * state to initialize KCL when it is starting up. 
The initial state is determined based on the + * customer configured {@link ClientVersionConfig} and the current {@link MigrationState} in DDB, + * as follows + * ClientVersionConfig | MigrationState (DDB) | initial client version + * --------------------+---------------------------------+-------------------------------- + * COMPATIBLE_WITH_2X | Does not exist | CLIENT_VERSION_UPGRADE_FROM_2X + * 3X | Does not exist | CLIENT_VERSION_3X + * COMPATIBLE_WITH_2X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X_WITH_ROLLBACK + * 3X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X + * any | CLIENT_VERSION_2X | CLIENT_VERSION_2X + * any | CLIENT_VERSION_UPGRADE_FROM_2X | CLIENT_VERSION_UPGRADE_FROM_2X + * any | CLIENT_VERSION_3X | CLIENT_VERSION_3X + */ +@KinesisClientInternalApi +@RequiredArgsConstructor +@Slf4j +@ThreadSafe +public class MigrationClientVersionStateInitializer { + private static final int MAX_INITIALIZATION_RETRY = 10; + private static final long INITIALIZATION_RETRY_DELAY_MILLIS = 1000L; + /** + * A jitter factor of 10% to stagger the retries. 
+ */ + private static final double JITTER_FACTOR = 0.1; + + private final Callable timeProvider; + private final CoordinatorStateDAO coordinatorStateDAO; + private final ClientVersionConfig clientVersionConfig; + private final Random random; + private final String workerIdentifier; + + public SimpleEntry getInitialState() throws DependencyException { + log.info("Initializing migration state machine starting state, configured version {}", clientVersionConfig); + + try { + MigrationState migrationState = getMigrationStateFromDynamo(); + int retryCount = 0; + while (retryCount++ < MAX_INITIALIZATION_RETRY) { + final ClientVersion initialClientVersion = getClientVersionForInitialization(migrationState); + if (migrationState.getClientVersion() != initialClientVersion) { + // If update fails, the value represents current state in dynamo + migrationState = updateMigrationStateInDynamo(migrationState, initialClientVersion); + if (migrationState.getClientVersion() == initialClientVersion) { + // update succeeded. Transition to the state + return new SimpleEntry<>(initialClientVersion, migrationState); + } + final long delay = getInitializationRetryDelay(); + log.warn( + "Failed to update migration state with {}, retry after delay {}", + initialClientVersion, + delay); + safeSleep(delay); + } else { + return new SimpleEntry<>(initialClientVersion, migrationState); + } + } + } catch (final InvalidStateException e) { + log.error("Unable to initialize state machine", e); + } + throw new DependencyException( + new RuntimeException("Unable to determine initial state for migration state machine")); + } + + public ClientVersion getClientVersionForInitialization(final MigrationState migrationState) { + final ClientVersion nextClientVersion; + switch (migrationState.getClientVersion()) { + case CLIENT_VERSION_INIT: + // There is no state in DDB, set state to config version and transition to configured version. 
+ nextClientVersion = getNextClientVersionBasedOnConfigVersion(); + log.info("Application is starting in {}", nextClientVersion); + break; + case CLIENT_VERSION_3X_WITH_ROLLBACK: + if (clientVersionConfig == ClientVersionConfig.CLIENT_VERSION_CONFIG_3X) { + // upgrade successful, allow transition to 3x. + log.info("Application has successfully upgraded, transitioning to {}", CLIENT_VERSION_3X); + nextClientVersion = CLIENT_VERSION_3X; + break; + } + log.info("Initialize with {}", CLIENT_VERSION_3X_WITH_ROLLBACK); + nextClientVersion = migrationState.getClientVersion(); + break; + case CLIENT_VERSION_2X: + log.info("Application has rolled-back, initialize with {}", CLIENT_VERSION_2X); + nextClientVersion = migrationState.getClientVersion(); + break; + case CLIENT_VERSION_UPGRADE_FROM_2X: + log.info("Application is upgrading, initialize with {}", CLIENT_VERSION_UPGRADE_FROM_2X); + nextClientVersion = migrationState.getClientVersion(); + break; + case CLIENT_VERSION_3X: + log.info("Initialize with {}", CLIENT_VERSION_3X); + nextClientVersion = migrationState.getClientVersion(); + break; + default: + throw new IllegalStateException(String.format("Unknown version in DDB %s", migrationState)); + } + return nextClientVersion; + } + + /** + * Update the migration state's client version in dynamo conditional on the current client version + * in dynamo. So that if another worker updates the value first, the update fails. If the update fails, + * the method will read the latest value and return so that initialization can be retried. + * If the value does not exist in dynamo, it will creat it. 
+ */ + private MigrationState updateMigrationStateInDynamo( + final MigrationState migrationState, final ClientVersion nextClientVersion) throws InvalidStateException { + try { + if (migrationState.getClientVersion() == ClientVersion.CLIENT_VERSION_INIT) { + migrationState.update(nextClientVersion, workerIdentifier); + log.info("Creating {}", migrationState); + final boolean created = coordinatorStateDAO.createCoordinatorStateIfNotExists(migrationState); + if (!created) { + log.debug("Create {} did not succeed", migrationState); + return getMigrationStateFromDynamo(); + } + } else { + log.info("Updating {} with {}", migrationState, nextClientVersion); + final Map expectations = + migrationState.getDynamoClientVersionExpectation(); + migrationState.update(nextClientVersion, workerIdentifier); + final boolean updated = + coordinatorStateDAO.updateCoordinatorStateWithExpectation(migrationState, expectations); + if (!updated) { + log.debug("Update {} did not succeed", migrationState); + return getMigrationStateFromDynamo(); + } + } + return migrationState; + } catch (final ProvisionedThroughputException | DependencyException e) { + log.debug( + "Failed to update migration state {} with {}, return previous value to trigger a retry", + migrationState, + nextClientVersion, + e); + return migrationState; + } + } + + private ClientVersion getNextClientVersionBasedOnConfigVersion() { + switch (clientVersionConfig) { + case CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X: + return CLIENT_VERSION_UPGRADE_FROM_2X; + case CLIENT_VERSION_CONFIG_3X: + return CLIENT_VERSION_3X; + } + throw new IllegalStateException(String.format("Unknown configured Client version %s", clientVersionConfig)); + } + + /** + * Read the current {@link MigrationState} from DDB with retries. 
+ * @return current Migration state from DDB, if none exists, an initial Migration State with CLIENT_VERSION_INIT + * will be returned + * @throws InvalidStateException, this occurs when dynamo table does not exist in which retrying is not useful. + */ + private MigrationState getMigrationStateFromDynamo() throws InvalidStateException { + return executeCallableWithRetryAndJitter( + () -> { + final CoordinatorState state = coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY); + if (state == null) { + log.info("No Migration state available in DDB"); + return new MigrationState(MIGRATION_HASH_KEY, workerIdentifier); + } + if (state instanceof MigrationState) { + log.info("Current migration state in DDB {}", state); + return (MigrationState) state; + } + throw new InvalidStateException( + String.format("Unexpected state found not confirming to MigrationState schema %s", state)); + }, + "get MigrationState from DDB"); + } + + /** + * Helper method to retry a given callable upto MAX_INITIALIZATION_RETRY times for all retryable exceptions. + * It considers InvalidStateException as non-retryable exception. During retry, it will compute a delay + * with jitter before retrying. + * @param callable callable to invoke either until it succeeds or max retry attempts exceed. + * @param description a meaningful description to log exceptions + * @return the value returned by the callable + * @param Return type of the callable + * @throws InvalidStateException If the callable throws InvalidStateException, it will not be retried and will + * be thrown back. 
+ */ + private T executeCallableWithRetryAndJitter(final Callable callable, final String description) + throws InvalidStateException { + int retryCount = 0; + while (retryCount++ < MAX_INITIALIZATION_RETRY) { + try { + return callable.call(); + } catch (final Exception e) { + if (e instanceof InvalidStateException) { + // throw the non-retryable exception + throw (InvalidStateException) e; + } + final long delay = getInitializationRetryDelay(); + log.warn("Failed to {}, retry after delay {}", description, delay, e); + + safeSleep(delay); + } + } + throw new RuntimeException( + String.format("Failed to %s after %d retries, giving up", description, MAX_INITIALIZATION_RETRY)); + } + + private void safeSleep(final long delay) { + try { + Thread.sleep(delay); + } catch (final InterruptedException ie) { + log.debug("Interrupted sleep during state machine initialization retry"); + } + } + + /** + * Generate a delay with jitter that is factor of the interval. + * @return delay with jitter + */ + private long getInitializationRetryDelay() { + final long jitter = (long) (random.nextDouble() * JITTER_FACTOR * INITIALIZATION_RETRY_DELAY_MILLIS); + return INITIALIZATION_RETRY_DELAY_MILLIS + jitter; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionUpgradeFrom2xState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionUpgradeFrom2xState.java new file mode 100644 index 000000000..86106a079 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationClientVersionUpgradeFrom2xState.java @@ -0,0 +1,241 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledExecutorService; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X; +import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC; +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION; + +/** + * State for CLIENT_VERSION_UPGRADE_FROM_2X. 
When state machine enters this state, + * KCL is initialized to operate in dual mode for Lease assignment and Leader decider algorithms + * which initially start in 2.x compatible mode and when all the KCL workers are 3.x compliant, + * it dynamically switches to the 3.x algorithms. It also monitors for rollback + * initiated from customer via the KCL migration tool and instantly switches back to the 2.x + * complaint algorithms. + * The allowed state transitions are to CLIENT_VERSION_3X_WITH_ROLLBACK when KCL workers are + * 3.x complaint, and to CLIENT_VERSION_2X when customer has initiated a rollback. + * Only the leader KCL worker performs migration ready monitor and notifies all workers (including + * itself) via a MigrationState update. When all worker's monitor notice the MigrationState change + * (including itself), it will transition to CLIENT_VERSION_3X_WITH_ROLLBACK. + */ +@KinesisClientInternalApi +@RequiredArgsConstructor +@Slf4j +@ThreadSafe +public class MigrationClientVersionUpgradeFrom2xState implements MigrationClientVersionState { + private final MigrationStateMachine stateMachine; + private final Callable timeProvider; + private final CoordinatorStateDAO coordinatorStateDAO; + private final ScheduledExecutorService stateMachineThreadPool; + private final DynamicMigrationComponentsInitializer initializer; + private final Random random; + private final MigrationState currentMigrationState; + private final long flipTo3XStabilizerTimeInSeconds; + + private MigrationReadyMonitor migrationMonitor; + private ClientVersionChangeMonitor clientVersionChangeMonitor; + private boolean entered = false; + private boolean left = false; + + @Override + public ClientVersion clientVersion() { + return ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X; + } + + @Override + public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException { + if (!entered) { + log.info("Entering state {} from {}", this, fromClientVersion); + 
initializer.initializeClientVersionForUpgradeFrom2x(fromClientVersion); + + log.info("Starting migration ready monitor to monitor 3.x compliance of the KCL workers"); + migrationMonitor = new MigrationReadyMonitor( + initializer.metricsFactory(), + timeProvider, + initializer.leaderDecider(), + initializer.workerIdentifier(), + initializer.workerMetricsDAO(), + initializer.workerMetricsExpirySeconds(), + initializer.leaseRefresher(), + stateMachineThreadPool, + this::onMigrationReady, + flipTo3XStabilizerTimeInSeconds); + migrationMonitor.startMonitor(); + + log.info("Starting monitor for rollback and flip to 3.x"); + clientVersionChangeMonitor = new ClientVersionChangeMonitor( + initializer.metricsFactory(), + coordinatorStateDAO, + stateMachineThreadPool, + this::onClientVersionChange, + clientVersion(), + random); + clientVersionChangeMonitor.startMonitor(); + entered = true; + } else { + log.info("Not entering {}", left ? "already exited state" : "already entered state"); + } + } + + @Override + public synchronized void leave() { + if (entered && !left) { + log.info("Leaving {}", this); + cancelMigrationReadyMonitor(); + cancelClientChangeVersionMonitor(); + entered = false; + } else { + log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active"); + } + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } + + private synchronized void onMigrationReady() { + // this is invoked on the leader worker only + if (!entered || left || migrationMonitor == null) { + log.info("Ignoring migration ready monitor, state already transitioned"); + return; + } + // update dynamo with the state to toggle to 3.x + // and let the clientVersionChange kick in to do state transition + // this way both leader and non-leader worker all transition when + // it discovers the update from ddb. 
+ if (updateDynamoStateForTransition()) { + // successfully toggled the state, now we can cancel the monitor + cancelMigrationReadyMonitor(); + } + // else - either migration ready monitor will retry or + // client Version change callback will initiate the next state transition. + } + + private void cancelMigrationReadyMonitor() { + if (migrationMonitor != null) { + final MigrationReadyMonitor localMigrationMonitor = migrationMonitor; + CompletableFuture.supplyAsync(() -> { + log.info("Cancelling migration ready monitor"); + localMigrationMonitor.cancel(); + return null; + }); + migrationMonitor = null; + } + } + + private void cancelClientChangeVersionMonitor() { + if (clientVersionChangeMonitor != null) { + final ClientVersionChangeMonitor localClientVersionChangeMonitor = clientVersionChangeMonitor; + CompletableFuture.supplyAsync(() -> { + log.info("Cancelling client change version monitor"); + localClientVersionChangeMonitor.cancel(); + return null; + }); + clientVersionChangeMonitor = null; + } + } + + /** + * Callback handler to handle client version changes in MigrationState in DDB. + * @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_UPGRADE_FROM_2X + * @throws InvalidStateException during transition to the next state based on the new ClientVersion + * or if the new state in DDB is unexpected. + */ + private synchronized void onClientVersionChange(final MigrationState newState) + throws InvalidStateException, DependencyException { + if (!entered || left) { + log.warn("Received client version change notification on inactive state {}", this); + return; + } + final MetricsScope scope = + MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION); + try { + switch (newState.getClientVersion()) { + case CLIENT_VERSION_2X: + log.info("A rollback has been initiated for the application. 
Transition to {}", CLIENT_VERSION_2X); + // cancel monitor asynchronously + cancelMigrationReadyMonitor(); + stateMachine.transitionTo(CLIENT_VERSION_2X, newState); + break; + case CLIENT_VERSION_3X_WITH_ROLLBACK: + log.info("KCL workers are v3.x compliant, transition to {}", CLIENT_VERSION_3X_WITH_ROLLBACK); + cancelMigrationReadyMonitor(); + stateMachine.transitionTo(CLIENT_VERSION_3X_WITH_ROLLBACK, newState); + break; + default: + // This should not happen, so throw an exception that allows the monitor to continue monitoring + // changes, this allows KCL to operate in the current state and keep monitoring until a valid + // state transition is possible. + // However, there could be a split brain here, new workers will use DDB value as source of truth, + // so we could also write back CLIENT_VERSION_UPGRADE_FROM_2X to DDB to ensure all workers have + // consistent behavior. + // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope, + // so keeping it simple and not writing back to DDB, the error log below would help capture + // any strange behavior if this happens. 
+ log.error("Migration state has invalid client version {}", newState); + throw new InvalidStateException(String.format("Unexpected new state %s", newState)); + } + } catch (final DependencyException | InvalidStateException e) { + scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + throw e; + } finally { + MetricsUtil.endScope(scope); + } + } + + private boolean updateDynamoStateForTransition() { + final MetricsScope scope = + MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION); + try { + final MigrationState newMigrationState = currentMigrationState + .copy() + .update(CLIENT_VERSION_3X_WITH_ROLLBACK, initializer.workerIdentifier()); + log.info("Updating Migration State in DDB with {} prev state {}", newMigrationState, currentMigrationState); + return coordinatorStateDAO.updateCoordinatorStateWithExpectation( + newMigrationState, currentMigrationState.getDynamoClientVersionExpectation()); + } catch (final Exception e) { + log.warn( + "Exception occurred when toggling to {}, upgradeReadyMonitor will retry the update" + + " if upgrade condition is still true", + CLIENT_VERSION_3X_WITH_ROLLBACK, + e); + scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + return false; + } finally { + MetricsUtil.endScope(scope); + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitor.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitor.java new file mode 100644 index 000000000..3410c932a --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitor.java @@ -0,0 +1,352 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import java.time.Duration; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStats; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; + +import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION; + +/** + * Monitor for KCL workers 3.x readiness. This monitor is started on all workers but only + * executed on the leader of the fleet. 
The leader determines 3.x readiness if GSI of the lease + * table is active and all lease owners are emitting WorkerMetricStats. The monitor performs this + * check periodically and will invoke callback if the readiness conditions are true. Monitor + * needs to be explicitly cancelled after the readiness trigger has successfully been handled. + * + * Thread safety - Guard for safety against public method invocation and internal runnable method. + */ +@Slf4j +@ThreadSafe +public class MigrationReadyMonitor implements Runnable { + private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis(); + private static final long LOG_INTERVAL_NANOS = Duration.ofMinutes(5).toNanos(); + + /** + * Default retry attempt for loading leases and workers before giving up. + */ + private static final int DDB_LOAD_RETRY_ATTEMPT = 1; + + private final MetricsFactory metricsFactory; + private final Callable timeProvider; + private final LeaderDecider leaderDecider; + private final String currentWorkerId; + private final WorkerMetricStatsDAO workerMetricStatsDAO; + private final long workerMetricStatsExpirySeconds; + private final LeaseRefresher leaseRefresher; + private final ScheduledExecutorService stateMachineThreadPool; + private final MonitorTriggerStabilizer triggerStabilizer; + + private final LogRateLimiter rateLimitedStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS); + private ScheduledFuture scheduledFuture; + private boolean gsiStatusReady; + private boolean workerMetricsReady; + private Set lastKnownUniqueLeaseOwners = new HashSet<>(); + private Set lastKnownWorkersWithActiveWorkerMetrics = new HashSet<>(); + + public MigrationReadyMonitor( + final MetricsFactory metricsFactory, + final Callable timeProvider, + final LeaderDecider leaderDecider, + final String currentWorkerId, + final WorkerMetricStatsDAO workerMetricStatsDAO, + final long workerMetricsExpirySeconds, + final LeaseRefresher leaseRefresher, + final ScheduledExecutorService 
stateMachineThreadPool, + final Runnable callback, + final long callbackStabilizationInSeconds) { + this.metricsFactory = metricsFactory; + this.timeProvider = timeProvider; + this.leaderDecider = leaderDecider; + this.currentWorkerId = currentWorkerId; + this.workerMetricStatsDAO = workerMetricStatsDAO; + this.workerMetricStatsExpirySeconds = workerMetricsExpirySeconds; + this.leaseRefresher = leaseRefresher; + this.stateMachineThreadPool = stateMachineThreadPool; + this.triggerStabilizer = + new MonitorTriggerStabilizer(timeProvider, callbackStabilizationInSeconds, callback, currentWorkerId); + } + + public synchronized void startMonitor() { + if (Objects.isNull(scheduledFuture)) { + + log.info("Starting migration ready monitor"); + scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay( + this, MONITOR_INTERVAL_MILLIS, MONITOR_INTERVAL_MILLIS, TimeUnit.MILLISECONDS); + } else { + log.info("Ignoring monitor request, since it is already started"); + } + } + + /** + * Cancel the monitor. Once the method returns callback will not be invoked, + * but callback can be invoked reentrantly before this method returns. 
+ */ + public synchronized void cancel() { + if (Objects.nonNull(scheduledFuture)) { + log.info("Cancelled migration ready monitor"); + scheduledFuture.cancel(true); + scheduledFuture = null; + } else { + log.info("{} is currently not active", this); + } + } + + @Override + public synchronized void run() { + try { + if (Thread.currentThread().isInterrupted()) { + log.info("{} cancelled, exiting...", this); + return; + } + if (!leaderDecider.isLeader(currentWorkerId)) { + log.debug("Not the leader, not performing migration ready check {}", this); + triggerStabilizer.reset(); + lastKnownUniqueLeaseOwners.clear(); + lastKnownWorkersWithActiveWorkerMetrics.clear(); + return; + } + + triggerStabilizer.call(isReadyForUpgradeTo3x()); + rateLimitedStatusLogger.log(() -> log.info("Monitor ran successfully {}", this)); + } catch (final Throwable t) { + log.warn("{} failed, will retry after {}", this, MONITOR_INTERVAL_MILLIS, t); + } + } + + @Override + public String toString() { + return new StringBuilder("UpgradeReadyMonitor[") + .append("G=") + .append(gsiStatusReady) + .append(",W=") + .append(workerMetricsReady) + .append("]") + .toString(); + } + + private boolean isReadyForUpgradeTo3x() throws DependencyException { + final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION); + try { + // If GSI is not ready, optimize to not check if worker metrics are being emitted + final boolean localGsiReadyStatus = leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive(); + if (localGsiReadyStatus != gsiStatusReady) { + gsiStatusReady = localGsiReadyStatus; + log.info("Gsi ready status changed to {}", gsiStatusReady); + } else { + log.debug("GsiReady status {}", gsiStatusReady); + } + return gsiStatusReady && areLeaseOwnersEmittingWorkerMetrics(); + } finally { + scope.addData("GsiReadyStatus", gsiStatusReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY); + scope.addData( + "WorkerMetricsReadyStatus", workerMetricsReady ? 
1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY); + MetricsUtil.endScope(scope); + } + } + + private boolean areLeaseOwnersEmittingWorkerMetrics() { + final CompletableFuture> leaseListFuture = loadLeaseListAsync(); + final CompletableFuture> workerMetricsFuture = loadWorkerMetricStats(); + + final List leaseList = leaseListFuture.join(); + final Set leaseOwners = getUniqueLeaseOwnersFromLeaseTable(leaseList); + final List workerMetricStatsList = workerMetricsFuture.join(); + final Set workersWithActiveWorkerMetrics = getWorkersWithActiveWorkerMetricStats(workerMetricStatsList); + + // Leases are not checked for expired condition because: + // If some worker has gone down and is not active, but has lease assigned to it, those leases + // maybe expired. Since the worker is down, it may not have worker-metrics, or worker-metrics may not be active, + // In that case, the migration condition is not considered to be met. + // However, those leases should be assigned to another worker and so the check in the next + // iteration could succeed. This is intentional to make sure all leases owners are accounted for + // and the old owner does not come back up without worker metrics and reacquires the lease. 
+ final boolean localWorkerMetricsReady = leaseOwners.equals(workersWithActiveWorkerMetrics); + if (localWorkerMetricsReady != workerMetricsReady) { + workerMetricsReady = localWorkerMetricsReady; + log.info("WorkerMetricStats status changed to {}", workerMetricsReady); + log.info("Lease List {}", leaseList); + log.info("WorkerMetricStats {}", workerMetricStatsList); + } else { + log.debug("WorkerMetricStats ready status {}", workerMetricsReady); + } + + if (lastKnownUniqueLeaseOwners == null) { + log.info("Unique lease owners {}", leaseOwners); + } else if (!lastKnownUniqueLeaseOwners.equals(leaseOwners)) { + log.info("Unique lease owners changed to {}", leaseOwners); + } + lastKnownUniqueLeaseOwners = leaseOwners; + + if (lastKnownWorkersWithActiveWorkerMetrics == null) { + log.info("Workers with active worker metric stats {}", workersWithActiveWorkerMetrics); + } else if (!lastKnownWorkersWithActiveWorkerMetrics.equals(workersWithActiveWorkerMetrics)) { + log.info("Workers with active worker metric stats changed {}", workersWithActiveWorkerMetrics); + } + lastKnownWorkersWithActiveWorkerMetrics = workersWithActiveWorkerMetrics; + + return workerMetricsReady; + } + + private Set getUniqueLeaseOwnersFromLeaseTable(final List leaseList) { + return leaseList.stream().map(Lease::leaseOwner).collect(Collectors.toSet()); + } + + private Set getWorkersWithActiveWorkerMetricStats(final List workerMetricStats) { + final long nowInSeconds = Duration.ofMillis(now(timeProvider)).getSeconds(); + return workerMetricStats.stream() + .filter(metricStats -> isWorkerMetricStatsActive(metricStats, nowInSeconds)) + .map(WorkerMetricStats::getWorkerId) + .collect(Collectors.toSet()); + } + + private boolean isWorkerMetricStatsActive(final WorkerMetricStats metricStats, final long nowInSeconds) { + return (metricStats.getLastUpdateTime() + workerMetricStatsExpirySeconds) > nowInSeconds; + } + + private CompletableFuture> loadWorkerMetricStats() { + return 
CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricStatsDAO::getAllWorkerMetricStats)); + } + + private CompletableFuture> loadLeaseListAsync() { + return CompletableFuture.supplyAsync(() -> loadWithRetry(leaseRefresher::listLeases)); + } + + private T loadWithRetry(final Callable loadFunction) { + int retryAttempt = 0; + while (true) { + try { + return loadFunction.call(); + } catch (final Exception e) { + if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) { + log.warn( + "Failed to load : {}, retrying", + loadFunction.getClass().getName(), + e); + retryAttempt++; + } else { + throw new CompletionException(e); + } + } + } + } + + private static long now(final Callable timeProvider) { + try { + return timeProvider.call(); + } catch (final Exception e) { + log.debug("Time provider threw exception, using System.currentTimeMillis", e); + return System.currentTimeMillis(); + } + } + + /** + * Stabilize the monitor trigger before invoking the callback + * to ensure we are consistently seeing the trigger for a configured + * stabilizationDurationInMillis + */ + private static class MonitorTriggerStabilizer { + private final Callable timeProvider; + private final long stabilizationDurationInSeconds; + private final Runnable callback; + private final String currentWorkerId; + private final LogRateLimiter rateLimitedTriggerStatusLogger; + + private long lastToggleTimeInMillis; + private boolean currentTriggerStatus; + + public MonitorTriggerStabilizer( + final Callable timeProvider, + final long stabilizationDurationInSeconds, + final Runnable callback, + final String currentWorkerId) { + this.timeProvider = timeProvider; + this.stabilizationDurationInSeconds = stabilizationDurationInSeconds; + this.callback = callback; + this.currentWorkerId = currentWorkerId; + this.rateLimitedTriggerStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS); + } + + public void call(final boolean isMonitorTriggered) { + final long now = now(timeProvider); + if (currentTriggerStatus != 
isMonitorTriggered) { + log.info("Trigger status has changed to {}", isMonitorTriggered); + currentTriggerStatus = isMonitorTriggered; + lastToggleTimeInMillis = now; + } + + if (currentTriggerStatus) { + final long deltaSeconds = + Duration.ofMillis(now - lastToggleTimeInMillis).getSeconds(); + if (deltaSeconds >= stabilizationDurationInSeconds) { + log.info("Trigger has been consistently true for {}s, invoking callback", deltaSeconds); + callback.run(); + } else { + rateLimitedTriggerStatusLogger.log(() -> log.info( + "Trigger has been true for {}s, waiting for stabilization time of {}s", + deltaSeconds, + stabilizationDurationInSeconds)); + } + } + } + + public void reset() { + if (currentTriggerStatus) { + log.info("This worker {} is no longer the leader, reset current status", currentWorkerId); + } + currentTriggerStatus = false; + } + } + + @RequiredArgsConstructor + private static class LogRateLimiter { + private final long logIntervalInNanos; + + private long nextLogTime = System.nanoTime(); + + public void log(final Runnable logger) { + final long now = System.nanoTime(); + if (now >= nextLogTime) { + logger.run(); + nextLogTime = now + logIntervalInNanos; + } + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationState.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationState.java new file mode 100644 index 000000000..856af2e10 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationState.java @@ -0,0 +1,231 @@ +package software.amazon.kinesis.coordinator.migration; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; +import 
software.amazon.awssdk.services.dynamodb.model.AttributeAction; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate; +import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue; +import software.amazon.kinesis.common.StackTraceUtils; +import software.amazon.kinesis.coordinator.CoordinatorState; + +/** + * Data model of the Migration state. This is used to track the state related to migration + * from KCLv2.x to KCLv3.x. + */ +@Getter +@ToString(callSuper = true) +@Slf4j +public class MigrationState extends CoordinatorState { + /** + * Key value for the item in the CoordinatorState table + */ + public static final String MIGRATION_HASH_KEY = "Migration3.0"; + /** + * Attribute name in migration state item, whose value is used during + * the KCL v3.x migration process to know whether the workers need to + * perform KCL v2.x compatible operations or can perform native KCL v3.x + * operations. 
+ */ + public static final String CLIENT_VERSION_ATTRIBUTE_NAME = "cv"; + + public static final String MODIFIED_BY_ATTRIBUTE_NAME = "mb"; + public static final String MODIFIED_TIMESTAMP_ATTRIBUTE_NAME = "mts"; + public static final String HISTORY_ATTRIBUTE_NAME = "h"; + private static final int MAX_HISTORY_ENTRIES = 10; + + private ClientVersion clientVersion; + private String modifiedBy; + private long modifiedTimestamp; + private final List history; + + private MigrationState( + final String key, + final ClientVersion clientVersion, + final String modifiedBy, + final long modifiedTimestamp, + final List historyEntries, + final Map others) { + setKey(key); + setAttributes(others); + this.clientVersion = clientVersion; + this.modifiedBy = modifiedBy; + this.modifiedTimestamp = modifiedTimestamp; + this.history = historyEntries; + } + + public MigrationState(final String key, final String modifiedBy) { + this( + key, + ClientVersion.CLIENT_VERSION_INIT, + modifiedBy, + System.currentTimeMillis(), + new ArrayList<>(), + new HashMap<>()); + } + + public HashMap serialize() { + final HashMap result = new HashMap<>(); + result.put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(clientVersion.name())); + result.put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(modifiedBy)); + result.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(modifiedTimestamp))); + + if (!history.isEmpty()) { + final List historyList = new ArrayList<>(); + for (final HistoryEntry entry : history) { + historyList.add(AttributeValue.builder().m(entry.serialize()).build()); + } + result.put( + HISTORY_ATTRIBUTE_NAME, + AttributeValue.builder().l(historyList).build()); + } + + return result; + } + + public static MigrationState deserialize(final String key, final HashMap attributes) { + if (!MIGRATION_HASH_KEY.equals(key)) { + return null; + } + + try { + final HashMap mutableAttributes = new HashMap<>(attributes); + final ClientVersion clientVersion = 
ClientVersion.valueOf( + mutableAttributes.remove(CLIENT_VERSION_ATTRIBUTE_NAME).s()); + final String modifiedBy = + mutableAttributes.remove(MODIFIED_BY_ATTRIBUTE_NAME).s(); + final long modifiedTimestamp = Long.parseLong( + mutableAttributes.remove(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n()); + + final List historyList = new ArrayList<>(); + if (attributes.containsKey(HISTORY_ATTRIBUTE_NAME)) { + mutableAttributes.remove(HISTORY_ATTRIBUTE_NAME).l().stream() + .map(historyEntry -> HistoryEntry.deserialize(historyEntry.m())) + .forEach(historyList::add); + } + final MigrationState migrationState = new MigrationState( + MIGRATION_HASH_KEY, clientVersion, modifiedBy, modifiedTimestamp, historyList, mutableAttributes); + + if (!mutableAttributes.isEmpty()) { + log.info("Unknown attributes {} for state {}", mutableAttributes, migrationState); + } + return migrationState; + + } catch (final Exception e) { + log.warn("Unable to deserialize state with key {} and attributes {}", key, attributes, e); + } + return null; + } + + public Map getDynamoClientVersionExpectation() { + return new HashMap() { + { + put( + CLIENT_VERSION_ATTRIBUTE_NAME, + ExpectedAttributeValue.builder() + .value(AttributeValue.fromS(clientVersion.name())) + .build()); + } + }; + } + + public MigrationState copy() { + return new MigrationState( + getKey(), + getClientVersion(), + getModifiedBy(), + getModifiedTimestamp(), + new ArrayList<>(getHistory()), + new HashMap<>(getAttributes())); + } + + public MigrationState update(final ClientVersion clientVersion, final String modifiedBy) { + log.info( + "Migration state is being updated to {} current state {} caller {}", + clientVersion, + this, + StackTraceUtils.getPrintableStackTrace(Thread.currentThread().getStackTrace())); + addHistoryEntry(this.clientVersion, this.modifiedBy, this.modifiedTimestamp); + this.clientVersion = clientVersion; + this.modifiedBy = modifiedBy; + this.modifiedTimestamp = System.currentTimeMillis(); + return this; + } + + public 
void addHistoryEntry( + final ClientVersion lastClientVersion, final String lastModifiedBy, final long lastModifiedTimestamp) { + history.add(0, new HistoryEntry(lastClientVersion, lastModifiedBy, lastModifiedTimestamp)); + if (history.size() > MAX_HISTORY_ENTRIES) { + log.info("Limit {} reached, dropping history {}", MAX_HISTORY_ENTRIES, history.remove(history.size() - 1)); + } + } + + public Map getDynamoUpdate() { + final HashMap updates = new HashMap<>(); + updates.put( + CLIENT_VERSION_ATTRIBUTE_NAME, + AttributeValueUpdate.builder() + .value(AttributeValue.fromS(clientVersion.name())) + .action(AttributeAction.PUT) + .build()); + updates.put( + MODIFIED_BY_ATTRIBUTE_NAME, + AttributeValueUpdate.builder() + .value(AttributeValue.fromS(modifiedBy)) + .action(AttributeAction.PUT) + .build()); + updates.put( + MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, + AttributeValueUpdate.builder() + .value(AttributeValue.fromN(String.valueOf(modifiedTimestamp))) + .action(AttributeAction.PUT) + .build()); + if (!history.isEmpty()) { + updates.put( + HISTORY_ATTRIBUTE_NAME, + AttributeValueUpdate.builder() + .value(AttributeValue.fromL( + history.stream().map(HistoryEntry::toAv).collect(Collectors.toList()))) + .action(AttributeAction.PUT) + .build()); + } + return updates; + } + + @RequiredArgsConstructor + @ToString + public static class HistoryEntry { + private final ClientVersion lastClientVersion; + private final String lastModifiedBy; + private final long lastModifiedTimestamp; + + public AttributeValue toAv() { + return AttributeValue.fromM(serialize()); + } + + public Map serialize() { + return new HashMap() { + { + put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(lastClientVersion.name())); + put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(lastModifiedBy)); + put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(lastModifiedTimestamp))); + } + }; + } + + public static HistoryEntry deserialize(final Map map) { + return new HistoryEntry( + 
ClientVersion.valueOf(map.get(CLIENT_VERSION_ATTRIBUTE_NAME).s()),
+                    map.get(MODIFIED_BY_ATTRIBUTE_NAME).s(),
+                    Long.parseLong(map.get(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n()));
+        }
+    }
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachine.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachine.java
new file mode 100644
index 000000000..4698feb08
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachine.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package software.amazon.kinesis.coordinator.migration;
+
+import software.amazon.kinesis.leases.exceptions.DependencyException;
+import software.amazon.kinesis.leases.exceptions.InvalidStateException;
+
+/**
+ * State machine that provides:
+ * 1. Seamless upgrade from 2.x to 3.x - 3.x has introduced new algorithms that are not compatible with 2.x
+ *    workers, so the state machine allows to seamlessly run the 2.x functionality to be compliant with any
+ *    2.x worker in the fleet, and also seamlessly switch to 3.x functionality when all KCL workers are
+ *    3.x compliant.
+ * 2. Instant rollbacks - Rollbacks are supported using the KCL Migration tool to revert back to 2.x functionality
+ *    if customer finds regressions in 3.x functionality.
+ * 3. 
Instant roll-forwards - Once any issue has been mitigated, roll-forwards are supported instantly
+ *    with KCL Migration tool.
+ */
+public interface MigrationStateMachine {
+    /**
+     * Initialize the state machine by identifying the initial state when the KCL worker comes up for the first time.
+     * @throws DependencyException When unable to identify the initial state.
+     */
+    void initialize() throws DependencyException;
+
+    /**
+     * Shutdown state machine and perform necessary cleanup for the worker to gracefully shutdown
+     */
+    void shutdown();
+
+    /**
+     * Terminate the state machine when it reaches a terminal state, which is a successful upgrade
+     * to v3.x.
+     */
+    void terminate();
+
+    /**
+     * Perform transition from current state to the given new ClientVersion
+     * @param nextClientVersion clientVersion of the new state the state machine must transition to
+     * @param state the current MigrationState in dynamo
+     * @throws InvalidStateException when transition fails, this allows the state machine to stay
+     *     in the current state until a valid transition is possible
+     * @throws DependencyException when transition fails due to dependency on DDB failing in
+     *     unexpected ways.
+     */
+    void transitionTo(final ClientVersion nextClientVersion, final MigrationState state)
+            throws InvalidStateException, DependencyException;
+
+    /**
+     * Get the ClientVersion of current state machine state.
+     * @return ClientVersion of current state machine state
+     */
+    ClientVersion getCurrentClientVersion();
+}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineImpl.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineImpl.java
new file mode 100644
index 000000000..6bc081895
--- /dev/null
+++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineImpl.java
@@ -0,0 +1,254 @@
+/*
+ * Copyright 2024 Amazon.com, Inc. 
or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import java.util.AbstractMap.SimpleEntry; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +/** + * Implementation of {@link MigrationStateMachine} + */ +@KinesisClientInternalApi +@Getter +@Slf4j +@ThreadSafe +public class MigrationStateMachineImpl implements MigrationStateMachine { + public static final String FAULT_METRIC = "Fault"; + public static final String METRICS_OPERATION = "Migration"; + + private static final long THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS 
= 5L; + + private final MetricsFactory metricsFactory; + private final Callable timeProvider; + private final CoordinatorStateDAO coordinatorStateDAO; + private final ScheduledExecutorService stateMachineThreadPool; + private DynamicMigrationComponentsInitializer initializer; + private final ClientVersionConfig clientVersionConfig; + private final Random random; + private final String workerId; + private final long flipTo3XStabilizerTimeInSeconds; + private MigrationState startingMigrationState; + + @Getter + private ClientVersion startingClientVersion; + + private MigrationClientVersionState currentMigrationClientVersionState = new MigrationClientVersionState() { + @Override + public ClientVersion clientVersion() { + return ClientVersion.CLIENT_VERSION_INIT; + } + + @Override + public void enter(final ClientVersion fromClientVersion) { + log.info("Entered {}...", clientVersion()); + } + + @Override + public void leave() { + log.info("Left {}...", clientVersion()); + } + }; + private boolean terminated = false; + + public MigrationStateMachineImpl( + final MetricsFactory metricsFactory, + final Callable timeProvider, + final CoordinatorStateDAO coordinatorStateDAO, + final ScheduledExecutorService stateMachineThreadPool, + final ClientVersionConfig clientVersionConfig, + final Random random, + final DynamicMigrationComponentsInitializer initializer, + final String workerId, + final long flipTo3XStabilizerTimeInSeconds) { + this.metricsFactory = metricsFactory; + this.timeProvider = timeProvider; + this.coordinatorStateDAO = coordinatorStateDAO; + this.stateMachineThreadPool = stateMachineThreadPool; + this.clientVersionConfig = clientVersionConfig; + this.random = random; + this.initializer = initializer; + this.workerId = workerId; + this.flipTo3XStabilizerTimeInSeconds = flipTo3XStabilizerTimeInSeconds; + } + + @Override + public void initialize() throws DependencyException { + if (startingClientVersion == null) { + log.info("Initializing MigrationStateMachine"); 
+ coordinatorStateDAO.initialize(); + final MigrationClientVersionStateInitializer startingStateInitializer = + new MigrationClientVersionStateInitializer( + timeProvider, coordinatorStateDAO, clientVersionConfig, random, workerId); + final SimpleEntry dataForInitialization = + startingStateInitializer.getInitialState(); + initializer.initialize(dataForInitialization.getKey()); + transitionTo(dataForInitialization.getKey(), dataForInitialization.getValue()); + startingClientVersion = dataForInitialization.getKey(); + startingMigrationState = dataForInitialization.getValue(); + log.info("MigrationStateMachine initial clientVersion {}", startingClientVersion); + } else { + log.info("MigrationStateMachine already initialized with clientVersion {}", startingClientVersion); + } + } + + @Override + public void shutdown() { + terminate(); + if (!stateMachineThreadPool.isShutdown()) { + stateMachineThreadPool.shutdown(); + try { + if (stateMachineThreadPool.awaitTermination(THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + log.info( + "StateMachineThreadPool did not shutdown within {} seconds, forcefully shutting down", + THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS); + stateMachineThreadPool.shutdownNow(); + } + } catch (final InterruptedException e) { + log.info("Interrupted when shutting down StateMachineThreadPool, forcefully shutting down"); + stateMachineThreadPool.shutdownNow(); + } + } + log.info("Shutdown successfully"); + } + + @Override + public synchronized void terminate() { + if (!terminated && currentMigrationClientVersionState != null) { + log.info("State machine is about to terminate"); + currentMigrationClientVersionState.leave(); + currentMigrationClientVersionState = null; + log.info("State machine reached a terminal state."); + terminated = true; + } + } + + @Override + public synchronized void transitionTo(final ClientVersion nextClientVersion, final MigrationState migrationState) + throws DependencyException { + if (terminated) { + throw new 
IllegalStateException(String.format(
+                    "Cannot transition to %s after state machine is terminated, %s",
+                    nextClientVersion.name(), migrationState));
+        }
+
+        final MigrationClientVersionState nextMigrationClientVersionState =
+                createMigrationClientVersionState(nextClientVersion, migrationState);
+        log.info(
+                "Attempting to transition from {} to {}",
+                currentMigrationClientVersionState.clientVersion(),
+                nextClientVersion);
+        currentMigrationClientVersionState.leave();
+
+        enter(nextMigrationClientVersionState);
+    }
+
+    /**
+     * Enter with retry. When entering the state machine for the first time, the caller has retry so exceptions
+     * will be re-thrown. Once the state machine has initialized all transitions will be an indefinite retry.
+     * It is possible the DDB state has changed by the time enter succeeds but that will occur as a new
+     * state transition after entering the state. Usually the failures are due to unexpected issues with
+     * DDB which will be transitional and will recover on a retry.
+     * @param nextMigrationClientVersionState the state to transition to
+     * @throws DependencyException If entering fails during state machine initialization.
+     */
+    private void enter(final MigrationClientVersionState nextMigrationClientVersionState) throws DependencyException {
+        boolean success = false;
+        while (!success) {
+            try {
+                // Enter should never fail unless it is the starting state and fails to create the GSI,
+                // in which case it is an unrecoverable error that is bubbled up and KCL start up will fail.
+                nextMigrationClientVersionState.enter(currentMigrationClientVersionState.clientVersion());
+
+                currentMigrationClientVersionState = nextMigrationClientVersionState;
+                log.info("Successfully transitioned to {}", nextMigrationClientVersionState.clientVersion());
+                if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_3X) {
+                    terminate();
+                }
+                success = true;
+            } catch (final DependencyException e) {
+                if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
+                    throw e;
+                }
+                // The message previously claimed a one-minute retry delay while the sleep below
+                // is 1 second; the message now matches the behavior.
+                // NOTE(review): if a 60s backoff was the real intent, change the sleep instead.
+                log.info(
+                        "Transitioning from {} to {} failed, retrying after a second",
+                        currentMigrationClientVersionState.clientVersion(),
+                        nextMigrationClientVersionState.clientVersion(),
+                        e);
+
+                final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
+                scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
+                MetricsUtil.endScope(scope);
+
+                try {
+                    Thread.sleep(1000);
+                } catch (final InterruptedException ie) {
+                    // Deliberately swallow and keep retrying: this loop must complete the
+                    // transition; re-interrupting here would turn the sleep into a busy spin.
+                    log.info("Interrupted while sleeping before retrying state machine transition", ie);
+                }
+            }
+        }
+    }
+
+    private MigrationClientVersionState createMigrationClientVersionState(
+            final ClientVersion clientVersion, final MigrationState migrationState) {
+        switch (clientVersion) {
+            case CLIENT_VERSION_2X:
+                return new MigrationClientVersion2xState(
+                        this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
+            case CLIENT_VERSION_UPGRADE_FROM_2X:
+                return new MigrationClientVersionUpgradeFrom2xState(
+                        this,
+                        timeProvider,
+                        coordinatorStateDAO,
+                        stateMachineThreadPool,
+                        initializer,
+                        random,
+                        migrationState,
+                        flipTo3XStabilizerTimeInSeconds);
+            case CLIENT_VERSION_3X_WITH_ROLLBACK:
+                return new MigrationClientVersion3xWithRollbackState(
+                        this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
+            case CLIENT_VERSION_3X:
+                return new MigrationClientVersion3xState(this, initializer);
+        }
+        throw new 
IllegalStateException(String.format("Unknown client version %s", clientVersion)); + } + + public ClientVersion getCurrentClientVersion() { + if (currentMigrationClientVersionState != null) { + return currentMigrationClientVersionState.clientVersion(); + } else if (terminated) { + return ClientVersion.CLIENT_VERSION_3X; + } + throw new UnsupportedOperationException( + "No current state when state machine is either not initialized" + " or already terminated"); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDecider.java new file mode 100644 index 000000000..8e9376afc --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDecider.java @@ -0,0 +1,270 @@ +package software.amazon.kinesis.leader; + +import java.time.Duration; +import java.time.Instant; +import java.util.AbstractMap; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import com.amazonaws.services.dynamodbv2.AcquireLockOptions; +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClient; +import com.amazonaws.services.dynamodbv2.GetLockOptions; +import com.amazonaws.services.dynamodbv2.LockItem; +import com.amazonaws.services.dynamodbv2.model.LockCurrentlyUnavailableException; +import com.google.common.annotations.VisibleForTesting; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import 
software.amazon.kinesis.metrics.MetricsScope;
+import software.amazon.kinesis.metrics.MetricsUtil;
+
+import static java.util.Objects.isNull;
+import static software.amazon.kinesis.coordinator.CoordinatorState.LEADER_HASH_KEY;
+
+/**
+ * Implementation for LeaderDecider to elect leader using lock on dynamo db table. This class uses
+ * AmazonDynamoDBLockClient library to perform the leader election.
+ */
+@RequiredArgsConstructor
+@Slf4j
+public class DynamoDBLockBasedLeaderDecider implements LeaderDecider {
+    private static final Long DEFAULT_LEASE_DURATION_MILLIS =
+            Duration.ofMinutes(2).toMillis();
+    // Heartbeat frequency should be at least 3 times smaller than the lease duration, according to
+    // the LockClient documentation.
+    private static final Long DEFAULT_HEARTBEAT_PERIOD_MILLIS =
+            Duration.ofSeconds(30).toMillis();
+
+    private final CoordinatorStateDAO coordinatorStateDao;
+    private final AmazonDynamoDBLockClient dynamoDBLockClient;
+    private final Long heartbeatPeriodMillis;
+    private final String workerId;
+    private final MetricsFactory metricsFactory;
+
+    // Memoized result of the last isLeader() check, refreshed at most once per heartbeatPeriodMillis.
+    private long lastCheckTimeInMillis = 0L;
+    private boolean lastIsLeaderResult = false;
+    private final AtomicBoolean isShutdown = new AtomicBoolean(false);
+
+    private long lastIsAnyLeaderElectedDDBReadTimeMillis = 0L;
+    private boolean lastIsAnyLeaderElectedResult = false;
+    /**
+     * Key value pair of LockItem to the time when it was first discovered.
+     * If a new LockItem fetched from ddb has a different recordVersionNumber than the one in-memory,
+     * it's considered a new LockItem, and the time when it was fetched is stored in memory to identify lockItem
+     * expiry. This is used only in the context of the isAnyLeaderElected method. 
+ */ + private AbstractMap.SimpleEntry lastIsAnyLeaderCheckLockItemToFirstEncounterTime = null; + + @VisibleForTesting + static DynamoDBLockBasedLeaderDecider create( + final CoordinatorStateDAO coordinatorStateDao, + final String workerId, + final Long leaseDuration, + final Long heartbeatPeriod, + final MetricsFactory metricsFactory) { + final AmazonDynamoDBLockClient dynamoDBLockClient = new AmazonDynamoDBLockClient(coordinatorStateDao + .getDDBLockClientOptionsBuilder() + .withTimeUnit(TimeUnit.MILLISECONDS) + .withLeaseDuration(leaseDuration) + .withHeartbeatPeriod(heartbeatPeriod) + .withCreateHeartbeatBackgroundThread(true) + .withOwnerName(workerId) + .build()); + + return new DynamoDBLockBasedLeaderDecider( + coordinatorStateDao, dynamoDBLockClient, heartbeatPeriod, workerId, metricsFactory); + } + + public static DynamoDBLockBasedLeaderDecider create( + final CoordinatorStateDAO coordinatorStateDao, final String workerId, final MetricsFactory metricsFactory) { + return create( + coordinatorStateDao, + workerId, + DEFAULT_LEASE_DURATION_MILLIS, + DEFAULT_HEARTBEAT_PERIOD_MILLIS, + metricsFactory); + } + + @Override + public void initialize() { + log.info("Initializing DDB Lock based leader decider"); + } + + /** + * Check the lockItem in storage and if the current worker is not leader worker, then tries to acquire lock and + * returns true if it was able to acquire lock else false. + * @param workerId ID of the worker + * @return true if current worker is leader else false. + */ + @Override + public synchronized Boolean isLeader(final String workerId) { + // if the decider has shutdown, then return false and don't try acquireLock anymore. + if (isShutdown.get()) { + publishIsLeaderMetrics(false); + return false; + } + // If the last time we tried to take lock and didnt get lock, don't try to take again for heartbeatPeriodMillis + // this is to avoid unnecessary calls to dynamoDB. 
+        // Different modules in KCL can request for isLeader check within heartbeatPeriodMillis, and this optimization
+        // will help in those cases.
+        // In case the last call returned true, we want to check the source always to ensure the correctness of leader.
+        if (!lastIsLeaderResult && lastCheckTimeInMillis + heartbeatPeriodMillis > System.currentTimeMillis()) {
+            publishIsLeaderMetrics(lastIsLeaderResult);
+            return lastIsLeaderResult;
+        }
+        boolean response;
+        // Get the lockItem from storage (if present).
+        final Optional lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
+        lockItem.ifPresent(item -> log.info("Worker : {} is the current leader.", item.getOwnerName()));
+
+        // If the lockItem is absent or expired, the current worker is not the leader; try to acquire the lock.
+        if (!lockItem.isPresent() || lockItem.get().isExpired()) {
+            try {
+                // Current worker does not hold the lock, try to acquire one.
+                final Optional leaderLockItem =
+                        dynamoDBLockClient.tryAcquireLock(AcquireLockOptions.builder(LEADER_HASH_KEY)
+                                .withRefreshPeriod(heartbeatPeriodMillis)
+                                .withTimeUnit(TimeUnit.MILLISECONDS)
+                                .withShouldSkipBlockingWait(true)
+                                .build());
+                leaderLockItem.ifPresent(item -> log.info("Worker : {} is new leader", item.getOwnerName()));
+                // if leaderLockItem optional is empty, that means the lock is not acquired by this worker.
+                response = leaderLockItem.isPresent();
+            } catch (final InterruptedException e) {
+                // Something bad happened, don't assume leadership and also release lock just in case the
+                // lock was granted and still interrupt happened. 
+ releaseLeadershipIfHeld(); + log.error("Acquiring lock was interrupted in between", e); + response = false; + + } catch (final LockCurrentlyUnavailableException e) { + response = false; + } + + } else { + response = lockItem.get().getOwnerName().equals(workerId); + } + + lastCheckTimeInMillis = System.currentTimeMillis(); + lastIsLeaderResult = response; + publishIsLeaderMetrics(response); + return response; + } + + private void publishIsLeaderMetrics(final boolean response) { + final MetricsScope metricsScope = + MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER); + metricsScope.addData( + METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED); + MetricsUtil.endScope(metricsScope); + } + + /** + * Releases the lock if held by current worker when this method is invoked. + */ + @Override + public void shutdown() { + if (!isShutdown.getAndSet(true)) { + releaseLeadershipIfHeld(); + } + } + + @Override + public void releaseLeadershipIfHeld() { + try { + final Optional lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty()); + if (lockItem.isPresent() + && !lockItem.get().isExpired() + && lockItem.get().getOwnerName().equals(workerId)) { + + log.info( + "Current worker : {} holds the lock, releasing it.", + lockItem.get().getOwnerName()); + // LockItem.close() will release the lock if current worker owns it else this call is no op. + lockItem.get().close(); + } + } catch (final Exception e) { + log.error("Failed to complete releaseLeadershipIfHeld call.", e); + } + } + + /** + * Returns if any ACTIVE leader exists that is elected by the current implementation which can be outside the + * scope of this worker. That is leader elected by this implementation in any worker in fleet. + * DynamoDBLockClient does not provide an interface which can tell if an active lock exists or not, thus + * we need to put custom implementation. 
+ * The implementation performs DDB get every heartbeatPeriodMillis to have low RCU consumption, which means that + * the leader could have been elected from the last time the check happened and before check happens again. + * The information returned from this method has eventual consistency (up to heartbeatPeriodMillis interval). + * + * @return true, if any leader is elected else false. + */ + @Override + public synchronized boolean isAnyLeaderElected() { + // Avoid going to ddb for every call and do it once every heartbeatPeriod to have low RCU usage. + if (Duration.between( + Instant.ofEpochMilli(lastIsAnyLeaderElectedDDBReadTimeMillis), + Instant.ofEpochMilli(System.currentTimeMillis())) + .toMillis() + > heartbeatPeriodMillis) { + final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation( + metricsFactory, this.getClass().getSimpleName() + ":isAnyLeaderElected"); + final long startTime = System.currentTimeMillis(); + try { + lastIsAnyLeaderElectedDDBReadTimeMillis = System.currentTimeMillis(); + final Optional lockItem = dynamoDBLockClient.getLockFromDynamoDB( + GetLockOptions.builder(LEADER_HASH_KEY).build()); + + if (!lockItem.isPresent()) { + // There is no LockItem in the ddb table, that means no one is holding lock. + lastIsAnyLeaderElectedResult = false; + log.info("LockItem present : {}", false); + } else { + final LockItem ddbLockItem = lockItem.get(); + if (isNull(lastIsAnyLeaderCheckLockItemToFirstEncounterTime) + || !ddbLockItem + .getRecordVersionNumber() + .equals(lastIsAnyLeaderCheckLockItemToFirstEncounterTime + .getKey() + .getRecordVersionNumber())) { + // This is the first isAnyLeaderElected call, so we can't evaluate if the LockItem has expired + // or not yet so consider LOCK as ACTIVE. 
+ // OR LockItem in ddb and in-memory LockItem have different RecordVersionNumber + // and thus the LOCK is still ACTIVE + lastIsAnyLeaderElectedResult = true; + lastIsAnyLeaderCheckLockItemToFirstEncounterTime = + new AbstractMap.SimpleEntry<>(ddbLockItem, lastIsAnyLeaderElectedDDBReadTimeMillis); + log.info( + "LockItem present : {}, and this is either first call OR lockItem has had " + + "a heartbeat", + true); + } else { + // There is no change in the ddb lock item, so if the last update time is more than + // lease duration, the lock is expired else it is still ACTIVE, + lastIsAnyLeaderElectedResult = lastIsAnyLeaderCheckLockItemToFirstEncounterTime.getValue() + + ddbLockItem.getLeaseDuration() + > lastIsAnyLeaderElectedDDBReadTimeMillis; + log.info("LockItem present : {}, and lease expiry: {}", true, lastIsAnyLeaderElectedResult); + } + } + } catch (final ResourceNotFoundException exception) { + log.info("Lock table does not exists..."); + // If the table itself doesn't exist, there is no elected leader. 
+ lastIsAnyLeaderElectedResult = false; + } finally { + metricsScope.addData( + "Latency", + System.currentTimeMillis() - startTime, + StandardUnit.MILLISECONDS, + MetricsLevel.DETAILED); + MetricsUtil.endScope(metricsScope); + } + } + return lastIsAnyLeaderElectedResult; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDecider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDecider.java new file mode 100644 index 000000000..8d9d27177 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDecider.java @@ -0,0 +1,79 @@ +package software.amazon.kinesis.leader; + +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +import static java.util.Objects.nonNull; + +/** + * MigrationAdaptiveLeaderDecider that wraps around the actual LeaderDecider which can dynamically + * change based on the MigrationStateMachine. 
+ */ +@Slf4j +@KinesisClientInternalApi +@ThreadSafe +public class MigrationAdaptiveLeaderDecider implements LeaderDecider { + + private final MetricsFactory metricsFactory; + private LeaderDecider currentLeaderDecider; + + public MigrationAdaptiveLeaderDecider(final MetricsFactory metricsFactory) { + this.metricsFactory = metricsFactory; + } + + @Override + public synchronized Boolean isLeader(final String workerId) { + if (currentLeaderDecider == null) { + throw new IllegalStateException("LeaderDecider uninitialized"); + } + + final MetricsScope scope = + MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER); + try { + publishSelectedLeaderDeciderMetrics(scope, currentLeaderDecider); + return currentLeaderDecider.isLeader(workerId); + } finally { + MetricsUtil.endScope(scope); + } + } + + private static void publishSelectedLeaderDeciderMetrics( + final MetricsScope scope, final LeaderDecider leaderDecider) { + scope.addData( + String.format(leaderDecider.getClass().getSimpleName()), 1D, StandardUnit.COUNT, MetricsLevel.DETAILED); + } + + public synchronized void updateLeaderDecider(final LeaderDecider leaderDecider) { + if (currentLeaderDecider != null) { + currentLeaderDecider.shutdown(); + log.info( + "Updating leader decider dynamically from {} to {}", + this.currentLeaderDecider.getClass().getSimpleName(), + leaderDecider.getClass().getSimpleName()); + } else { + log.info( + "Initializing dynamic leader decider with {}", + leaderDecider.getClass().getSimpleName()); + } + currentLeaderDecider = leaderDecider; + currentLeaderDecider.initialize(); + } + + @Override + public void shutdown() { + if (nonNull(currentLeaderDecider)) { + log.info("Shutting down current {}", currentLeaderDecider.getClass().getSimpleName()); + currentLeaderDecider.shutdown(); + currentLeaderDecider = null; + } else { + log.info("LeaderDecider has already been shutdown"); + } + } +} diff --git 
a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java index 34b13f646..6c27e1441 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/DynamoUtils.java @@ -81,8 +81,20 @@ public static Long safeGetLong(Map dynamoRecord, String } } + public static AttributeValue createAttributeValue(Double doubleValue) { + if (doubleValue == null) { + throw new IllegalArgumentException("Double attributeValues cannot be null."); + } + + return AttributeValue.builder().n(doubleValue.toString()).build(); + } + public static String safeGetString(Map dynamoRecord, String key) { AttributeValue av = dynamoRecord.get(key); + return safeGetString(av); + } + + public static String safeGetString(AttributeValue av) { if (av == null) { return null; } else { @@ -99,4 +111,13 @@ public static List safeGetSS(Map dynamoRecord, S return av.ss(); } } + + public static Double safeGetDouble(Map dynamoRecord, String key) { + AttributeValue av = dynamoRecord.get(key); + if (av == null) { + return null; + } else { + return new Double(av.n()); + } + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java index d128fc950..97ed48d47 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/KinesisShardDetector.java @@ -103,26 +103,6 @@ public class KinesisShardDetector implements ShardDetector { private static final Boolean THROW_RESOURCE_NOT_FOUND_EXCEPTION = true; - @Deprecated - public KinesisShardDetector( - KinesisAsyncClient kinesisClient, - String streamName, - long listShardsBackoffTimeInMillis, - int 
maxListShardsRetryAttempts, - long listShardsCacheAllowedAgeInSeconds, - int maxCacheMissesBeforeReload, - int cacheMissWarningModulus) { - this( - kinesisClient, - StreamIdentifier.singleStreamInstance(streamName), - listShardsBackoffTimeInMillis, - maxListShardsRetryAttempts, - listShardsCacheAllowedAgeInSeconds, - maxCacheMissesBeforeReload, - cacheMissWarningModulus, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT); - } - public KinesisShardDetector( KinesisAsyncClient kinesisClient, StreamIdentifier streamIdentifier, diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java index 01735f9c8..9d44a7554 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/Lease.java @@ -46,7 +46,11 @@ "lastCounterIncrementNanos", "childShardIds", "pendingCheckpointState", - "isMarkedForLeaseSteal" + "isMarkedForLeaseSteal", + "throughputKBps", + "checkpointOwner", + "checkpointOwnerTimeoutTimestampMillis", + "isExpiredOrUnassigned" }) @ToString public class Lease { @@ -104,6 +108,33 @@ public class Lease { @Setter private boolean isMarkedForLeaseSteal; + /** + * If true, this indicates that lease is ready to be immediately reassigned. + */ + @Setter + private boolean isExpiredOrUnassigned; + + /** + * Throughput in Kbps for the lease. + */ + private Double throughputKBps; + + /** + * Owner of the checkpoint. The attribute is used for graceful shutdowns to indicate the owner that + * is allowed to write the checkpoint. + */ + @Setter + private String checkpointOwner; + + /** + * This field is used for tracking when the shutdown was requested on the lease so we can expire it. This is + * deliberately not persisted in DynamoDB because leaseOwner are expected to transfer lease from itself to the + * next owner during shutdown. 
If the worker dies before shutdown the lease will just become expired then we can + * pick it up. If for some reason the worker is not able to shut down and continues holding onto the lease + * this timeout will kick in and force a lease transfer. + */ + @Setter + private Long checkpointOwnerTimeoutTimestampMillis; /** * Count of distinct lease holders between checkpoints. */ @@ -242,6 +273,54 @@ public boolean isExpired(long leaseDurationNanos, long asOfNanos) { } } + /** + * @return true if checkpoint owner is set, indicating a requested shutdown. + */ + public boolean shutdownRequested() { + return checkpointOwner != null; + } + + /** + * Check whether lease should be blocked on pending checkpoint. We DON'T block if + * - lease is expired (Expired lease should be assigned right away) OR + * ----- at this point we know lease is assigned ----- + * - lease is shardEnd (No more processing possible) OR + * - lease is NOT requested for shutdown OR + * - lease shutdown expired + * + * @param currentTimeMillis current time in milliseconds + * @return true if lease is blocked on pending checkpoint + */ + public boolean blockedOnPendingCheckpoint(long currentTimeMillis) { + // using ORs and negate + return !(isExpiredOrUnassigned + || ExtendedSequenceNumber.SHARD_END.equals(checkpoint) + || !shutdownRequested() + // if shutdown is requested then checkpointOwnerTimeoutTimestampMillis should be present + || currentTimeMillis - checkpointOwnerTimeoutTimestampMillis >= 0); + } + + /** + * Check whether lease is eligible for graceful shutdown.
It's eligible if + * - lease is still assigned (not expired) AND + * - lease is NOT shardEnd (No more processing possible) AND + * - lease is NOT requested for shutdown + * + * @return true if lease is eligible for graceful shutdown + */ + public boolean isEligibleForGracefulShutdown() { + return !isExpiredOrUnassigned && !ExtendedSequenceNumber.SHARD_END.equals(checkpoint) && !shutdownRequested(); + } + + /** + * Need to handle the case during graceful shutdown where leaseOwner isn't the current owner + * + * @return the actual owner + */ + public String actualOwner() { + return checkpointOwner == null ? leaseOwner : checkpointOwner; + } + /** * @return true if lease is not currently owned */ @@ -343,6 +422,15 @@ public void childShardIds(@NonNull final Collection childShardIds) { this.childShardIds.addAll(childShardIds); } + /** + * Sets throughputKBps. + * + * @param throughputKBps throughput of the lease in KBps + */ + public void throughputKBps(double throughputKBps) { + this.throughputKBps = throughputKBps; + } + /** * Set the hash range key for this shard. * @param hashKeyRangeForLease @@ -370,6 +458,8 @@ public void leaseOwner(String leaseOwner) { * @return A deep copy of this object.
*/ public Lease copy() { - return new Lease(this); + final Lease lease = new Lease(this); + lease.checkpointOwner(this.checkpointOwner); + return lease; } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java index acc08dabc..4a42b2c4a 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseCoordinator.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.UUID; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator; import software.amazon.kinesis.leases.exceptions.DependencyException; import software.amazon.kinesis.leases.exceptions.InvalidStateException; @@ -38,11 +39,14 @@ public interface LeaseCoordinator { /** * Start background LeaseHolder and LeaseTaker threads. + * @param leaseAssignmentModeProvider provider of Lease Assignment mode to determine whether to start components + * for both V2 and V3 functionality or only V3 functionality * @throws ProvisionedThroughputException If we can't talk to DynamoDB due to insufficient capacity. * @throws InvalidStateException If the lease table doesn't exist * @throws DependencyException If we encountered exception taking to DynamoDB */ - void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException; + void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) + throws DependencyException, InvalidStateException, ProvisionedThroughputException; /** * Runs a single iteration of the lease taker - used by integration tests. 
@@ -152,4 +156,9 @@ default List allLeases() { * @return LeaseCoordinator */ DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity); + + /** + * @return instance of {@link LeaseStatsRecorder} + */ + LeaseStatsRecorder leaseStatsRecorder(); } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseDiscoverer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseDiscoverer.java new file mode 100644 index 000000000..7e016f415 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseDiscoverer.java @@ -0,0 +1,20 @@ +package software.amazon.kinesis.leases; + +import java.util.List; + +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; + +public interface LeaseDiscoverer { + /** + * Identifies the leases that are assigned to the current worker but are not being tracked and processed by the + * current worker. 
+ * + * @return list of leases assigned to worker which don't exist in {@code currentHeldLeaseKeys} + * @throws DependencyException if DynamoDB scan fails in an unexpected way + * @throws InvalidStateException if lease table does not exist + * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity + */ + List discoverNewLeases() throws ProvisionedThroughputException, InvalidStateException, DependencyException; +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java index 2d4e041c0..ef750f461 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementConfig.java @@ -16,7 +16,9 @@ package software.amazon.kinesis.leases; import java.time.Duration; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadFactory; @@ -25,6 +27,7 @@ import java.util.function.Function; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import lombok.Builder; import lombok.Data; import lombok.NonNull; import lombok.experimental.Accessors; @@ -34,14 +37,17 @@ import software.amazon.awssdk.services.dynamodb.model.BillingMode; import software.amazon.awssdk.services.dynamodb.model.Tag; import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.common.InitialPositionInStream; import software.amazon.kinesis.common.InitialPositionInStreamExtended; import software.amazon.kinesis.common.LeaseCleanupConfig; import software.amazon.kinesis.common.StreamConfig; import
software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseManagementFactory; +import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer; import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback; import software.amazon.kinesis.metrics.MetricsFactory; import software.amazon.kinesis.metrics.NullMetricsFactory; +import software.amazon.kinesis.worker.metric.WorkerMetric; /** * Used by the KCL to configure lease management. @@ -209,6 +215,9 @@ public class LeaseManagementConfig { private BillingMode billingMode = BillingMode.PAY_PER_REQUEST; + private WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig = + new WorkerUtilizationAwareAssignmentConfig(); + /** * Whether to enable deletion protection on the DynamoDB lease table created by KCL. This does not update * already existing tables. @@ -276,14 +285,17 @@ public LeaseManagementConfig( } public LeaseManagementConfig( - String tableName, - DynamoDbAsyncClient dynamoDBClient, - KinesisAsyncClient kinesisClient, - String workerIdentifier) { + final String tableName, + final String applicationName, + final DynamoDbAsyncClient dynamoDBClient, + final KinesisAsyncClient kinesisClient, + final String workerIdentifier) { this.tableName = tableName; this.dynamoDBClient = dynamoDBClient; this.kinesisClient = kinesisClient; this.workerIdentifier = workerIdentifier; + this.workerUtilizationAwareAssignmentConfig.workerMetricsTableConfig = + new WorkerMetricsTableConfig(applicationName); } /** @@ -350,10 +362,18 @@ static class LeaseManagementThreadPool extends ThreadPoolExecutor { */ private TableCreatorCallback tableCreatorCallback = TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK; + /** + * @deprecated never used and will be removed in future releases + */ + @Deprecated private HierarchicalShardSyncer hierarchicalShardSyncer; private LeaseManagementFactory leaseManagementFactory; + /** + * @deprecated never used and will be removed in future releases + */ + @Deprecated public 
HierarchicalShardSyncer hierarchicalShardSyncer() { if (hierarchicalShardSyncer == null) { hierarchicalShardSyncer = new HierarchicalShardSyncer(); @@ -361,39 +381,63 @@ public HierarchicalShardSyncer hierarchicalShardSyncer() { return hierarchicalShardSyncer; } + /** + * Configuration class for controlling the graceful handoff of leases. + * This configuration allows tuning of the shutdown behavior during lease transfers. + *

+ * It provides settings to control the timeout period for waiting on the record processor + * to shut down and an option to enable or disable graceful lease handoff. + *

+ */ + @Data + @Builder + @Accessors(fluent = true) + public static class GracefulLeaseHandoffConfig { + /** + * The minimum amount of time (in milliseconds) to wait for the current shard's RecordProcessor + * to gracefully shut down before forcefully transferring the lease to the next owner. + *

+ * If each call to {@code processRecords} is expected to run longer than the default value, + * it makes sense to set this to a higher value to ensure the RecordProcessor has enough + * time to complete its processing. + *

+ *

+ * Default value is 30,000 milliseconds (30 seconds). + *

+ */ + @Builder.Default + private long gracefulLeaseHandoffTimeoutMillis = 30_000L; + /** + * Flag to enable or disable the graceful lease handoff mechanism. + *

+ * When set to {@code true}, the KCL will attempt to gracefully transfer leases by + * allowing the shard's RecordProcessor sufficient time to complete processing before + * handing off the lease to another worker. When {@code false}, the lease will be + * handed off without waiting for the RecordProcessor to shut down gracefully. Note + * that checkpointing is expected to be implemented inside {@code shutdownRequested} + * for this feature to work end to end. + *

+ *

+ * Default value is {@code true}. + *

+ */ + @Builder.Default + private boolean isGracefulLeaseHandoffEnabled = true; + } + + private GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig = + GracefulLeaseHandoffConfig.builder().build(); + + /** + * @deprecated This is no longer invoked, but {@code leaseManagementFactory(LeaseSerializer, boolean)} + * is invoked instead. Please remove implementation for this method as future + * releases will remove this API. + */ @Deprecated public LeaseManagementFactory leaseManagementFactory() { if (leaseManagementFactory == null) { Validate.notEmpty(streamName(), "Stream name is empty"); - leaseManagementFactory = new DynamoDBLeaseManagementFactory( - kinesisClient(), - streamName(), - dynamoDBClient(), - tableName(), - workerIdentifier(), - executorService(), - initialPositionInStream(), - failoverTimeMillis(), - epsilonMillis(), - maxLeasesForWorker(), - maxLeasesToStealAtOneTime(), - maxLeaseRenewalThreads(), - cleanupLeasesUponShardCompletion(), - ignoreUnexpectedChildShards(), - shardSyncIntervalMillis(), - consistentReads(), - listShardsBackoffTimeInMillis(), - maxListShardsRetryAttempts(), - maxCacheMissesBeforeReload(), - listShardsCacheAllowedAgeInSeconds(), - cacheMissWarningModulus(), - initialLeaseTableReadCapacity(), - initialLeaseTableWriteCapacity(), - hierarchicalShardSyncer(), - tableCreatorCallback(), - dynamoDbRequestTimeout(), - billingMode(), - tags()); + leaseManagementFactory(new DynamoDBLeaseSerializer(), false); } return leaseManagementFactory; } @@ -430,7 +474,6 @@ public LeaseManagementFactory leaseManagementFactory( cacheMissWarningModulus(), initialLeaseTableReadCapacity(), initialLeaseTableWriteCapacity(), - hierarchicalShardSyncer(), tableCreatorCallback(), dynamoDbRequestTimeout(), billingMode(), @@ -440,7 +483,9 @@ public LeaseManagementFactory leaseManagementFactory( leaseSerializer, customShardDetectorProvider(), isMultiStreamingMode, - leaseCleanupConfig()); + leaseCleanupConfig(), + workerUtilizationAwareAssignmentConfig(), + 
gracefulLeaseHandoffConfig); } return leaseManagementFactory; } @@ -454,4 +499,90 @@ public LeaseManagementConfig leaseManagementFactory(final LeaseManagementFactory this.leaseManagementFactory = leaseManagementFactory; return this; } + + @Data + @Accessors(fluent = true) + public static class WorkerUtilizationAwareAssignmentConfig { + /** + * This defines the frequency of capturing worker metric stats in memory. Default is 1s + */ + private long inMemoryWorkerMetricsCaptureFrequencyMillis = + Duration.ofSeconds(1L).toMillis(); + /** + * This defines the frequency of reporting worker metric stats to storage. Default is 30s + */ + private long workerMetricsReporterFreqInMillis = Duration.ofSeconds(30).toMillis(); + /** + * These are the no. of metrics that are persisted in storage in WorkerMetricStats ddb table. + */ + private int noOfPersistedMetricsPerWorkerMetrics = 10; + /** + * Option to disable workerMetrics to use in lease balancing. + */ + private boolean disableWorkerMetrics = false; + /** + * List of workerMetrics for the application. + */ + private List workerMetricList = new ArrayList<>(); + /** + * Max throughput per host KBps, default is unlimited. + */ + private double maxThroughputPerHostKBps = Double.MAX_VALUE; + /** + * Percentage of value to achieve critical dampening during this case + */ + private int dampeningPercentage = 60; + /** + * Percentage value used to trigger reBalance. If fleet has workers which are have metrics value more or less + * than 10% of fleet level average then reBalance is triggered. + * Leases are taken from workers with metrics value more than fleet level average. The load to take from these + * workers is determined by evaluating how far they are with respect to fleet level average. 
+ */ + private int reBalanceThresholdPercentage = 10; + + /** + * The allowThroughputOvershoot flag determines whether leases should still be taken even if + * it causes the total assigned throughput to exceed the desired throughput to take for re-balance. + * Enabling this flag provides more flexibility for the LeaseAssignmentManager to explore additional + * assignment possibilities, which can lead to faster throughput convergence. + */ + private boolean allowThroughputOvershoot = true; + + /** + * Duration after which workerMetricStats entry from WorkerMetricStats table will be cleaned up. When an entry's + * lastUpdateTime is older than staleWorkerMetricsEntryCleanupDuration from current time, entry will be removed + * from the table. + */ + private Duration staleWorkerMetricsEntryCleanupDuration = Duration.ofDays(1); + + /** + * Configuration for how to create the WorkerMetricStats table, such as table name, + * billing mode, provisioned capacity. If no table name is specified, the table name will + * default to applicationName-WorkerMetricStats. If no billing mode is chosen, default is + * On-Demand. + */ + private WorkerMetricsTableConfig workerMetricsTableConfig; + + /** + * Frequency to perform worker variance balancing. This value is used with respect to the LAM frequency, + * that is every third (as default) iteration of LAM the worker variance balancing will be performed. + * Setting it to 1 will make varianceBalancing run on every iteration of LAM and 2 on every 2nd iteration + * and so on. + * NOTE: LAM frequency = failoverTimeMillis + */ + private int varianceBalancingFrequency = 3; + + /** + * Alpha value used for calculating exponential moving average of worker's metricStats. Selecting + * higher alpha value gives more weightage to recent value and thus low smoothing effect on computed average + * and selecting smaller alpha values gives more weightage to past value and high smoothing effect.
+ */ + private double workerMetricsEMAAlpha = 0.5; + } + + public static class WorkerMetricsTableConfig extends DdbTableConfig { + public WorkerMetricsTableConfig(final String applicationName) { + super(applicationName, "WorkerMetricStats"); + } + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java index 9ed77a537..788034d1e 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseManagementFactory.java @@ -15,9 +15,12 @@ package software.amazon.kinesis.leases; +import java.util.concurrent.ConcurrentMap; + import software.amazon.kinesis.common.StreamConfig; import software.amazon.kinesis.coordinator.DeletedStreamListProvider; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher; +import software.amazon.kinesis.lifecycle.ShardConsumer; import software.amazon.kinesis.metrics.MetricsFactory; /** @@ -26,10 +29,27 @@ public interface LeaseManagementFactory { LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory); - ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory); + default LeaseCoordinator createLeaseCoordinator( + MetricsFactory metricsFactory, ConcurrentMap shardInfoShardConsumerMap) { + throw new UnsupportedOperationException("Not implemented"); + } + /** + * @deprecated This method is never invoked, please remove implementation of this method + * as it will be removed in future releases. + */ + @Deprecated + default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory) { + throw new UnsupportedOperationException("Deprecated"); + } + + /** + * @deprecated This method is never invoked, please remove implementation of this method + * as it will be removed in future releases. 
+ */ + @Deprecated default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("Deprecated"); } default ShardSyncTaskManager createShardSyncTaskManager( @@ -41,10 +61,17 @@ default ShardSyncTaskManager createShardSyncTaskManager( DynamoDBLeaseRefresher createLeaseRefresher(); - ShardDetector createShardDetector(); + /** + * @deprecated This method is never invoked, please remove implementation of this method + * as it will be removed in future releases. + */ + @Deprecated + default ShardDetector createShardDetector() { + throw new UnsupportedOperationException("Deprecated"); + } default ShardDetector createShardDetector(StreamConfig streamConfig) { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("Not implemented"); } LeaseCleanupManager createLeaseCleanupManager(MetricsFactory metricsFactory); diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java index c38d442a8..fc71621d1 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseRefresher.java @@ -15,6 +15,9 @@ package software.amazon.kinesis.leases; import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; import software.amazon.kinesis.common.StreamIdentifier; import software.amazon.kinesis.leases.exceptions.DependencyException; @@ -75,6 +78,37 @@ boolean createLeaseTableIfNotExists(Long readCapacity, Long writeCapacity) */ boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutSeconds) throws DependencyException; + /** + * Creates the LeaseOwnerToLeaseKey index on the lease table if it doesn't 
exist and returns the status of index. + * + * @return indexStatus status of the index. + * @throws DependencyException if storage's describe API fails in an unexpected way + */ + default String createLeaseOwnerToLeaseKeyIndexIfNotExists() throws DependencyException { + return null; + } + + /** + * Blocks until the index exists by polling storage till either the index is ACTIVE or else timeout has + * happened. + * + * @param secondsBetweenPolls time to wait between polls in seconds + * @param timeoutSeconds total time to wait in seconds + * + * @return true if index on the table exists and is ACTIVE, false if timeout was reached + */ + default boolean waitUntilLeaseOwnerToLeaseKeyIndexExists( + final long secondsBetweenPolls, final long timeoutSeconds) { + return false; + } + + /** + * Check if leaseOwner GSI is ACTIVE + * @return true if index is active, false otherwise + * @throws DependencyException if storage's describe API fails in an unexpected way + */ + boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException; + /** * List all leases for a given stream synchronously. * @@ -87,6 +121,24 @@ boolean createLeaseTableIfNotExists(Long readCapacity, Long writeCapacity) List listLeasesForStream(StreamIdentifier streamIdentifier) throws DependencyException, InvalidStateException, ProvisionedThroughputException; + /** + * List all leases for a given workerIdentifier synchronously. + * Default implementation calls listLeases() and filters the results. 
+ * + * @throws DependencyException if DynamoDB scan fails in an unexpected way + * @throws InvalidStateException if lease table does not exist + * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity + * + * @return list of lease keys + */ + default List listLeaseKeysForWorker(final String workerIdentifier) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + return listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerIdentifier)) + .map(Lease::leaseKey) + .collect(Collectors.toList()); + } + /** * List all objects in table synchronously. * @@ -98,6 +150,23 @@ List listLeasesForStream(StreamIdentifier streamIdentifier) */ List listLeases() throws DependencyException, InvalidStateException, ProvisionedThroughputException; + /** + * List all leases from the storage in parallel and deserialize into Lease objects. Returns the list of leaseKey + * that failed to deserialize separately. + * + * @param threadPool threadpool to use for parallel scan + * @param parallelismFactor no. of parallel scans + * @return Pair of List of leases from the storage and List of items failed to deserialize + * @throws DependencyException if DynamoDB scan fails in an unexpected way + * @throws InvalidStateException if lease table does not exist + * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity + */ + default Map.Entry, List> listLeasesParallely( + final ExecutorService threadPool, final int parallelismFactor) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + throw new UnsupportedOperationException("listLeasesParallely is not implemented"); + } + /** * Create a new lease. Conditional on a lease not already existing with this shardId.
* @@ -154,6 +223,47 @@ boolean createLeaseIfNotExists(Lease lease) boolean takeLease(Lease lease, String owner) throws DependencyException, InvalidStateException, ProvisionedThroughputException; + /** + * Assigns given lease to newOwner owner by incrementing its leaseCounter and setting its owner field. Conditional + * on the leaseOwner in DynamoDB matching the leaseOwner of the input lease. Mutates the leaseCounter and owner of + * the passed-in lease object after updating DynamoDB. + * + * @param lease the lease to be assigned + * @param newOwner the new owner + * + * @return true if lease was successfully assigned, false otherwise + * + * @throws InvalidStateException if lease table does not exist + * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity + * @throws DependencyException if DynamoDB update fails in an unexpected way + */ + default boolean assignLease(final Lease lease, final String newOwner) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + + throw new UnsupportedOperationException("assignLease is not implemented"); + } + + /** + * Initiates a graceful handoff of the given lease to the specified new owner, allowing the current owner + * to complete its processing before transferring ownership. + *

+ * This method updates the lease with the new owner information but ensures that the current owner + * is given time to gracefully finish its work (e.g., processing records) before the lease is reassigned. + *

+ * + * @param lease the lease to be assigned + * @param newOwner the new owner + * @return true if a graceful handoff was successfully initiated + * @throws InvalidStateException if lease table does not exist + * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity + * @throws DependencyException if DynamoDB update fails in an unexpected way + */ + default boolean initiateGracefulLeaseHandoff(final Lease lease, final String newOwner) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + + throw new UnsupportedOperationException("assignLeaseWithWait is not implemented"); + } + /** * Evict the current owner of lease by setting owner to null. Conditional on the owner in DynamoDB matching the owner of * the input. Mutates the lease counter and owner of the passed-in lease object after updating the record in DynamoDB. diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java index 5d7bea63d..3c4692a92 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseSerializer.java @@ -15,6 +15,7 @@ package software.amazon.kinesis.leases; import java.util.Collection; +import java.util.Collections; import java.util.Map; import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition; @@ -100,6 +101,15 @@ default Map getDynamoExistentExpectation(String */ Map getDynamoTakeLeaseUpdate(Lease lease, String newOwner); + /** + * @param lease lease that needs to be assigned + * @param newOwner newLeaseOwner + * @return the attribute value map that takes a lease for a new owner + */ + default Map getDynamoAssignLeaseUpdate(Lease lease, String newOwner) { + throw new UnsupportedOperationException("getDynamoAssignLeaseUpdate is not implemented"); + } + /** * 
@param lease * @return the attribute value map that voids a lease @@ -127,8 +137,22 @@ default Map getDynamoUpdateLeaseUpdate(Lease lease */ Collection getKeySchema(); + default Collection getWorkerIdToLeaseKeyIndexKeySchema() { + return Collections.EMPTY_LIST; + } + + default Collection getWorkerIdToLeaseKeyIndexAttributeDefinitions() { + return Collections.EMPTY_LIST; + } + /** * @return attribute definitions for creating a DynamoDB table to store leases */ Collection getAttributeDefinitions(); + + /** + * @param lease + * @return the attribute value map that includes lease throughput + */ + Map getDynamoLeaseThroughputKbpsUpdate(Lease lease); } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseStatsRecorder.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseStatsRecorder.java new file mode 100644 index 000000000..dcb5d6de4 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/LeaseStatsRecorder.java @@ -0,0 +1,158 @@ +package software.amazon.kinesis.leases; + +import java.util.LinkedList; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; + +import lombok.Builder; +import lombok.Getter; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import software.amazon.awssdk.annotations.ThreadSafe; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.utils.ExponentialMovingAverage; + +import static java.util.Objects.isNull; + +/** + * This class records the stats for the leases. + * The stats are recorded in a thread safe queue, and the throughput is calculated by summing up the bytes and dividing + * by interval in seconds. + * This class is thread safe and backed by thread safe data structures. 
+ */ +@RequiredArgsConstructor +@KinesisClientInternalApi +@ThreadSafe +public class LeaseStatsRecorder { + + /** + * This default alpha is chosen based on the testing so far between simple average and moving average with 0.5. + * In the future, if one value does not fit all use cases, inject this via config. + */ + private static final double DEFAULT_ALPHA = 0.5; + + public static final int BYTES_PER_KB = 1024; + + private final Long renewerFrequencyInMillis; + private final Map> leaseStatsMap = new ConcurrentHashMap<>(); + private final Map leaseKeyToExponentialMovingAverageMap = + new ConcurrentHashMap<>(); + private final Callable timeProviderInMillis; + + /** + * This method provides happens-before semantics (i.e., the action (access or removal) from a thread happens + * before the action from subsequent thread) for the stats recording in multithreaded environment. + */ + public void recordStats(@NonNull final LeaseStats leaseStats) { + final Queue leaseStatsQueue = + leaseStatsMap.computeIfAbsent(leaseStats.getLeaseKey(), lease -> new ConcurrentLinkedQueue<>()); + leaseStatsQueue.add(leaseStats); + } + + /** + * Calculates the throughput in KBps for the given leaseKey. + * Method first clears the items that are older than {@link #renewerFrequencyInMillis} from the queue and then + * calculates the throughput per second during {@link #renewerFrequencyInMillis} interval and then returns the + * ExponentialMovingAverage of the throughput. If method is called in quick succession with or without new stats + * the result can be different as ExponentialMovingAverage decays old values on every new call. + * This method is thread safe. + * @param leaseKey leaseKey for which stats are required + * @return throughput in Kbps, returns null if there is no stats available for the leaseKey. 
+ */ + public Double getThroughputKBps(final String leaseKey) { + final Queue leaseStatsQueue = leaseStatsMap.get(leaseKey); + + if (isNull(leaseStatsQueue)) { + // This means there is no entry for this leaseKey yet + return null; + } + + filterExpiredEntries(leaseStatsQueue); + + // Convert bytes into KB and divide by interval in second to get throughput per second. + final ExponentialMovingAverage exponentialMovingAverage = leaseKeyToExponentialMovingAverageMap.computeIfAbsent( + leaseKey, leaseId -> new ExponentialMovingAverage(DEFAULT_ALPHA)); + + // Specifically dividing by 1000.0 rather than using Duration class to get seconds, because Duration class + // implementation rounds off to seconds and precision is lost. + final double frequency = renewerFrequencyInMillis / 1000.0; + final double throughput = readQueue(leaseStatsQueue).stream() + .mapToDouble(LeaseStats::getBytes) + .sum() + / BYTES_PER_KB + / frequency; + exponentialMovingAverage.add(throughput); + return exponentialMovingAverage.getValue(); + } + + /** + * Gets the currentTimeMillis and then iterates over the queue to get the stats with creation time less than + * currentTimeMillis. + * This is specifically done to avoid potential race between with high-frequency put thread blocking get thread. + */ + private Queue readQueue(final Queue leaseStatsQueue) { + final long currentTimeMillis = getCurrenTimeInMillis(); + final Queue response = new LinkedList<>(); + for (LeaseStats leaseStats : leaseStatsQueue) { + if (leaseStats.creationTimeMillis > currentTimeMillis) { + break; + } + response.add(leaseStats); + } + return response; + } + + private long getCurrenTimeInMillis() { + try { + return timeProviderInMillis.call(); + } catch (final Exception e) { + // Fallback to using the System.currentTimeMillis if failed. 
+ return System.currentTimeMillis(); + } + } + + private void filterExpiredEntries(final Queue leaseStatsQueue) { + final long currentTime = getCurrenTimeInMillis(); + while (!leaseStatsQueue.isEmpty()) { + final LeaseStats leaseStats = leaseStatsQueue.peek(); + if (isNull(leaseStats) || currentTime - leaseStats.getCreationTimeMillis() < renewerFrequencyInMillis) { + break; + } + leaseStatsQueue.poll(); + } + } + + /** + * Clear the in-memory stats for the lease when a lease is reassigned (due to shut down or lease stealing) + * @param leaseKey leaseKey, for which stats are supposed to be clear. + */ + public void dropLeaseStats(final String leaseKey) { + leaseStatsMap.remove(leaseKey); + leaseKeyToExponentialMovingAverageMap.remove(leaseKey); + } + + @Builder + @Getter + @ToString + @KinesisClientInternalApi + public static final class LeaseStats { + /** + * Lease key for which this leaseStats object is created. + */ + private final String leaseKey; + /** + * Bytes that are processed for a lease + */ + private final long bytes; + /** + * Wall time in epoch millis at which this leaseStats object was created. This time is used to determine the + * expiry of the lease stats. + */ + @Builder.Default + private final long creationTimeMillis = System.currentTimeMillis(); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java index add8cf4f6..9b63883bd 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/ShardSyncTaskManager.java @@ -71,7 +71,7 @@ public class ShardSyncTaskManager { /** * Constructor. * - *

NOTE: This constructor is deprecated and will be removed in a future release.

+ * @deprecated This constructor is deprecated and will be removed in a future release. * * @param shardDetector * @param leaseRefresher @@ -92,18 +92,16 @@ public ShardSyncTaskManager( long shardSyncIdleTimeMillis, ExecutorService executorService, MetricsFactory metricsFactory) { - this.shardDetector = shardDetector; - this.leaseRefresher = leaseRefresher; - this.initialPositionInStream = initialPositionInStream; - this.cleanupLeasesUponShardCompletion = cleanupLeasesUponShardCompletion; - this.garbageCollectLeases = true; - this.ignoreUnexpectedChildShards = ignoreUnexpectedChildShards; - this.shardSyncIdleTimeMillis = shardSyncIdleTimeMillis; - this.executorService = executorService; - this.hierarchicalShardSyncer = new HierarchicalShardSyncer(); - this.metricsFactory = metricsFactory; - this.shardSyncRequestPending = new AtomicBoolean(false); - this.lock = new ReentrantLock(); + this( + shardDetector, + leaseRefresher, + initialPositionInStream, + cleanupLeasesUponShardCompletion, + ignoreUnexpectedChildShards, + shardSyncIdleTimeMillis, + executorService, + new HierarchicalShardSyncer(), + metricsFactory); } /** diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java index bef76ef05..7eb4c4f1a 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinator.java @@ -19,6 +19,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedTransferQueue; @@ -30,13 +31,17 @@ import java.util.stream.Collectors; import 
com.google.common.util.concurrent.ThreadFactoryBuilder; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider; import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseCoordinator; +import software.amazon.kinesis.leases.LeaseDiscoverer; import software.amazon.kinesis.leases.LeaseManagementConfig; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseRenewer; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.LeaseTaker; import software.amazon.kinesis.leases.MultiStreamLease; import software.amazon.kinesis.leases.ShardInfo; @@ -44,6 +49,8 @@ import software.amazon.kinesis.leases.exceptions.InvalidStateException; import software.amazon.kinesis.leases.exceptions.LeasingException; import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.lifecycle.LeaseGracefulShutdownHandler; +import software.amazon.kinesis.lifecycle.ShardConsumer; import software.amazon.kinesis.metrics.MetricsFactory; import software.amazon.kinesis.metrics.MetricsLevel; import software.amazon.kinesis.metrics.MetricsScope; @@ -70,115 +77,34 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator { .setNameFormat("LeaseRenewer-%04d") .setDaemon(true) .build(); + private static final ThreadFactory LEASE_DISCOVERY_THREAD_FACTORY = new ThreadFactoryBuilder() + .setNameFormat("LeaseDiscovery-%04d") + .setDaemon(true) + .build(); private final LeaseRenewer leaseRenewer; private final LeaseTaker leaseTaker; + private final LeaseDiscoverer leaseDiscoverer; private final long renewerIntervalMillis; private final long takerIntervalMillis; + private final long leaseDiscovererIntervalMillis; private final ExecutorService leaseRenewalThreadpool; + private 
final ExecutorService leaseDiscoveryThreadPool; private final LeaseRefresher leaseRefresher; + private final LeaseStatsRecorder leaseStatsRecorder; + private final LeaseGracefulShutdownHandler leaseGracefulShutdownHandler; private long initialLeaseTableReadCapacity; private long initialLeaseTableWriteCapacity; protected final MetricsFactory metricsFactory; private final Object shutdownLock = new Object(); - + private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig; private ScheduledExecutorService leaseCoordinatorThreadPool; + private ScheduledFuture leaseDiscoveryFuture; private ScheduledFuture takerFuture; private volatile boolean running = false; - /** - * Constructor. - * - *

NOTE: This constructor is deprecated and will be removed in a future release.

- * - * @param leaseRefresher - * LeaseRefresher instance to use - * @param workerIdentifier - * Identifies the worker (e.g. useful to track lease ownership) - * @param leaseDurationMillis - * Duration of a lease - * @param epsilonMillis - * Allow for some variance when calculating lease expirations - * @param maxLeasesForWorker - * Max leases this Worker can handle at a time - * @param maxLeasesToStealAtOneTime - * Steal up to these many leases at a time (for load balancing) - * @param metricsFactory - * Used to publish metrics about lease operations - */ - @Deprecated - public DynamoDBLeaseCoordinator( - final LeaseRefresher leaseRefresher, - final String workerIdentifier, - final long leaseDurationMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewerThreadCount, - final MetricsFactory metricsFactory) { - this( - leaseRefresher, - workerIdentifier, - leaseDurationMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewerThreadCount, - TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY, - TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY, - metricsFactory); - } - - /** - * Constructor. - * - * @param leaseRefresher - * LeaseRefresher instance to use - * @param workerIdentifier - * Identifies the worker (e.g. 
useful to track lease ownership) - * @param leaseDurationMillis - * Duration of a lease - * @param epsilonMillis - * Allow for some variance when calculating lease expirations - * @param maxLeasesForWorker - * Max leases this Worker can handle at a time - * @param maxLeasesToStealAtOneTime - * Steal up to these many leases at a time (for load balancing) - * @param initialLeaseTableReadCapacity - * Initial dynamodb lease table read iops if creating the lease table - * @param initialLeaseTableWriteCapacity - * Initial dynamodb lease table write iops if creating the lease table - * @param metricsFactory - * Used to publish metrics about lease operations - */ - @Deprecated - public DynamoDBLeaseCoordinator( - final LeaseRefresher leaseRefresher, - final String workerIdentifier, - final long leaseDurationMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewerThreadCount, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final MetricsFactory metricsFactory) { - this( - leaseRefresher, - workerIdentifier, - leaseDurationMillis, - LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewerThreadCount, - TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY, - TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY, - metricsFactory); - } - /** * Constructor. 
* @@ -214,17 +140,35 @@ public DynamoDBLeaseCoordinator( final int maxLeaseRenewerThreadCount, final long initialLeaseTableReadCapacity, final long initialLeaseTableWriteCapacity, - final MetricsFactory metricsFactory) { + final MetricsFactory metricsFactory, + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig, + final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig, + final ConcurrentMap shardInfoShardConsumerMap) { this.leaseRefresher = leaseRefresher; - this.leaseRenewalThreadpool = getLeaseRenewalExecutorService(maxLeaseRenewerThreadCount); + this.leaseRenewalThreadpool = createExecutorService(maxLeaseRenewerThreadCount, LEASE_RENEWAL_THREAD_FACTORY); this.leaseTaker = new DynamoDBLeaseTaker(leaseRefresher, workerIdentifier, leaseDurationMillis, metricsFactory) .withMaxLeasesForWorker(maxLeasesForWorker) .withMaxLeasesToStealAtOneTime(maxLeasesToStealAtOneTime) .withEnablePriorityLeaseAssignment(enablePriorityLeaseAssignment); - this.leaseRenewer = new DynamoDBLeaseRenewer( - leaseRefresher, workerIdentifier, leaseDurationMillis, leaseRenewalThreadpool, metricsFactory); this.renewerIntervalMillis = getRenewerTakerIntervalMillis(leaseDurationMillis, epsilonMillis); this.takerIntervalMillis = (leaseDurationMillis + epsilonMillis) * 2; + // Should run once every leaseDurationMillis to identify new leases before expiry. 
+ this.leaseDiscovererIntervalMillis = leaseDurationMillis - epsilonMillis; + this.leaseStatsRecorder = new LeaseStatsRecorder(renewerIntervalMillis, System::currentTimeMillis); + this.leaseGracefulShutdownHandler = LeaseGracefulShutdownHandler.create( + gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis(), shardInfoShardConsumerMap, this); + this.leaseRenewer = new DynamoDBLeaseRenewer( + leaseRefresher, + workerIdentifier, + leaseDurationMillis, + leaseRenewalThreadpool, + metricsFactory, + leaseStatsRecorder, + leaseGracefulShutdownHandler::enqueueShutdown); + this.leaseDiscoveryThreadPool = + createExecutorService(maxLeaseRenewerThreadCount, LEASE_DISCOVERY_THREAD_FACTORY); + this.leaseDiscoverer = new DynamoDBLeaseDiscoverer( + this.leaseRefresher, this.leaseRenewer, metricsFactory, workerIdentifier, leaseDiscoveryThreadPool); if (initialLeaseTableReadCapacity <= 0) { throw new IllegalArgumentException("readCapacity should be >= 1"); } @@ -234,6 +178,7 @@ public DynamoDBLeaseCoordinator( } this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity; this.metricsFactory = metricsFactory; + this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig; log.info( "With failover time {} ms and epsilon {} ms, LeaseCoordinator will renew leases every {} ms, take" @@ -246,11 +191,49 @@ public DynamoDBLeaseCoordinator( maxLeasesToStealAtOneTime); } + @RequiredArgsConstructor + private class LeaseDiscoveryRunnable implements Runnable { + private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider; + + @Override + public void run() { + try { + // LeaseDiscoverer is run in WORKER_UTILIZATION_AWARE_ASSIGNMENT mode only + synchronized (shutdownLock) { + if (!leaseAssignmentModeProvider + .getLeaseAssignmentMode() + .equals( + MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode + .WORKER_UTILIZATION_AWARE_ASSIGNMENT)) { + return; + } + if (running) { + 
leaseRenewer.addLeasesToRenew(leaseDiscoverer.discoverNewLeases()); + } + } + } catch (Exception e) { + log.error("Failed to execute lease discovery", e); + } + } + } + + @RequiredArgsConstructor private class TakerRunnable implements Runnable { + private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider; @Override public void run() { try { + // LeaseTaker is run in DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT mode only + synchronized (shutdownLock) { + if (!leaseAssignmentModeProvider + .getLeaseAssignmentMode() + .equals( + MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode + .DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)) { + return; + } + } runLeaseTaker(); } catch (LeasingException e) { log.error("LeasingException encountered in lease taking thread", e); @@ -290,18 +273,35 @@ public void initialize() throws ProvisionedThroughputException, DependencyExcept } @Override - public void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException { + public void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { leaseRenewer.initialize(); + // At max, we need 3 threads - lease renewer, lease taker, lease discoverer - to run without contention. + leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(3, LEASE_COORDINATOR_THREAD_FACTORY); + + // During migration to KCLv3.x from KCLv2.x, lease assignment mode can change dynamically, so + // both lease assignment algorithms will be started but only one will execute based on + // leaseAssignmentModeProvider.getLeaseAssignmentMode(). However for new applications starting in + // KCLv3.x or applications successfully migrated to KCLv3.x, lease assignment mode will not + // change dynamically and will always be WORKER_UTILIZATION_AWARE_ASSIGNMENT, therefore + // don't initialize KCLv2.x lease assignment algorithm components that are not needed. 
+ if (leaseAssignmentModeProvider.dynamicModeChangeSupportNeeded()) { + // Taker runs with fixed DELAY because we want it to run slower in the event of performance degradation. + takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay( + new TakerRunnable(leaseAssignmentModeProvider), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS); + } - // 2 because we know we'll have at most 2 concurrent tasks at a time. - leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(2, LEASE_COORDINATOR_THREAD_FACTORY); + leaseDiscoveryFuture = leaseCoordinatorThreadPool.scheduleAtFixedRate( + new LeaseDiscoveryRunnable(leaseAssignmentModeProvider), + 0L, + leaseDiscovererIntervalMillis, + TimeUnit.MILLISECONDS); - // Taker runs with fixed DELAY because we want it to run slower in the event of performance degredation. - takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay( - new TakerRunnable(), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS); - // Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degredation. + // Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degradation. leaseCoordinatorThreadPool.scheduleAtFixedRate( new RenewerRunnable(), 0L, renewerIntervalMillis, TimeUnit.MILLISECONDS); + + leaseGracefulShutdownHandler.start(); running = true; } @@ -383,6 +383,8 @@ public void stop() { } leaseRenewalThreadpool.shutdownNow(); + leaseCoordinatorThreadPool.shutdownNow(); + leaseGracefulShutdownHandler.stop(); synchronized (shutdownLock) { leaseRenewer.clearCurrentlyHeldLeases(); running = false; @@ -393,6 +395,10 @@ public void stop() { public void stopLeaseTaker() { if (takerFuture != null) { takerFuture.cancel(false); + leaseDiscoveryFuture.cancel(false); + // the method is called in worker graceful shutdown. We want to stop any further lease shutdown + // so we don't interrupt worker shutdown. 
+ leaseGracefulShutdownHandler.stop(); } } @@ -418,20 +424,15 @@ public boolean updateLease( } /** - * Returns executor service that should be used for lease renewal. + * Returns executor service for given ThreadFactory. * @param maximumPoolSize Maximum allowed thread pool size - * @return Executor service that should be used for lease renewal. + * @return Executor service */ - private static ExecutorService getLeaseRenewalExecutorService(int maximumPoolSize) { + private static ExecutorService createExecutorService(final int maximumPoolSize, final ThreadFactory threadFactory) { int coreLeaseCount = Math.max(maximumPoolSize / 4, 2); return new ThreadPoolExecutor( - coreLeaseCount, - maximumPoolSize, - 60, - TimeUnit.SECONDS, - new LinkedTransferQueue<>(), - LEASE_RENEWAL_THREAD_FACTORY); + coreLeaseCount, maximumPoolSize, 60, TimeUnit.SECONDS, new LinkedTransferQueue<>(), threadFactory); } @Override @@ -472,6 +473,8 @@ public static ShardInfo convertLeaseToAssignment(final Lease lease) { * {@inheritDoc} * *

NOTE: This method is deprecated. Please set the initial capacity through the constructor.

+ * + * This is a method of the public lease coordinator interface. */ @Override @Deprecated @@ -487,6 +490,8 @@ public DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity) * {@inheritDoc} * *

NOTE: This method is deprecated. Please set the initial capacity through the constructor.

+ * + * This is a method of the public lease coordinator interface. */ @Override @Deprecated @@ -497,4 +502,9 @@ public DynamoDBLeaseCoordinator initialLeaseTableWriteCapacity(long writeCapacit initialLeaseTableWriteCapacity = writeCapacity; return this; } + + @Override + public LeaseStatsRecorder leaseStatsRecorder() { + return leaseStatsRecorder; + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscoverer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscoverer.java new file mode 100644 index 000000000..ce5605ee3 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscoverer.java @@ -0,0 +1,120 @@ +package software.amazon.kinesis.leases.dynamodb; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseDiscoverer; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.LeaseRenewer; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +import static java.util.Objects.isNull; + +/** + * An implementation of {@link LeaseDiscoverer}, it uses {@link LeaseRefresher} to query + * {@link DynamoDBLeaseRefresher#LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME } and find the leases 
assigned + * to current worker and then filter and returns the leases that have not started processing (looks at + * {@link LeaseRenewer#getCurrentlyHeldLeases()} to find out which leases are currently held leases). + */ +@Slf4j +@RequiredArgsConstructor +public class DynamoDBLeaseDiscoverer implements LeaseDiscoverer { + + private final LeaseRefresher leaseRefresher; + private final LeaseRenewer leaseRenewer; + private final MetricsFactory metricsFactory; + private final String workerIdentifier; + private final ExecutorService executorService; + + @Override + public List discoverNewLeases() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(metricsFactory, "LeaseDiscovery"); + long startTime = System.currentTimeMillis(); + boolean success = false; + try { + final Set currentHeldLeaseKeys = + leaseRenewer.getCurrentlyHeldLeases().keySet(); + + final long listLeaseKeysForWorkerStartTime = System.currentTimeMillis(); + final List leaseKeys = leaseRefresher.listLeaseKeysForWorker(workerIdentifier); + MetricsUtil.addLatency( + metricsScope, "ListLeaseKeysForWorker", listLeaseKeysForWorkerStartTime, MetricsLevel.DETAILED); + + final List newLeaseKeys = leaseKeys.stream() + .filter(leaseKey -> !currentHeldLeaseKeys.contains(leaseKey)) + .collect(Collectors.toList()); + + final long fetchNewLeasesStartTime = System.currentTimeMillis(); + final List> completableFutures = newLeaseKeys.stream() + .map(leaseKey -> + CompletableFuture.supplyAsync(() -> fetchLease(leaseKey, metricsScope), executorService)) + .collect(Collectors.toList()); + + final List newLeases = completableFutures.stream() + .map(CompletableFuture::join) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + log.info( + "New leases assigned to worker : {}, count : {}, leases : {}", + workerIdentifier, + newLeases.size(), + 
newLeases.stream().map(Lease::leaseKey).collect(Collectors.toList())); + + MetricsUtil.addLatency(metricsScope, "FetchNewLeases", fetchNewLeasesStartTime, MetricsLevel.DETAILED); + + success = true; + MetricsUtil.addCount(metricsScope, "NewLeasesDiscovered", newLeases.size(), MetricsLevel.DETAILED); + return newLeases; + } finally { + MetricsUtil.addWorkerIdentifier(metricsScope, workerIdentifier); + MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY); + MetricsUtil.endScope(metricsScope); + } + } + + private Lease fetchLease(final String leaseKey, final MetricsScope metricsScope) { + try { + final Lease lease = leaseRefresher.getLease(leaseKey); + if (isNull(lease)) { + return null; + } + // GSI is eventually consistent thus, validate that the fetched lease is indeed assigned to this + // worker, if not just pass in this run. + if (!lease.leaseOwner().equals(workerIdentifier)) { + MetricsUtil.addCount(metricsScope, "OwnerMismatch", 1, MetricsLevel.DETAILED); + return null; + } + // if checkpointOwner is not null, it means that the lease is still pending shutdown for the last owner. + // Don't add the lease to the in-memory map yet. + if (lease.checkpointOwner() != null) { + return null; + } + // when a new lease is discovered, set the lastCounterIncrementNanos to current time as the time + // when it has become visible, on next renewer interval this will be updated by LeaseRenewer to + // correct time. + lease.lastCounterIncrementNanos(System.nanoTime()); + return lease; + } catch (final Exception e) { + // if getLease on some lease key fail, continue and fetch other leases, the one failed will + // be fetched in the next iteration or will be reassigned if stayed idle for long. 
+ MetricsUtil.addCount(metricsScope, "GetLease:Error", 1, MetricsLevel.SUMMARY); + log.error("GetLease failed for leaseKey : {}", leaseKey, e); + return null; + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseManagementFactory.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseManagementFactory.java index e5435bfc7..7d902afd3 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseManagementFactory.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseManagementFactory.java @@ -17,22 +17,24 @@ import java.time.Duration; import java.util.Collection; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.function.Function; import lombok.Data; import lombok.NonNull; -import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.NotNull; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; import software.amazon.awssdk.services.dynamodb.model.BillingMode; import software.amazon.awssdk.services.dynamodb.model.Tag; import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; import software.amazon.kinesis.annotations.KinesisClientInternalApi; -import software.amazon.kinesis.common.InitialPositionInStreamExtended; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.common.LeaseCleanupConfig; import software.amazon.kinesis.common.StreamConfig; -import software.amazon.kinesis.common.StreamIdentifier; import software.amazon.kinesis.coordinator.DeletedStreamListProvider; import software.amazon.kinesis.leases.HierarchicalShardSyncer; import software.amazon.kinesis.leases.KinesisShardDetector; @@ -42,12 +44,15 @@ 
import software.amazon.kinesis.leases.LeaseManagementFactory; import software.amazon.kinesis.leases.LeaseSerializer; import software.amazon.kinesis.leases.ShardDetector; +import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.leases.ShardSyncTaskManager; +import software.amazon.kinesis.lifecycle.ShardConsumer; import software.amazon.kinesis.metrics.MetricsFactory; /** * */ +@Slf4j @Data @KinesisClientInternalApi public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory { @@ -68,849 +73,41 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory { private final ExecutorService executorService; @NonNull - private final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer; - - @NonNull - private final LeaseSerializer leaseSerializer; - - @NonNull - private StreamConfig streamConfig; - - private Function customShardDetectorProvider; - - private final long failoverTimeMillis; - private final boolean enablePriorityLeaseAssignment; - private final long epsilonMillis; - private final int maxLeasesForWorker; - private final int maxLeasesToStealAtOneTime; - private final int maxLeaseRenewalThreads; - private final boolean cleanupLeasesUponShardCompletion; - private final boolean ignoreUnexpectedChildShards; - private final long shardSyncIntervalMillis; - private final boolean consistentReads; - private final long listShardsBackoffTimeMillis; - private final int maxListShardsRetryAttempts; - private final int maxCacheMissesBeforeReload; - private final long listShardsCacheAllowedAgeInSeconds; - private final int cacheMissWarningModulus; - private final long initialLeaseTableReadCapacity; - private final long initialLeaseTableWriteCapacity; - private final TableCreatorCallback tableCreatorCallback; - private final Duration dynamoDbRequestTimeout; - private final BillingMode billingMode; - private final boolean leaseTableDeletionProtectionEnabled; - private final boolean leaseTablePitrEnabled; - private 
final Collection tags; - private final boolean isMultiStreamMode; - private final LeaseCleanupConfig leaseCleanupConfig; - - /** - * Constructor. - * - *

NOTE: This constructor is deprecated and will be removed in a future release.

- * - * @param kinesisClient - * @param streamName - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param initialPositionInStream - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final String streamName, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final InitialPositionInStreamExtended initialPositionInStream, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus) { - this( - kinesisClient, - streamName, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - initialPositionInStream, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - 
maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY, - TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY); - } - - /** - * Constructor. - * - *

- * NOTE: This constructor is deprecated and will be removed in a future release. - *

- * - * @param kinesisClient - * @param streamName - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param initialPositionInStream - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final String streamName, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final InitialPositionInStreamExtended initialPositionInStream, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity) { - this( - kinesisClient, - streamName, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - initialPositionInStream, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - 
cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - new HierarchicalShardSyncer(), - TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT); - } - - /** - * Constructor. - * - * @param kinesisClient - * @param streamName - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param initialPositionInStream - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param hierarchicalShardSyncer - * @param tableCreatorCallback - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final String streamName, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final InitialPositionInStreamExtended initialPositionInStream, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean 
consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer hierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback) { - this( - kinesisClient, - streamName, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - initialPositionInStream, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - hierarchicalShardSyncer, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT); - } - - /** - * Constructor. 
- * - * @param kinesisClient - * @param streamName - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param initialPositionInStream - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param hierarchicalShardSyncer - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final String streamName, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final InitialPositionInStreamExtended initialPositionInStream, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer hierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout) { - this( - 
kinesisClient, - streamName, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - initialPositionInStream, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - hierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - BillingMode.PAY_PER_REQUEST); - } - - /** - * Constructor. - * - * @param kinesisClient - * @param streamName - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param initialPositionInStream - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param hierarchicalShardSyncer - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final String streamName, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final InitialPositionInStreamExtended initialPositionInStream, - final long failoverTimeMillis, - final long 
epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer hierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - BillingMode billingMode) { - - this( - kinesisClient, - new StreamConfig(StreamIdentifier.singleStreamInstance(streamName), initialPositionInStream), - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - hierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - new DynamoDBLeaseSerializer()); - } - - /** - * Constructor. 
- * - * @param kinesisClient - * @param streamName - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param initialPositionInStream - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param hierarchicalShardSyncer - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - * @param tags - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final String streamName, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final InitialPositionInStreamExtended initialPositionInStream, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer hierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback, - Duration 
dynamoDbRequestTimeout, - BillingMode billingMode, - Collection tags) { - - this( - kinesisClient, - new StreamConfig(StreamIdentifier.singleStreamInstance(streamName), initialPositionInStream), - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - hierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - new DynamoDBLeaseSerializer()); - } - - /** - * Constructor. - * - * @param kinesisClient - * @param streamConfig - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param deprecatedHierarchicalShardSyncer - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - */ - @Deprecated - private DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final StreamConfig streamConfig, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final 
ExecutorService executorService, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - BillingMode billingMode, - LeaseSerializer leaseSerializer) { - this( - kinesisClient, - streamConfig, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - deprecatedHierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, - DefaultSdkAutoConstructList.getInstance(), - leaseSerializer); - } + private final LeaseSerializer leaseSerializer; - /** - * Constructor. 
- * - * @param kinesisClient - * @param streamConfig - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param deprecatedHierarchicalShardSyncer - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - * @param leaseTableDeletionProtectionEnabled - * @param tags - */ - @Deprecated - private DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final StreamConfig streamConfig, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - 
BillingMode billingMode, - final boolean leaseTableDeletionProtectionEnabled, - Collection tags, - LeaseSerializer leaseSerializer) { - this( - kinesisClient, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - failoverTimeMillis, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - deprecatedHierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - leaseTableDeletionProtectionEnabled, - tags, - leaseSerializer, - null, - false, - LeaseManagementConfig.DEFAULT_LEASE_CLEANUP_CONFIG); - this.streamConfig = streamConfig; - } + private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig; - /** - * Constructor. 
- * @param kinesisClient - * @param dynamoDBClient - * @param tableName - * @param workerIdentifier - * @param executorService - * @param failoverTimeMillis - * @param epsilonMillis - * @param maxLeasesForWorker - * @param maxLeasesToStealAtOneTime - * @param maxLeaseRenewalThreads - * @param cleanupLeasesUponShardCompletion - * @param ignoreUnexpectedChildShards - * @param shardSyncIntervalMillis - * @param consistentReads - * @param listShardsBackoffTimeMillis - * @param maxListShardsRetryAttempts - * @param maxCacheMissesBeforeReload - * @param listShardsCacheAllowedAgeInSeconds - * @param cacheMissWarningModulus - * @param initialLeaseTableReadCapacity - * @param initialLeaseTableWriteCapacity - * @param deprecatedHierarchicalShardSyncer - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - * @param leaseTableDeletionProtectionEnabled - * @param leaseSerializer - * @param customShardDetectorProvider - * @param isMultiStreamMode - * @param leaseCleanupConfig - */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final long failoverTimeMillis, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer, - final TableCreatorCallback 
tableCreatorCallback, - Duration dynamoDbRequestTimeout, - BillingMode billingMode, - final boolean leaseTableDeletionProtectionEnabled, - Collection tags, - LeaseSerializer leaseSerializer, - Function customShardDetectorProvider, - boolean isMultiStreamMode, - LeaseCleanupConfig leaseCleanupConfig) { - this( - kinesisClient, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - failoverTimeMillis, - LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - deprecatedHierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - leaseTableDeletionProtectionEnabled, - tags, - leaseSerializer, - customShardDetectorProvider, - isMultiStreamMode, - leaseCleanupConfig); - } + @NonNull + private StreamConfig streamConfig; + + private Function customShardDetectorProvider; + + private final long failoverTimeMillis; + private final boolean enablePriorityLeaseAssignment; + private final long epsilonMillis; + private final int maxLeasesForWorker; + private final int maxLeasesToStealAtOneTime; + private final int maxLeaseRenewalThreads; + private final boolean cleanupLeasesUponShardCompletion; + private final boolean ignoreUnexpectedChildShards; + private final long shardSyncIntervalMillis; + private final boolean consistentReads; + private final long listShardsBackoffTimeMillis; + private final int maxListShardsRetryAttempts; + private final int maxCacheMissesBeforeReload; + private final long listShardsCacheAllowedAgeInSeconds; + private final int cacheMissWarningModulus; + private final long 
initialLeaseTableReadCapacity; + private final long initialLeaseTableWriteCapacity; + private final TableCreatorCallback tableCreatorCallback; + private final Duration dynamoDbRequestTimeout; + private final BillingMode billingMode; + private final boolean leaseTableDeletionProtectionEnabled; + private final boolean leaseTablePitrEnabled; + private final Collection tags; + private final boolean isMultiStreamMode; + private final LeaseCleanupConfig leaseCleanupConfig; + private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig; /** * Constructor. @@ -936,92 +133,24 @@ public DynamoDBLeaseManagementFactory( * @param cacheMissWarningModulus * @param initialLeaseTableReadCapacity * @param initialLeaseTableWriteCapacity - * @param deprecatedHierarchicalShardSyncer * @param tableCreatorCallback * @param dynamoDbRequestTimeout * @param billingMode * @param leaseTableDeletionProtectionEnabled + * @param leaseTablePitrEnabled * @param leaseSerializer * @param customShardDetectorProvider * @param isMultiStreamMode * @param leaseCleanupConfig + * @param workerUtilizationAwareAssignmentConfig + * @param gracefulLeaseHandoffConfig */ - @Deprecated - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, - final long failoverTimeMillis, - final boolean enablePriorityLeaseAssignment, - final long epsilonMillis, - final int maxLeasesForWorker, - final int maxLeasesToStealAtOneTime, - final int maxLeaseRenewalThreads, - final boolean cleanupLeasesUponShardCompletion, - final boolean ignoreUnexpectedChildShards, - final long shardSyncIntervalMillis, - final boolean consistentReads, - final long listShardsBackoffTimeMillis, - final int maxListShardsRetryAttempts, - final int maxCacheMissesBeforeReload, - final long listShardsCacheAllowedAgeInSeconds, - final int 
cacheMissWarningModulus, - final long initialLeaseTableReadCapacity, - final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer, - final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - BillingMode billingMode, - final boolean leaseTableDeletionProtectionEnabled, - Collection tags, - LeaseSerializer leaseSerializer, - Function customShardDetectorProvider, - boolean isMultiStreamMode, - LeaseCleanupConfig leaseCleanupConfig) { - this( - kinesisClient, - dynamoDBClient, - tableName, - workerIdentifier, - executorService, - failoverTimeMillis, - enablePriorityLeaseAssignment, - epsilonMillis, - maxLeasesForWorker, - maxLeasesToStealAtOneTime, - maxLeaseRenewalThreads, - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - consistentReads, - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - maxCacheMissesBeforeReload, - listShardsCacheAllowedAgeInSeconds, - cacheMissWarningModulus, - initialLeaseTableReadCapacity, - initialLeaseTableWriteCapacity, - deprecatedHierarchicalShardSyncer, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - leaseTableDeletionProtectionEnabled, - LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, - tags, - leaseSerializer, - customShardDetectorProvider, - isMultiStreamMode, - leaseCleanupConfig); - } - public DynamoDBLeaseManagementFactory( - final KinesisAsyncClient kinesisClient, - final DynamoDbAsyncClient dynamoDBClient, - final String tableName, - final String workerIdentifier, - final ExecutorService executorService, + final @NotNull KinesisAsyncClient kinesisClient, + final @NotNull DynamoDbAsyncClient dynamoDBClient, + final @NotNull String tableName, + final @NotNull String workerIdentifier, + final @NotNull ExecutorService executorService, final long failoverTimeMillis, final boolean enablePriorityLeaseAssignment, final long epsilonMillis, @@ -1039,17 +168,18 @@ public 
DynamoDBLeaseManagementFactory( final int cacheMissWarningModulus, final long initialLeaseTableReadCapacity, final long initialLeaseTableWriteCapacity, - final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer, final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - BillingMode billingMode, + final Duration dynamoDbRequestTimeout, + final BillingMode billingMode, final boolean leaseTableDeletionProtectionEnabled, final boolean leaseTablePitrEnabled, - Collection tags, - LeaseSerializer leaseSerializer, - Function customShardDetectorProvider, + final Collection tags, + final @NotNull LeaseSerializer leaseSerializer, + final Function customShardDetectorProvider, boolean isMultiStreamMode, - LeaseCleanupConfig leaseCleanupConfig) { + final LeaseCleanupConfig leaseCleanupConfig, + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig, + final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig) { this.kinesisClient = kinesisClient; this.dynamoDBClient = dynamoDBClient; this.tableName = tableName; @@ -1072,7 +202,6 @@ public DynamoDBLeaseManagementFactory( this.cacheMissWarningModulus = cacheMissWarningModulus; this.initialLeaseTableReadCapacity = initialLeaseTableReadCapacity; this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity; - this.deprecatedHierarchicalShardSyncer = deprecatedHierarchicalShardSyncer; this.tableCreatorCallback = tableCreatorCallback; this.dynamoDbRequestTimeout = dynamoDbRequestTimeout; this.billingMode = billingMode; @@ -1083,10 +212,19 @@ public DynamoDBLeaseManagementFactory( this.isMultiStreamMode = isMultiStreamMode; this.leaseCleanupConfig = leaseCleanupConfig; this.tags = tags; + this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig; + this.gracefulLeaseHandoffConfig = gracefulLeaseHandoffConfig; } @Override public LeaseCoordinator createLeaseCoordinator(@NonNull final 
MetricsFactory metricsFactory) { + return createLeaseCoordinator(metricsFactory, new ConcurrentHashMap<>()); + } + + @Override + public LeaseCoordinator createLeaseCoordinator( + @NonNull final MetricsFactory metricsFactory, + @NonNull final ConcurrentMap shardInfoShardConsumerMap) { return new DynamoDBLeaseCoordinator( this.createLeaseRefresher(), workerIdentifier, @@ -1098,33 +236,10 @@ public LeaseCoordinator createLeaseCoordinator(@NonNull final MetricsFactory met maxLeaseRenewalThreads, initialLeaseTableReadCapacity, initialLeaseTableWriteCapacity, - metricsFactory); - } - - @Override - @Deprecated - public ShardSyncTaskManager createShardSyncTaskManager(@NonNull final MetricsFactory metricsFactory) { - return new ShardSyncTaskManager( - this.createShardDetector(), - this.createLeaseRefresher(), - streamConfig.initialPositionInStreamExtended(), - cleanupLeasesUponShardCompletion, - ignoreUnexpectedChildShards, - shardSyncIntervalMillis, - executorService, - deprecatedHierarchicalShardSyncer, - metricsFactory); - } - - /** - * Create ShardSyncTaskManager from the streamConfig passed - * @param metricsFactory - * @param streamConfig - * @return ShardSyncTaskManager - */ - @Override - public ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) { - return createShardSyncTaskManager(metricsFactory, streamConfig, null); + metricsFactory, + workerUtilizationAwareAssignmentConfig, + gracefulLeaseHandoffConfig, + shardInfoShardConsumerMap); } /** @@ -1155,6 +270,10 @@ public ShardSyncTaskManager createShardSyncTaskManager( @Override public DynamoDBLeaseRefresher createLeaseRefresher() { + final DdbTableConfig ddbTableConfig = new DdbTableConfig(); + ddbTableConfig.billingMode(billingMode); + ddbTableConfig.readCapacity(initialLeaseTableReadCapacity); + ddbTableConfig.writeCapacity(initialLeaseTableWriteCapacity); return new DynamoDBLeaseRefresher( tableName, dynamoDBClient, @@ -1162,26 +281,12 @@ public 
DynamoDBLeaseRefresher createLeaseRefresher() { consistentReads, tableCreatorCallback, dynamoDbRequestTimeout, - billingMode, + ddbTableConfig, leaseTableDeletionProtectionEnabled, leaseTablePitrEnabled, tags); } - @Override - @Deprecated - public ShardDetector createShardDetector() { - return new KinesisShardDetector( - kinesisClient, - streamConfig.streamIdentifier(), - listShardsBackoffTimeMillis, - maxListShardsRetryAttempts, - listShardsCacheAllowedAgeInSeconds, - maxCacheMissesBeforeReload, - cacheMissWarningModulus, - dynamoDbRequestTimeout); - } - /** * KinesisShardDetector supports reading from service only using streamName. Support for accountId and * stream creation epoch is yet to be provided. diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresher.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresher.java index 123f4068d..593e40dad 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresher.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresher.java @@ -15,48 +15,68 @@ package software.amazon.kinesis.leases.dynamodb; import java.time.Duration; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import com.google.common.collect.ImmutableMap; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeAction; import 
software.amazon.awssdk.services.dynamodb.model.AttributeValue; import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate; import software.amazon.awssdk.services.dynamodb.model.BillingMode; import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException; +import software.amazon.awssdk.services.dynamodb.model.CreateGlobalSecondaryIndexAction; import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest; import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; import software.amazon.awssdk.services.dynamodb.model.DynamoDbException; +import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue; import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.GlobalSecondaryIndexDescription; +import software.amazon.awssdk.services.dynamodb.model.GlobalSecondaryIndexUpdate; +import software.amazon.awssdk.services.dynamodb.model.IndexStatus; import software.amazon.awssdk.services.dynamodb.model.LimitExceededException; +import software.amazon.awssdk.services.dynamodb.model.Projection; +import software.amazon.awssdk.services.dynamodb.model.ProjectionType; import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput; import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException; import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.QueryRequest; +import software.amazon.awssdk.services.dynamodb.model.QueryResponse; import software.amazon.awssdk.services.dynamodb.model.ResourceInUseException; import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import 
software.amazon.awssdk.services.dynamodb.model.ReturnValue; +import software.amazon.awssdk.services.dynamodb.model.ReturnValuesOnConditionCheckFailure; import software.amazon.awssdk.services.dynamodb.model.ScanRequest; import software.amazon.awssdk.services.dynamodb.model.ScanResponse; +import software.amazon.awssdk.services.dynamodb.model.TableDescription; import software.amazon.awssdk.services.dynamodb.model.TableStatus; import software.amazon.awssdk.services.dynamodb.model.Tag; import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest; import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest; +import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse; +import software.amazon.awssdk.services.dynamodb.model.UpdateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.UpdateTableResponse; import software.amazon.awssdk.utils.CollectionUtils; import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.common.FutureUtils; import software.amazon.kinesis.common.StreamIdentifier; +import software.amazon.kinesis.leases.DynamoUtils; import software.amazon.kinesis.leases.Lease; -import software.amazon.kinesis.leases.LeaseManagementConfig; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseSerializer; import software.amazon.kinesis.leases.UpdateField; @@ -66,12 +86,19 @@ import software.amazon.kinesis.retrieval.AWSExceptionManager; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; +import static java.util.Objects.isNull; +import static java.util.Objects.nonNull; +import static software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer.CHECKPOINT_OWNER; +import static software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer.LEASE_KEY_KEY; +import static software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer.LEASE_OWNER_KEY; + 
/** * An implementation of {@link LeaseRefresher} that uses DynamoDB. */ @Slf4j @KinesisClientInternalApi public class DynamoDBLeaseRefresher implements LeaseRefresher { + static final String LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME = "LeaseOwnerToLeaseKeyIndex"; protected final String table; protected final DynamoDbAsyncClient dynamoDBClient; @@ -80,7 +107,7 @@ public class DynamoDBLeaseRefresher implements LeaseRefresher { private final TableCreatorCallback tableCreatorCallback; private final Duration dynamoDbRequestTimeout; - private final BillingMode billingMode; + private final DdbTableConfig ddbTableConfig; private final boolean leaseTableDeletionProtectionEnabled; private final boolean leaseTablePitrEnabled; private final Collection tags; @@ -90,111 +117,15 @@ public class DynamoDBLeaseRefresher implements LeaseRefresher { private static final String STREAM_NAME = "streamName"; private static final String DDB_STREAM_NAME = ":streamName"; - /** - * Constructor. - * - *

- * NOTE: This constructor is deprecated and will be removed in a future release. - *

- * - * @param table - * @param dynamoDBClient - * @param serializer - * @param consistentReads - */ - @Deprecated - public DynamoDBLeaseRefresher( - final String table, - final DynamoDbAsyncClient dynamoDBClient, - final LeaseSerializer serializer, - final boolean consistentReads) { - this(table, dynamoDBClient, serializer, consistentReads, TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK); - } + private static final String DDB_LEASE_OWNER = ":" + LEASE_OWNER_KEY; - /** - * Constructor. - * - * @param table - * @param dynamoDBClient - * @param serializer - * @param consistentReads - * @param tableCreatorCallback - */ - @Deprecated - public DynamoDBLeaseRefresher( - final String table, - final DynamoDbAsyncClient dynamoDBClient, - final LeaseSerializer serializer, - final boolean consistentReads, - @NonNull final TableCreatorCallback tableCreatorCallback) { - this( - table, - dynamoDBClient, - serializer, - consistentReads, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT); - } + private static final String LEASE_OWNER_INDEX_QUERY_CONDITIONAL_EXPRESSION = + String.format("%s = %s", LEASE_OWNER_KEY, DDB_LEASE_OWNER); - /** - * Constructor. - * @param table - * @param dynamoDBClient - * @param serializer - * @param consistentReads - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - */ - @Deprecated - public DynamoDBLeaseRefresher( - final String table, - final DynamoDbAsyncClient dynamoDBClient, - final LeaseSerializer serializer, - final boolean consistentReads, - @NonNull final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout) { - this( - table, - dynamoDBClient, - serializer, - consistentReads, - tableCreatorCallback, - dynamoDbRequestTimeout, - BillingMode.PAY_PER_REQUEST, - LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED); - } - - /** - * Constructor. 
- * @param table - * @param dynamoDBClient - * @param serializer - * @param consistentReads - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - * @param leaseTableDeletionProtectionEnabled - */ - @Deprecated - public DynamoDBLeaseRefresher( - final String table, - final DynamoDbAsyncClient dynamoDBClient, - final LeaseSerializer serializer, - final boolean consistentReads, - @NonNull final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - final BillingMode billingMode, - final boolean leaseTableDeletionProtectionEnabled) { - this( - table, - dynamoDBClient, - serializer, - consistentReads, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - leaseTableDeletionProtectionEnabled, - DefaultSdkAutoConstructList.getInstance()); + private static DdbTableConfig createDdbTableConfigFromBillingMode(final BillingMode billingMode) { + final DdbTableConfig tableConfig = new DdbTableConfig(); + tableConfig.billingMode(billingMode); + return tableConfig; } /** @@ -205,45 +136,11 @@ public DynamoDBLeaseRefresher( * @param consistentReads * @param tableCreatorCallback * @param dynamoDbRequestTimeout - * @param billingMode + * @param ddbTableConfig * @param leaseTableDeletionProtectionEnabled + * @param leaseTablePitrEnabled * @param tags */ - @Deprecated - public DynamoDBLeaseRefresher( - final String table, - final DynamoDbAsyncClient dynamoDBClient, - final LeaseSerializer serializer, - final boolean consistentReads, - @NonNull final TableCreatorCallback tableCreatorCallback, - Duration dynamoDbRequestTimeout, - final BillingMode billingMode, - final boolean leaseTableDeletionProtectionEnabled, - final Collection tags) { - this( - table, - dynamoDBClient, - serializer, - consistentReads, - tableCreatorCallback, - dynamoDbRequestTimeout, - billingMode, - leaseTableDeletionProtectionEnabled, - LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, - tags); - } - - /** - * Constructor. 
- * @param table - * @param dynamoDBClient - * @param serializer - * @param consistentReads - * @param tableCreatorCallback - * @param dynamoDbRequestTimeout - * @param billingMode - * @param leaseTableDeletionProtectionEnabled - */ public DynamoDBLeaseRefresher( final String table, final DynamoDbAsyncClient dynamoDBClient, @@ -251,7 +148,7 @@ public DynamoDBLeaseRefresher( final boolean consistentReads, @NonNull final TableCreatorCallback tableCreatorCallback, Duration dynamoDbRequestTimeout, - final BillingMode billingMode, + final DdbTableConfig ddbTableConfig, final boolean leaseTableDeletionProtectionEnabled, final boolean leaseTablePitrEnabled, final Collection tags) { @@ -261,7 +158,7 @@ public DynamoDBLeaseRefresher( this.consistentReads = consistentReads; this.tableCreatorCallback = tableCreatorCallback; this.dynamoDbRequestTimeout = dynamoDbRequestTimeout; - this.billingMode = billingMode; + this.ddbTableConfig = ddbTableConfig; this.leaseTableDeletionProtectionEnabled = leaseTableDeletionProtectionEnabled; this.leaseTablePitrEnabled = leaseTablePitrEnabled; this.tags = tags; @@ -269,18 +166,16 @@ public DynamoDBLeaseRefresher( /** * {@inheritDoc} + * This method always creates table in PROVISIONED mode and with RCU and WCU provided as method args */ @Override public boolean createLeaseTableIfNotExists(@NonNull final Long readCapacity, @NonNull final Long writeCapacity) throws ProvisionedThroughputException, DependencyException { - final CreateTableRequest.Builder builder = createTableRequestBuilder(); - if (BillingMode.PROVISIONED.equals(billingMode)) { - ProvisionedThroughput throughput = ProvisionedThroughput.builder() - .readCapacityUnits(readCapacity) - .writeCapacityUnits(writeCapacity) - .build(); - builder.provisionedThroughput(throughput); - } + + final DdbTableConfig overriddenTableConfig = createDdbTableConfigFromBillingMode(BillingMode.PROVISIONED); + overriddenTableConfig.readCapacity(readCapacity); + 
overriddenTableConfig.writeCapacity(writeCapacity); + final CreateTableRequest.Builder builder = createTableRequestBuilder(overriddenTableConfig); return createTableIfNotExists(builder.build()); } @@ -289,15 +184,14 @@ public boolean createLeaseTableIfNotExists(@NonNull final Long readCapacity, @No */ @Override public boolean createLeaseTableIfNotExists() throws ProvisionedThroughputException, DependencyException { - final CreateTableRequest request = createTableRequestBuilder().build(); + final CreateTableRequest request = + createTableRequestBuilder(ddbTableConfig).build(); boolean tableExists = createTableIfNotExists(request); - if (leaseTablePitrEnabled) { enablePitr(); log.info("Enabled PITR on table {}", table); } - return tableExists; } @@ -323,7 +217,7 @@ private void enablePitr() throws DependencyException { private boolean createTableIfNotExists(CreateTableRequest request) throws ProvisionedThroughputException, DependencyException { try { - if (tableStatus() != null) { + if (describeLeaseTable() != null) { return newTableCreated; } } catch (DependencyException de) { @@ -367,7 +261,12 @@ public boolean leaseTableExists() throws DependencyException { } private TableStatus tableStatus() throws DependencyException { - DescribeTableRequest request = + final DescribeTableResponse response = describeLeaseTable(); + return nonNull(response) ? 
response.table().tableStatus() : null; + } + + private DescribeTableResponse describeLeaseTable() throws DependencyException { + final DescribeTableRequest request = DescribeTableRequest.builder().tableName(table).build(); final AWSExceptionManager exceptionManager = createExceptionManager(); @@ -394,7 +293,7 @@ private TableStatus tableStatus() throws DependencyException { TableStatus tableStatus = result.table().tableStatus(); log.debug("Lease table exists and is in status {}", tableStatus); - return tableStatus; + return result; } @Override @@ -405,6 +304,7 @@ public boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutS if (sleepTimeRemaining <= 0) { return false; } + log.info("Waiting for Lease table creation..."); long timeToSleepMillis = Math.min(TimeUnit.SECONDS.toMillis(secondsBetweenPolls), sleepTimeRemaining); @@ -419,6 +319,139 @@ public boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutS return true; } + private static boolean isTableInPayPerRequestMode(final DescribeTableResponse describeTableResponse) { + if (nonNull(describeTableResponse) + && nonNull(describeTableResponse.table().billingModeSummary()) + && describeTableResponse + .table() + .billingModeSummary() + .billingMode() + .equals(BillingMode.PAY_PER_REQUEST)) { + return true; + } + return false; + } + + @Override + public String createLeaseOwnerToLeaseKeyIndexIfNotExists() throws DependencyException { + final DescribeTableResponse describeTableResponse = describeLeaseTable(); + ProvisionedThroughput provisionedThroughput = null; + if (nonNull(describeTableResponse)) { + // If table already on PAY_PER_REQUEST then setting null provisionedThroughput creates the GSI in + // PAY_PER_REQUEST mode + if (!isTableInPayPerRequestMode(describeTableResponse)) { + /* + * Whatever is configured at the base table use that as WCU and RCU for the GSI. 
If this is new + * application created with provision mode, the set WCU and RCU will be same as that of what application + * provided, if this is old application where application provided WCU and RCU is no longer what is set + * on base table then we honor the capacity of base table. This is to avoid setting WCU and RCU very + * less on GSI and cause issues with base table. Customers are expected to tune in GSI WCU and RCU + * themselves after creation as they deem fit. + */ + provisionedThroughput = ProvisionedThroughput.builder() + .readCapacityUnits(describeTableResponse + .table() + .provisionedThroughput() + .readCapacityUnits()) + .writeCapacityUnits(describeTableResponse + .table() + .provisionedThroughput() + .writeCapacityUnits()) + .build(); + } + + final IndexStatus indexStatus = getIndexStatusFromDescribeTableResponse( + describeTableResponse.table(), LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME); + if (nonNull(indexStatus)) { + log.info( + "Lease table GSI {} already exists with status {}", + LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME, + indexStatus); + + // indexStatus is nonNull that means index already exists, return the status of index. 
+ return indexStatus.toString(); + } + } + final UpdateTableRequest updateTableRequest = UpdateTableRequest.builder() + .tableName(table) + .attributeDefinitions(serializer.getWorkerIdToLeaseKeyIndexAttributeDefinitions()) + .globalSecondaryIndexUpdates(GlobalSecondaryIndexUpdate.builder() + .create(CreateGlobalSecondaryIndexAction.builder() + .indexName(LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME) + .keySchema(serializer.getWorkerIdToLeaseKeyIndexKeySchema()) + .projection(Projection.builder() + .projectionType(ProjectionType.KEYS_ONLY) + .build()) + .provisionedThroughput(provisionedThroughput) + .build()) + .build()) + .build(); + + try { + log.info("Creating Lease table GSI {}", LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME); + final UpdateTableResponse response = FutureUtils.resolveOrCancelFuture( + dynamoDBClient.updateTable(updateTableRequest), dynamoDbRequestTimeout); + return getIndexStatusFromDescribeTableResponse( + response.tableDescription(), LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME) + .toString(); + } catch (ExecutionException e) { + throw new DependencyException(nonNull(e.getCause()) ? 
e.getCause() : e); + } catch (InterruptedException | TimeoutException e) { + throw new DependencyException(e); + } + } + + private IndexStatus getIndexStatusFromDescribeTableResponse( + final TableDescription tableDescription, final String indexName) { + if (isNull(tableDescription)) { + return null; + } + return tableDescription.globalSecondaryIndexes().stream() + .filter(index -> index.indexName().equals(indexName)) + .findFirst() + .map(GlobalSecondaryIndexDescription::indexStatus) + .orElse(null); + } + + @Override + public boolean waitUntilLeaseOwnerToLeaseKeyIndexExists(final long secondsBetweenPolls, final long timeoutSeconds) { + final long startTime = System.currentTimeMillis(); + while (System.currentTimeMillis() - startTime + < Duration.ofSeconds(timeoutSeconds).toMillis()) { + try { + if (isLeaseOwnerToLeaseKeyIndexActive()) { + return true; + } + } catch (final Exception e) { + log.warn("Failed to fetch {} status", LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME, e); + } + try { + log.info("GSI status is not active, trying again in {}s", secondsBetweenPolls); + Thread.sleep(Duration.ofSeconds(secondsBetweenPolls).toMillis()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + log.info("GSI status was not active, after {}s", timeoutSeconds); + return false; + } + + @Override + public boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException { + final DescribeTableResponse describeTableResponse = describeLeaseTable(); + if (isNull(describeTableResponse)) { + return false; + } + final IndexStatus indexStatus = getIndexStatusFromDescribeTableResponse( + describeTableResponse.table(), LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME); + log.debug( + "Lease table GSI {} status {}", + LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME, + indexStatus == null ? "does not exist" : indexStatus); + + return indexStatus == IndexStatus.ACTIVE; + } + /** * Exposed for testing purposes. 
* @@ -447,6 +480,64 @@ public List listLeasesForStream(StreamIdentifier streamIdentifier) return list(null, streamIdentifier); } + /** + * {@inheritDoc} + * + * This method throws InvalidStateException in case of + * {@link DynamoDBLeaseRefresher#LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME} does not exists. + * If index creation is not done and want to listLeases for a worker, + * use {@link DynamoDBLeaseRefresher#listLeases()} and filter on that to list leases. + */ + @Override + public List listLeaseKeysForWorker(final String workerIdentifier) + throws DependencyException, InvalidStateException { + QueryRequest queryRequest = QueryRequest.builder() + .indexName(LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME) + .keyConditionExpression(LEASE_OWNER_INDEX_QUERY_CONDITIONAL_EXPRESSION) + .expressionAttributeValues(ImmutableMap.of( + DDB_LEASE_OWNER, + AttributeValue.builder().s(workerIdentifier).build())) + .tableName(table) + .build(); + + final AWSExceptionManager exceptionManager = createExceptionManager(); + exceptionManager.add(ResourceNotFoundException.class, t -> t); + + try { + try { + final List result = new ArrayList<>(); + + QueryResponse queryResponse = + FutureUtils.resolveOrCancelFuture(dynamoDBClient.query(queryRequest), dynamoDbRequestTimeout); + + while (queryResponse != null) { + for (Map item : queryResponse.items()) { + result.add(item.get(LEASE_KEY_KEY).s()); + } + final Map lastEvaluatedKey = queryResponse.lastEvaluatedKey(); + if (CollectionUtils.isNullOrEmpty(lastEvaluatedKey)) { + // Signify that we're done. + queryResponse = null; + } else { + // Make another request, picking up where we left off. 
+ queryRequest = queryRequest.toBuilder() + .exclusiveStartKey(lastEvaluatedKey) + .build(); + queryResponse = FutureUtils.resolveOrCancelFuture( + dynamoDBClient.query(queryRequest), dynamoDbRequestTimeout); + } + } + return result; + } catch (final ExecutionException e) { + throw exceptionManager.apply(e.getCause()); + } + } catch (final ResourceNotFoundException e) { + throw new InvalidStateException(LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME + " does not exists.", e); + } catch (final Exception e) { + throw new DependencyException(e); + } + } + /** * {@inheritDoc} */ @@ -455,8 +546,87 @@ public List listLeases() throws DependencyException, InvalidStateExceptio return list(null, null); } + @Override + public Map.Entry, List> listLeasesParallely( + final ExecutorService parallelScanExecutorService, final int parallelScanTotalSegment) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + final List leaseItemFailedDeserialize = new ArrayList<>(); + final List response = new ArrayList<>(); + final List>>> futures = new ArrayList<>(); + for (int i = 0; i < parallelScanTotalSegment; ++i) { + final int segmentNumber = i; + futures.add(parallelScanExecutorService.submit(() -> scanSegment(segmentNumber, parallelScanTotalSegment))); + } + try { + for (final Future>> future : futures) { + for (final Map item : future.get()) { + try { + response.add(serializer.fromDynamoRecord(item)); + } catch (final Exception e) { + // If one or more leases failed to deserialize for some reason (e.g. corrupted lease etc + // do not fail all list call. Capture failed deserialize item and return to caller. + log.error("Failed to deserialize lease", e); + // If a item exists in DDB then "leaseKey" should be always present as its primaryKey + leaseItemFailedDeserialize.add(item.get(LEASE_KEY_KEY).s()); + } + } + } + } catch (final ExecutionException e) { + final Throwable throwable = e.getCause() != null ? 
e.getCause() : e; + if (throwable instanceof ResourceNotFoundException) { + throw new InvalidStateException("Cannot scan lease table " + table + " because it does not exist.", e); + } else if (throwable instanceof ProvisionedThroughputException) { + throw new ProvisionedThroughputException(e); + } else { + throw new DependencyException(e); + } + } catch (final InterruptedException e) { + throw new DependencyException(e); + } + return new AbstractMap.SimpleEntry<>(response, leaseItemFailedDeserialize); + } + + private List> scanSegment(final int segment, final int parallelScanTotalSegment) + throws DependencyException { + + final List> response = new ArrayList<>(); + + final AWSExceptionManager exceptionManager = createExceptionManager(); + exceptionManager.add(ResourceNotFoundException.class, t -> t); + exceptionManager.add(ProvisionedThroughputExceededException.class, t -> t); + + Map lastEvaluatedKey = null; + do { + try { + final ScanRequest scanRequest = ScanRequest.builder() + .tableName(table) + .segment(segment) + .totalSegments(parallelScanTotalSegment) + .exclusiveStartKey(lastEvaluatedKey) + .build(); + + final ScanResponse scanResult = + FutureUtils.resolveOrCancelFuture(dynamoDBClient.scan(scanRequest), dynamoDbRequestTimeout); + response.addAll(scanResult.items()); + if (scanResult.hasLastEvaluatedKey()) { + lastEvaluatedKey = scanResult.lastEvaluatedKey(); + } else { + // null signifies that the scan is complete for this segment. 
+ lastEvaluatedKey = null; + } + } catch (final ExecutionException e) { + throw exceptionManager.apply(e.getCause()); + } catch (final InterruptedException | TimeoutException e) { + throw new DependencyException(e); + } + } while (lastEvaluatedKey != null); + + return response; + } + /** * {@inheritDoc} + * Current implementation has a fixed 10 parallism */ @Override public boolean isLeaseTableEmpty() @@ -492,7 +662,6 @@ List list(Integer limit, StreamIdentifier streamIdentifier) */ private List list(Integer limit, Integer maxPages, StreamIdentifier streamIdentifier) throws DependencyException, InvalidStateException, ProvisionedThroughputException { - log.debug("Listing leases from table {}", table); ScanRequest.Builder scanRequestBuilder = ScanRequest.builder().tableName(table); @@ -642,11 +811,30 @@ public boolean renewLease(@NonNull final Lease lease) throws DependencyException, InvalidStateException, ProvisionedThroughputException { log.debug("Renewing lease with key {}", lease.leaseKey()); - UpdateItemRequest request = UpdateItemRequest.builder() + final Map attributeUpdates = new HashMap<>(); + attributeUpdates.putAll(serializer.getDynamoLeaseCounterUpdate(lease)); + if (nonNull(lease.throughputKBps())) { + attributeUpdates.putAll(serializer.getDynamoLeaseThroughputKbpsUpdate(lease)); + } + final Map expected = serializer.getDynamoLeaseCounterExpectation(lease); + + // In steady-state execution, the lease is not expected to contain shutdown attributes. If a shutdown + // is requested, a conditional update failure is triggered. When this happens, we examine the returned + // lease to determine if the failure resulted from a shutdown request. 
If so, we update the shutdown + // attributes in the in-memory lease and retry the renewal without the expectedValue, + // allowing it to complete successfully + if (!lease.shutdownRequested()) { + expected.put( + CHECKPOINT_OWNER, + ExpectedAttributeValue.builder().exists(false).build()); + } + + final UpdateItemRequest request = UpdateItemRequest.builder() .tableName(table) .key(serializer.getDynamoHashKey(lease)) - .expected(serializer.getDynamoLeaseCounterExpectation(lease)) - .attributeUpdates(serializer.getDynamoLeaseCounterUpdate(lease)) + .expected(expected) + .attributeUpdates(attributeUpdates) + .returnValuesOnConditionCheckFailure(ReturnValuesOnConditionCheckFailure.ALL_OLD) .build(); final AWSExceptionManager exceptionManager = createExceptionManager(); @@ -662,15 +850,30 @@ public boolean renewLease(@NonNull final Lease lease) throw new DependencyException(e); } } catch (ConditionalCheckFailedException e) { + // run this code block only if the in-memory lease doesn't have the shutdown attributes + if (!lease.shutdownRequested()) { + final Lease ddbLease; + if (!e.hasItem()) { + // This is a workaround for unit testing and ddblocal since it doesn't return the item + // in the error response. Can remove it once the functionality is supported in ddblocal. + ddbLease = getLease(lease.leaseKey()); + } else { + ddbLease = serializer.fromDynamoRecord(e.item()); + } + if (ddbLease != null && ddbLease.shutdownRequested()) { + return handleGracefulShutdown(lease, ddbLease); + } + } log.debug( "Lease renewal failed for lease with key {} because the lease counter was not {}", lease.leaseKey(), lease.leaseCounter()); - // If we had a spurious retry during the Dynamo update, then this conditional PUT failure // might be incorrect. So, we get the item straight away and check if the lease owner + lease // counter are what we expected. 
- String expectedOwner = lease.leaseOwner(); + // We need to use actualOwner because leaseOwner might have been updated to the nextOwner + // in the previous renewal. + final String expectedOwner = lease.actualOwner(); Long expectedCounter = lease.leaseCounter() + 1; final Lease updatedLease = getLease(lease.leaseKey()); if (updatedLease == null @@ -683,7 +886,6 @@ public boolean renewLease(@NonNull final Lease lease) } catch (DynamoDbException | TimeoutException e) { throw new DependencyException(e); } - lease.leaseCounter(lease.leaseCounter() + 1); log.debug("Renewed lease with key {}", lease.leaseKey()); return true; @@ -737,7 +939,7 @@ public boolean takeLease(@NonNull final Lease lease, @NonNull final String owner lease.leaseCounter(lease.leaseCounter() + 1); lease.leaseOwner(owner); - + clearPendingShutdownAttributes(lease); if (oldOwner != null && !oldOwner.equals(owner)) { lease.ownerSwitchesSinceCheckpoint(lease.ownerSwitchesSinceCheckpoint() + 1); } @@ -747,6 +949,181 @@ public boolean takeLease(@NonNull final Lease lease, @NonNull final String owner return true; } + /** + * {@inheritDoc} + */ + @Override + public boolean initiateGracefulLeaseHandoff(final Lease lease, final String newOwner) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + final String oldOwner = lease.leaseOwner(); + + log.debug( + "Initiating graceful lease handoff for leaseKey {} from {} to {}", + lease.leaseKey(), + oldOwner, + newOwner); + + final AWSExceptionManager exceptionManager = createExceptionManager(); + exceptionManager.add(ConditionalCheckFailedException.class, t -> t); + + final Map updates = new HashMap<>(); + final Map expectedAttributeValueMap = new HashMap<>(); + // The update doesn't increment the leaseCounter because this can avoid interrupting the lease renewal of the + // current owner. 
This is safe because the graceful handoff is being initiated without competing for + // lease ownership or affecting the lease's existing state such as checkpoints. Additionally, once the lease + // enters the pendingCheckpoint state, the only remaining state change will be the reassignment, + // which causes the current owner to relinquish ownership so there will be no rewriting of pendingCheckpoint + // if there are concurrent LAM assignments somehow. + updates.put( + LEASE_OWNER_KEY, + AttributeValueUpdate.builder() + .value(DynamoUtils.createAttributeValue(newOwner)) + .action(AttributeAction.PUT) + .build()); + updates.put( + CHECKPOINT_OWNER, + AttributeValueUpdate.builder() + .value(DynamoUtils.createAttributeValue(lease.leaseOwner())) + .action(AttributeAction.PUT) + .build()); + + // The conditional checks ensure that the lease is not pending shutdown, + // so it should have the leaseOwner field, but not the checkpointOwner field. + expectedAttributeValueMap.put( + LEASE_OWNER_KEY, + ExpectedAttributeValue.builder() + .value(DynamoUtils.createAttributeValue(lease.leaseOwner())) + .build()); + expectedAttributeValueMap.put( + CHECKPOINT_OWNER, ExpectedAttributeValue.builder().exists(false).build()); + // see assignLease() + expectedAttributeValueMap.putAll(serializer.getDynamoExistentExpectation(lease.leaseKey())); + + // TODO: Add condition check on shardEnd. 
Don't graceful shut down a shardEnd shard + final UpdateItemRequest request = UpdateItemRequest.builder() + .tableName(table) + .key(serializer.getDynamoHashKey(lease)) + .expected(expectedAttributeValueMap) + .attributeUpdates(updates) + .returnValues(ReturnValue.ALL_NEW) + .build(); + + UpdateItemResponse response = null; + try { + try { + response = + FutureUtils.resolveOrCancelFuture(dynamoDBClient.updateItem(request), dynamoDbRequestTimeout); + } catch (ExecutionException e) { + throw exceptionManager.apply(e.getCause()); + } catch (InterruptedException e) { + throw new DependencyException(e); + } + } catch (final ConditionalCheckFailedException e) { + log.debug( + "Initiate graceful lease handoff failed for lease with key {} because the lease owner was not {}" + + " or the checkpoint owner was not empty or lease doesn't exist anymore", + lease.leaseKey(), + lease.leaseOwner()); + return false; + } catch (final DynamoDbException | TimeoutException e) { + throw convertAndRethrowExceptions("initiate_lease_handoff", lease.leaseKey(), e); + } + + final Lease updatedLease = serializer.fromDynamoRecord(response.attributes()); + lease.leaseCounter(updatedLease.leaseCounter()); + lease.leaseOwner(updatedLease.leaseOwner()); + lease.checkpointOwner(updatedLease.checkpointOwner()); + lease.ownerSwitchesSinceCheckpoint(updatedLease.ownerSwitchesSinceCheckpoint()); + + log.info("Initiated graceful lease handoff for lease {} from {} to {}.", lease.leaseKey(), oldOwner, newOwner); + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean assignLease(final Lease lease, final String newOwner) + throws DependencyException, InvalidStateException, ProvisionedThroughputException { + final String oldOwner = lease.leaseOwner(); + final String checkpointOwner = lease.checkpointOwner() == null ? 
"nobody" : lease.checkpointOwner(); + log.debug( + "Assigning lease with leaseKey {} from {} to {} with checkpoint owner {}", + lease.leaseKey(), + lease.leaseOwner() == null ? "nobody" : lease.leaseOwner(), + newOwner, + checkpointOwner); + + final AWSExceptionManager exceptionManager = createExceptionManager(); + exceptionManager.add(ConditionalCheckFailedException.class, t -> t); + + // Performs the PUT action on leaseOwner and ADD action on the leaseCounter + // Updating leaseCounter will cause the existing owner to lose the lease. + // This also clears checkpointOwner attribute to trigger an immediate assignment. + final Map updates = serializer.getDynamoAssignLeaseUpdate(lease, newOwner); + + // Assignment should only happen when leaseOwner match and lease still exists. Lease exists check is required + // because in case of no leaseOwner, the conditional check no leaseOwner exists is met + // in case when lease does not exist as well so lease exists check validates that the lease is not deleted + // during assignLease call. + final Map expectedAttributeValueMap = + serializer.getDynamoLeaseOwnerExpectation(lease); + + // Make sure that the lease is always present and not deleted between read and update of assignLease call + // and when the owner is null on lease as conditional check on owner wont come into play. 
+ expectedAttributeValueMap.putAll(serializer.getDynamoExistentExpectation(lease.leaseKey())); + + final UpdateItemRequest request = UpdateItemRequest.builder() + .tableName(table) + .key(serializer.getDynamoHashKey(lease)) + .expected(expectedAttributeValueMap) + .attributeUpdates(updates) + .returnValues(ReturnValue.ALL_NEW) + .returnValuesOnConditionCheckFailure(ReturnValuesOnConditionCheckFailure.ALL_OLD) + .build(); + + UpdateItemResponse response = null; + try { + try { + response = + FutureUtils.resolveOrCancelFuture(dynamoDBClient.updateItem(request), dynamoDbRequestTimeout); + } catch (ExecutionException e) { + throw exceptionManager.apply(e.getCause()); + } catch (InterruptedException e) { + throw new DependencyException(e); + } + } catch (final ConditionalCheckFailedException e) { + String failedCheckpointOwner = "nobody"; + if (e.hasItem()) { + failedCheckpointOwner = serializer.fromDynamoRecord(e.item()).checkpointOwner(); + } + log.debug( + "Assign lease failed for lease with key {} because the lease owner was not {} or the checkpoint" + + " owner was not {} but was {}", + lease.leaseKey(), + lease.leaseOwner(), + checkpointOwner, + failedCheckpointOwner); + return false; + } catch (final DynamoDbException | TimeoutException e) { + throw convertAndRethrowExceptions("assign", lease.leaseKey(), e); + } + + final Lease updatedLease = serializer.fromDynamoRecord(response.attributes()); + lease.leaseCounter(updatedLease.leaseCounter()); + lease.leaseOwner(updatedLease.leaseOwner()); + lease.ownerSwitchesSinceCheckpoint(updatedLease.ownerSwitchesSinceCheckpoint()); + clearPendingShutdownAttributes(lease); + log.info( + "Assigned lease {} ownership from {} to {} with checkpoint owner {}", + lease.leaseKey(), + oldOwner, + newOwner, + checkpointOwner); + + return true; + } + /** * {@inheritDoc} */ @@ -758,18 +1135,34 @@ public boolean evictLease(@NonNull final Lease lease) final AWSExceptionManager exceptionManager = createExceptionManager(); 
exceptionManager.add(ConditionalCheckFailedException.class, t -> t); - Map updates = serializer.getDynamoLeaseCounterUpdate(lease); - updates.putAll(serializer.getDynamoEvictLeaseUpdate(lease)); - UpdateItemRequest request = UpdateItemRequest.builder() + // AttributeValueUpdate: + // - remove either the leaseOwner or the checkpointOwner + // - increment leaseCounter + final Map updates = serializer.getDynamoEvictLeaseUpdate(lease); + + // ExpectedAttributeValue: + // This is similar to the condition we use in assignLease where we do conditional check on the owner fields + // and ensure lease still exists. This should ensure we are less likely to run into conditional check failure + // as the leaseCounter is frequently updated in other parts of the process. + // - ensure owner fields match + // - ensure lease still exists to ensure we don't end up creating malformed lease + final Map expectedAttributeValueMap = + serializer.getDynamoLeaseOwnerExpectation(lease); + expectedAttributeValueMap.putAll(serializer.getDynamoExistentExpectation(lease.leaseKey())); + + final UpdateItemRequest request = UpdateItemRequest.builder() .tableName(table) .key(serializer.getDynamoHashKey(lease)) - .expected(serializer.getDynamoLeaseOwnerExpectation(lease)) + .expected(expectedAttributeValueMap) .attributeUpdates(updates) + .returnValues(ReturnValue.ALL_NEW) .build(); + UpdateItemResponse response = null; try { try { - FutureUtils.resolveOrCancelFuture(dynamoDBClient.updateItem(request), dynamoDbRequestTimeout); + response = + FutureUtils.resolveOrCancelFuture(dynamoDBClient.updateItem(request), dynamoDbRequestTimeout); } catch (ExecutionException e) { throw exceptionManager.apply(e.getCause()); } catch (InterruptedException e) { @@ -786,8 +1179,10 @@ public boolean evictLease(@NonNull final Lease lease) throw convertAndRethrowExceptions("evict", lease.leaseKey(), e); } - lease.leaseOwner(null); - lease.leaseCounter(lease.leaseCounter() + 1); + final Lease updatedLease = 
serializer.fromDynamoRecord(response.attributes()); + lease.leaseCounter(updatedLease.leaseCounter()); + lease.leaseOwner(updatedLease.leaseOwner()); + clearPendingShutdownAttributes(lease); log.info("Evicted lease with leaseKey {}", lease.leaseKey()); return true; @@ -966,15 +1361,21 @@ protected DependencyException convertAndRethrowExceptions(String operation, Stri } } - private CreateTableRequest.Builder createTableRequestBuilder() { + private CreateTableRequest.Builder createTableRequestBuilder(final DdbTableConfig tableConfig) { final CreateTableRequest.Builder builder = CreateTableRequest.builder() .tableName(table) .keySchema(serializer.getKeySchema()) .attributeDefinitions(serializer.getAttributeDefinitions()) .deletionProtectionEnabled(leaseTableDeletionProtectionEnabled) .tags(tags); - if (BillingMode.PAY_PER_REQUEST.equals(billingMode)) { - builder.billingMode(billingMode); + if (BillingMode.PAY_PER_REQUEST.equals(tableConfig.billingMode())) { + builder.billingMode(BillingMode.PAY_PER_REQUEST); + } else { + builder.billingMode(BillingMode.PROVISIONED); + builder.provisionedThroughput(ProvisionedThroughput.builder() + .readCapacityUnits(tableConfig.readCapacity()) + .writeCapacityUnits(tableConfig.writeCapacity()) + .build()); } return builder; } @@ -991,4 +1392,32 @@ void performPostTableCreationAction() { .tableName(table) .build()); } + + private boolean handleGracefulShutdown(Lease lease, Lease ddbLease) + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + // Drop the lease if lease and updatedLease have different owners. This can happen if lease is taken + // by someone else. + if (!lease.actualOwner().equals(ddbLease.actualOwner())) { + log.warn("Lease and updatedLease have different owners. 
Lease {}, updatedLease {}", lease, ddbLease); + return false; + } + // This updates the checkpointOwner and leaseOwner of the authoritative lease so the + // thread handling the lease graceful shutdown can perform the shutdown logic by checking this signal. + lease.checkpointOwner(ddbLease.checkpointOwner()); + lease.leaseOwner(ddbLease.leaseOwner()); + log.debug( + "Retry renewing lease with key {} as shutdown requested for leaseOwner {} and " + "checkpointOwner {}", + lease.leaseKey(), + lease.leaseOwner(), + lease.checkpointOwner()); + // Retry lease renewal after updating the in-memory lease with shutdown attributes + return renewLease(lease); + } + + // used by takeLease, evictLease and assignLease. These methods result in change in lease ownership so these + // attribute should be also removed. + private static void clearPendingShutdownAttributes(Lease lease) { + lease.checkpointOwner(null); + lease.checkpointOwnerTimeoutTimestampMillis(null); + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewer.java index a53ec4abc..27e840181 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewer.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewer.java @@ -14,6 +14,8 @@ */ package software.amazon.kinesis.leases.dynamodb; +import java.math.BigDecimal; +import java.math.RoundingMode; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -26,8 +28,10 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import lombok.NonNull; import 
lombok.RequiredArgsConstructor; @@ -39,6 +43,7 @@ import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseRenewer; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.MultiStreamLease; import software.amazon.kinesis.leases.exceptions.DependencyException; import software.amazon.kinesis.leases.exceptions.InvalidStateException; @@ -48,21 +53,32 @@ import software.amazon.kinesis.metrics.MetricsScope; import software.amazon.kinesis.metrics.MetricsUtil; +import static java.util.Objects.nonNull; +import static software.amazon.kinesis.leases.LeaseStatsRecorder.BYTES_PER_KB; + /** * An implementation of {@link LeaseRenewer} that uses DynamoDB via {@link LeaseRefresher}. */ @Slf4j @KinesisClientInternalApi public class DynamoDBLeaseRenewer implements LeaseRenewer { + + /** + * 6 digit after decimal gives the granularity of 0.001 byte per second. + */ + private static final int DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL = 6; + private static final int RENEWAL_RETRIES = 2; private static final String RENEW_ALL_LEASES_DIMENSION = "RenewAllLeases"; + private static final String LEASE_RENEWER_INITIALIZE = "LeaseRenewerInitialize"; private final LeaseRefresher leaseRefresher; private final String workerIdentifier; private final long leaseDurationNanos; private final ExecutorService executorService; private final MetricsFactory metricsFactory; - + private final LeaseStatsRecorder leaseStatsRecorder; + private final Consumer leaseGracefulShutdownCallback; private final ConcurrentNavigableMap ownedLeases = new ConcurrentSkipListMap<>(); /** @@ -82,12 +98,16 @@ public DynamoDBLeaseRenewer( final String workerIdentifier, final long leaseDurationMillis, final ExecutorService executorService, - final MetricsFactory metricsFactory) { + final MetricsFactory metricsFactory, + final LeaseStatsRecorder leaseStatsRecorder, + final Consumer 
leaseGracefulShutdownCallback) { this.leaseRefresher = leaseRefresher; this.workerIdentifier = workerIdentifier; this.leaseDurationNanos = TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis); this.executorService = executorService; this.metricsFactory = metricsFactory; + this.leaseStatsRecorder = leaseStatsRecorder; + this.leaseGracefulShutdownCallback = leaseGracefulShutdownCallback; } /** @@ -187,11 +207,21 @@ private boolean renewLease(Lease lease, boolean renewEvenIfExpired) // ShutdownException). boolean isLeaseExpired = lease.isExpired(leaseDurationNanos, System.nanoTime()); if (renewEvenIfExpired || !isLeaseExpired) { + final Double throughputPerKBps = this.leaseStatsRecorder.getThroughputKBps(leaseKey); + if (nonNull(throughputPerKBps)) { + lease.throughputKBps(BigDecimal.valueOf(throughputPerKBps) + .setScale(DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL, RoundingMode.HALF_UP) + .doubleValue()); + } renewedLease = leaseRefresher.renewLease(lease); } if (renewedLease) { lease.lastCounterIncrementNanos(System.nanoTime()); } + if (lease.shutdownRequested()) { + // the underlying function will dedup + leaseGracefulShutdownCallback.accept(lease.copy()); + } } if (renewedLease) { @@ -391,6 +421,12 @@ public void addLeasesToRenew(Collection newLeases) { * every time we acquire a lease, it gets a new concurrency token. 
*/ authoritativeLease.concurrencyToken(UUID.randomUUID()); + if (nonNull(lease.throughputKBps())) { + leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder() + .leaseKey(lease.leaseKey()) + .bytes(Math.round(lease.throughputKBps() * BYTES_PER_KB)) // Convert KB to Bytes + .build()); + } ownedLeases.put(authoritativeLease.leaseKey(), authoritativeLease); } } @@ -409,6 +445,7 @@ public void clearCurrentlyHeldLeases() { */ @Override public void dropLease(Lease lease) { + leaseStatsRecorder.dropLeaseStats(lease.leaseKey()); ownedLeases.remove(lease.leaseKey()); } @@ -417,26 +454,48 @@ public void dropLease(Lease lease) { */ @Override public void initialize() throws DependencyException, InvalidStateException, ProvisionedThroughputException { - Collection leases = leaseRefresher.listLeases(); - List myLeases = new LinkedList<>(); - boolean renewEvenIfExpired = true; - - for (Lease lease : leases) { - if (workerIdentifier.equals(lease.leaseOwner())) { - log.info(" Worker {} found lease {}", workerIdentifier, lease); - // Okay to renew even if lease is expired, because we start with an empty list and we add the lease to - // our list only after a successful renew. So we don't need to worry about the edge case where we could - // continue renewing a lease after signaling a lease loss to the application. 
- - if (renewLease(lease, renewEvenIfExpired)) { - myLeases.add(lease); + final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, LEASE_RENEWER_INITIALIZE); + final ExecutorService singleThreadExecutorService = Executors.newSingleThreadExecutor(); + boolean success = false; + try { + final Map.Entry, List> response = + leaseRefresher.listLeasesParallely(singleThreadExecutorService, 1); + + if (!response.getValue().isEmpty()) { + log.warn("List of leaseKeys failed to deserialize : {} ", response.getValue()); + } + + final List myLeases = new LinkedList<>(); + boolean renewEvenIfExpired = true; + + for (Lease lease : response.getKey()) { + if (workerIdentifier.equals(lease.leaseOwner())) { + log.info(" Worker {} found lease {}", workerIdentifier, lease); + // Okay to renew even if lease is expired, because we start with an empty list and we add the lease + // to + // our list only after a successful renew. So we don't need to worry about the edge case where we + // could + // continue renewing a lease after signaling a lease loss to the application. + + if (renewLease(lease, renewEvenIfExpired)) { + myLeases.add(lease); + } + } else { + log.debug("Worker {} ignoring lease {} ", workerIdentifier, lease); } - } else { - log.debug("Worker {} ignoring lease {} ", workerIdentifier, lease); } - } - addLeasesToRenew(myLeases); + addLeasesToRenew(myLeases); + success = true; + } catch (final Exception e) { + // It's ok to swallow the exception here if we fail to discover all leases, as the assignment logic takes + // care of reassignment if some lease is expired. + log.warn("LeaseRefresher failed in initialization during renewing of pre assigned leases", e); + } finally { + singleThreadExecutorService.shutdown(); + MetricsUtil.addCount(scope, "Fault", success ? 
0 : 1, MetricsLevel.DETAILED); + MetricsUtil.endScope(scope); + } } private void verifyNotNull(Object object, String message) { diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseSerializer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseSerializer.java index c10cf475a..16d719bf8 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseSerializer.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseSerializer.java @@ -44,11 +44,8 @@ */ @KinesisClientInternalApi public class DynamoDBLeaseSerializer implements LeaseSerializer { - private static final String LEASE_KEY_KEY = "leaseKey"; - private static final String LEASE_OWNER_KEY = "leaseOwner"; private static final String LEASE_COUNTER_KEY = "leaseCounter"; private static final String OWNER_SWITCHES_KEY = "ownerSwitchesSinceCheckpoint"; - private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint"; private static final String CHECKPOINT_SUBSEQUENCE_NUMBER_KEY = "checkpointSubSequenceNumber"; private static final String PENDING_CHECKPOINT_SEQUENCE_KEY = "pendingCheckpoint"; private static final String PENDING_CHECKPOINT_SUBSEQUENCE_KEY = "pendingCheckpointSubSequenceNumber"; @@ -57,6 +54,11 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer { private static final String CHILD_SHARD_IDS_KEY = "childShardIds"; private static final String STARTING_HASH_KEY = "startingHashKey"; private static final String ENDING_HASH_KEY = "endingHashKey"; + private static final String THROUGHOUT_PUT_KBPS = "throughputKBps"; + private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint"; + static final String CHECKPOINT_OWNER = "checkpointOwner"; + static final String LEASE_OWNER_KEY = "leaseOwner"; + static final String LEASE_KEY_KEY = "leaseKey"; @Override public Map toDynamoRecord(final Lease lease) { @@ 
-110,6 +112,13 @@ public Map toDynamoRecord(final Lease lease) { lease.hashKeyRangeForLease().serializedEndingHashKey())); } + if (lease.throughputKBps() != null) { + result.put(THROUGHOUT_PUT_KBPS, DynamoUtils.createAttributeValue(lease.throughputKBps())); + } + + if (lease.checkpointOwner() != null) { + result.put(CHECKPOINT_OWNER, DynamoUtils.createAttributeValue(lease.checkpointOwner())); + } return result; } @@ -146,6 +155,14 @@ public Lease fromDynamoRecord(Map dynamoRecord, Lease le leaseToUpdate.hashKeyRange(HashKeyRangeForLease.deserialize(startingHashKey, endingHashKey)); } + if (DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS) != null) { + leaseToUpdate.throughputKBps(DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS)); + } + + if (DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER) != null) { + leaseToUpdate.checkpointOwner(DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER)); + } + return leaseToUpdate; } @@ -181,18 +198,9 @@ public Map getDynamoLeaseCounterExpectation(fina @Override public Map getDynamoLeaseOwnerExpectation(final Lease lease) { - Map result = new HashMap<>(); - - ExpectedAttributeValue.Builder eavBuilder = ExpectedAttributeValue.builder(); - - if (lease.leaseOwner() == null) { - eavBuilder = eavBuilder.exists(false); - } else { - eavBuilder = eavBuilder.value(DynamoUtils.createAttributeValue(lease.leaseOwner())); - } - - result.put(LEASE_OWNER_KEY, eavBuilder.build()); - + final Map result = new HashMap<>(); + result.put(LEASE_OWNER_KEY, buildExpectedAttributeValueIfExistsOrValue(lease.leaseOwner())); + result.put(CHECKPOINT_OWNER, buildExpectedAttributeValueIfExistsOrValue(lease.checkpointOwner())); return result; } @@ -247,9 +255,17 @@ public Map getDynamoTakeLeaseUpdate(final Lease le .value(DynamoUtils.createAttributeValue(owner)) .action(AttributeAction.PUT) .build()); + // this method is currently used by assignLease and takeLease. 
In both case we want the checkpoint owner to be + // deleted as this is a fresh assignment + result.put( + CHECKPOINT_OWNER, + AttributeValueUpdate.builder().action(AttributeAction.DELETE).build()); String oldOwner = lease.leaseOwner(); - if (oldOwner != null && !oldOwner.equals(owner)) { + String checkpointOwner = lease.checkpointOwner(); + // if checkpoint owner is not null, this update is supposed to remove the checkpoint owner + // and transfer the lease ownership to the leaseOwner so incrementing the owner switch key + if (oldOwner != null && !oldOwner.equals(owner) || (checkpointOwner != null && checkpointOwner.equals(owner))) { result.put( OWNER_SWITCHES_KEY, AttributeValueUpdate.builder() @@ -261,18 +277,38 @@ public Map getDynamoTakeLeaseUpdate(final Lease le return result; } + /** + * AssignLease performs the PUT action on the LeaseOwner and ADD (1) action on the leaseCounter. + * @param lease lease that needs to be assigned + * @param newOwner newLeaseOwner + * @return Map of AttributeName to update operation + */ @Override - public Map getDynamoEvictLeaseUpdate(final Lease lease) { - Map result = new HashMap<>(); - AttributeValue value = null; + public Map getDynamoAssignLeaseUpdate(final Lease lease, final String newOwner) { + Map result = getDynamoTakeLeaseUpdate(lease, newOwner); - result.put( - LEASE_OWNER_KEY, - AttributeValueUpdate.builder() - .value(value) - .action(AttributeAction.DELETE) - .build()); + result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd()); + return result; + } + @Override + public Map getDynamoEvictLeaseUpdate(final Lease lease) { + final Map result = new HashMap<>(); + // if checkpointOwner is not null, it means lease handoff is initiated. In this case we just remove the + // checkpoint owner so the next owner (leaseOwner) can pick up the lease without waiting for assignment. 
+ // Otherwise, remove the leaseOwner + if (lease.checkpointOwner() == null) { + result.put( + LEASE_OWNER_KEY, + AttributeValueUpdate.builder() + .action(AttributeAction.DELETE) + .build()); + } + // We always want to remove checkpointOwner, it's ok even if it's null + result.put( + CHECKPOINT_OWNER, + AttributeValueUpdate.builder().action(AttributeAction.DELETE).build()); + result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd()); return result; } @@ -394,4 +430,58 @@ public Collection getAttributeDefinitions() { return definitions; } + + @Override + public Collection getWorkerIdToLeaseKeyIndexKeySchema() { + final List keySchema = new ArrayList<>(); + keySchema.add(KeySchemaElement.builder() + .attributeName(LEASE_OWNER_KEY) + .keyType(KeyType.HASH) + .build()); + keySchema.add(KeySchemaElement.builder() + .attributeName(LEASE_KEY_KEY) + .keyType(KeyType.RANGE) + .build()); + return keySchema; + } + + @Override + public Collection getWorkerIdToLeaseKeyIndexAttributeDefinitions() { + final List definitions = new ArrayList<>(); + definitions.add(AttributeDefinition.builder() + .attributeName(LEASE_OWNER_KEY) + .attributeType(ScalarAttributeType.S) + .build()); + definitions.add(AttributeDefinition.builder() + .attributeName(LEASE_KEY_KEY) + .attributeType(ScalarAttributeType.S) + .build()); + return definitions; + } + + @Override + public Map getDynamoLeaseThroughputKbpsUpdate(Lease lease) { + final Map result = new HashMap<>(); + final AttributeValueUpdate avu = AttributeValueUpdate.builder() + .value(DynamoUtils.createAttributeValue(lease.throughputKBps())) + .action(AttributeAction.PUT) + .build(); + result.put(THROUGHOUT_PUT_KBPS, avu); + return result; + } + + private static ExpectedAttributeValue buildExpectedAttributeValueIfExistsOrValue(String value) { + return value == null + ? 
ExpectedAttributeValue.builder().exists(false).build() + : ExpectedAttributeValue.builder() + .value(DynamoUtils.createAttributeValue(value)) + .build(); + } + + private static AttributeValueUpdate getAttributeValueUpdateForAdd() { + return AttributeValueUpdate.builder() + .value(DynamoUtils.createAttributeValue(1L)) + .action(AttributeAction.ADD) + .build(); + } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTaker.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTaker.java index 7e4942049..2550e5452 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTaker.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTaker.java @@ -106,15 +106,6 @@ public DynamoDBLeaseTaker withMaxLeasesForWorker(int maxLeasesForWorker) { return this; } - /** - * @deprecated Misspelled method, use {@link DynamoDBLeaseTaker#withVeryOldLeaseDurationNanosMultiplier(int)} - */ - @Deprecated - public DynamoDBLeaseTaker withVeryOldLeaseDurationNanosMultipler(long veryOldLeaseDurationNanosMultipler) { - this.veryOldLeaseDurationNanosMultiplier = (int) veryOldLeaseDurationNanosMultipler; - return this; - } - /** * Overrides the default very old lease duration nanos multiplier to increase the threshold for taking very old leases. * Setting this to a higher value than 3 will increase the threshold for very old lease taking. 
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ConsumerStates.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ConsumerStates.java index 1ef197bd4..eb1a8f487 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ConsumerStates.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ConsumerStates.java @@ -266,7 +266,8 @@ public ConsumerTask createTask( argument.idleTimeInMilliseconds(), argument.aggregatorUtil(), argument.metricsFactory(), - argument.schemaRegistryDecoder()); + argument.schemaRegistryDecoder(), + argument.leaseCoordinator().leaseStatsRecorder()); } @Override @@ -336,7 +337,8 @@ public ConsumerTask createTask( argument.shardRecordProcessor(), argument.recordProcessorCheckpointer(), consumer.shutdownNotification(), - argument.shardInfo()); + argument.shardInfo(), + consumer.shardConsumerArgument().leaseCoordinator()); } @Override diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/LeaseGracefulShutdownHandler.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/LeaseGracefulShutdownHandler.java new file mode 100644 index 000000000..e1dfe1e18 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/LeaseGracefulShutdownHandler.java @@ -0,0 +1,213 @@ +package software.amazon.kinesis.lifecycle; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import lombok.Data; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.leases.Lease; +import 
software.amazon.kinesis.leases.LeaseCoordinator; +import software.amazon.kinesis.leases.ShardInfo; +import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; + +/** + * This class handles the graceful shutdown of shard consumers. When a lease is requested for shutdown, it will be + * enqueued from the lease renewal thread which will call the shard consumer of the lease to enqueue a shutdown request. + * The class monitors those leases and check if the shutdown is properly completed. + * If the shard consumer doesn't shut down within the given timeout, it will trigger a lease transfer. + */ +@Slf4j +@RequiredArgsConstructor +@KinesisClientInternalApi +public class LeaseGracefulShutdownHandler { + + // Arbitrary number to run a similar frequency as the scheduler based on shardConsumerDispatchPollIntervalMillis + // which is how fast scheduler triggers state change. It's ok to add few extra second delay to call shutdown since + // the leases should still be processing by the current owner so there should not be processing delay due to this. + private static final long SHUTDOWN_CHECK_INTERVAL_MILLIS = 2000; + + private final long shutdownTimeoutMillis; + private final ConcurrentMap shardInfoShardConsumerMap; + private final LeaseCoordinator leaseCoordinator; + private final Supplier currentTimeSupplier; + private final ConcurrentMap shardInfoLeasePendingShutdownMap = + new ConcurrentHashMap<>(); + private final ScheduledExecutorService executorService; + + private volatile boolean isRunning = false; + + /** + * Factory method to create a new instance of LeaseGracefulShutdownHandler. + * + * @param shutdownTimeoutMillis Timeout for graceful shutdown of shard consumers. 
+ * @param shardInfoShardConsumerMap Map of shard info to shard consumer instances. + * @param leaseCoordinator Lease coordinator instance to access lease information. + * @return A new instance of LeaseGracefulShutdownHandler. + */ + public static LeaseGracefulShutdownHandler create( + long shutdownTimeoutMillis, + ConcurrentMap shardInfoShardConsumerMap, + LeaseCoordinator leaseCoordinator) { + return new LeaseGracefulShutdownHandler( + shutdownTimeoutMillis, + shardInfoShardConsumerMap, + leaseCoordinator, + System::currentTimeMillis, + Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder() + .setNameFormat("LeaseGracefulShutdown-%04d") + .setDaemon(true) + .build())); + } + + /** + * Starts the shard consumer shutdown handler thread. + */ + public void start() { + if (!isRunning) { + log.info("Starting graceful lease handoff thread."); + executorService.scheduleAtFixedRate( + this::monitorGracefulShutdownLeases, 0, SHUTDOWN_CHECK_INTERVAL_MILLIS, TimeUnit.MILLISECONDS); + isRunning = true; + } else { + log.info("Graceful lease handoff thread already running, no need to start."); + } + } + + /** + * Stops the shard consumer shutdown handler thread. + */ + public void stop() { + if (isRunning) { + log.info("Stopping graceful lease handoff thread."); + executorService.shutdown(); + isRunning = false; + } else { + log.info("Graceful lease handoff thread already stopped."); + } + } + + /** + * Enqueue a shutdown request for the given lease if the lease has requested shutdown and the shard consumer + * is not already shutdown. + * + * @param lease The lease to enqueue a shutdown request for. 
+ */ + public void enqueueShutdown(Lease lease) { + if (lease == null || !lease.shutdownRequested() || !isRunning) { + return; + } + final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + final ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo); + if (consumer == null || consumer.isShutdown()) { + shardInfoLeasePendingShutdownMap.remove(shardInfo); + } else { + // there could be change shard get enqueued after getting removed. This should be okay because + // this enqueue will be no-op and will be removed again because the shardConsumer associated with the + // shardInfo is shutdown by then. + shardInfoLeasePendingShutdownMap.computeIfAbsent(shardInfo, key -> { + log.info("Calling graceful shutdown for lease {}", lease.leaseKey()); + LeasePendingShutdown leasePendingShutdown = new LeasePendingShutdown(lease, consumer); + initiateShutdown(leasePendingShutdown); + return leasePendingShutdown; + }); + } + } + + /** + * Wait for shutdown to complete or transfer ownership of lease to the next owner if timeout is met. + */ + private void monitorGracefulShutdownLeases() { + String leaseKey = null; + try { + for (ConcurrentMap.Entry entry : + shardInfoLeasePendingShutdownMap.entrySet()) { + final LeasePendingShutdown leasePendingShutdown = entry.getValue(); + final ShardInfo shardInfo = entry.getKey(); + leaseKey = leasePendingShutdown.lease.leaseKey(); + + if (leasePendingShutdown.shardConsumer.isShutdown() + || shardInfoShardConsumerMap.get(shardInfo) == null + || leaseCoordinator.getCurrentlyHeldLease(leaseKey) == null) { + logTimeoutMessage(leasePendingShutdown); + shardInfoLeasePendingShutdownMap.remove(shardInfo); + } else if (getCurrentTimeMillis() >= leasePendingShutdown.timeoutTimestampMillis + && !leasePendingShutdown.leaseTransferCalled) { + try { + log.info( + "Timeout {} millisecond reached waiting for lease {} to graceful handoff." 
+ + " Attempting to transfer the lease to {}", + shutdownTimeoutMillis, + leaseKey, + leasePendingShutdown.lease.leaseOwner()); + transferLeaseIfOwner(leasePendingShutdown); + } catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) { + log.warn("Failed to transfer lease for key {}. Will retry", leaseKey, e); + } + } + } + } catch (Exception e) { + log.error("Error in graceful shutdown for lease {}", leaseKey, e); + } + } + + private void initiateShutdown(LeasePendingShutdown tracker) { + tracker.shardConsumer.gracefulShutdown(null); + tracker.shutdownRequested = true; + tracker.timeoutTimestampMillis = getCurrentTimeMillis() + shutdownTimeoutMillis; + } + + private void logTimeoutMessage(LeasePendingShutdown leasePendingShutdown) { + if (leasePendingShutdown.leaseTransferCalled) { + final long timeElapsedSinceShutdownInitiated = + getCurrentTimeMillis() - leasePendingShutdown.timeoutTimestampMillis + shutdownTimeoutMillis; + log.info( + "Lease {} took {} milliseconds to complete the shutdown. " + + "Consider tuning the GracefulLeaseHandoffTimeoutMillis to prevent timeouts, " + + "if necessary.", + leasePendingShutdown.lease.leaseKey(), + timeElapsedSinceShutdownInitiated); + } + } + + private void transferLeaseIfOwner(LeasePendingShutdown leasePendingShutdown) + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final Lease lease = leasePendingShutdown.lease; + if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) { + // assignLease will increment the leaseCounter which will cause the heartbeat to stop on the current owner + // for the lease + leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner()); + } else { + // the worker ID check is just for sanity. We don't expect it to be different from the current worker. 
+ log.error( + "Lease {} checkpoint owner mismatch found {} but it should be {}", + lease.leaseKey(), + lease.checkpointOwner(), + leaseCoordinator.workerIdentifier()); + } + // mark it true because we don't want to enter the method again because update is not possible anymore. + leasePendingShutdown.leaseTransferCalled = true; + } + + private long getCurrentTimeMillis() { + return currentTimeSupplier.get(); + } + + @Data + private static class LeasePendingShutdown { + final Lease lease; + final ShardConsumer shardConsumer; + long timeoutTimestampMillis; + boolean shutdownRequested = false; + boolean leaseTransferCalled = false; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ProcessTask.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ProcessTask.java index 39a6bff6d..34543cc4c 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ProcessTask.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ProcessTask.java @@ -24,6 +24,7 @@ import software.amazon.kinesis.annotations.KinesisClientInternalApi; import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; import software.amazon.kinesis.common.StreamIdentifier; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput; @@ -65,6 +66,7 @@ public class ProcessTask implements ConsumerTask { private final AggregatorUtil aggregatorUtil; private final String shardInfoId; private final SchemaRegistryDecoder schemaRegistryDecoder; + private final LeaseStatsRecorder leaseStatsRecorder; public ProcessTask( @NonNull ShardInfo shardInfo, @@ -79,7 +81,8 @@ public ProcessTask( long idleTimeInMilliseconds, @NonNull AggregatorUtil aggregatorUtil, @NonNull MetricsFactory metricsFactory, - SchemaRegistryDecoder 
schemaRegistryDecoder) { + SchemaRegistryDecoder schemaRegistryDecoder, + @NonNull LeaseStatsRecorder leaseStatsRecorder) { this.shardInfo = shardInfo; this.shardInfoId = ShardInfo.getLeaseKey(shardInfo); this.shardRecordProcessor = shardRecordProcessor; @@ -91,6 +94,7 @@ public ProcessTask( this.idleTimeInMilliseconds = idleTimeInMilliseconds; this.metricsFactory = metricsFactory; this.schemaRegistryDecoder = schemaRegistryDecoder; + this.leaseStatsRecorder = leaseStatsRecorder; if (!skipShardSyncAtWorkerInitializationIfLeasesExist) { this.shard = shardDetector.shard(shardInfo.shardId()); @@ -173,6 +177,7 @@ public TaskResult call() { recordProcessorCheckpointer.largestPermittedCheckpointValue())); if (shouldCallProcessRecords(records)) { + publishLeaseStats(records); callProcessRecords(processRecordsInput, records); } success = true; @@ -197,6 +202,15 @@ public TaskResult call() { } } + private void publishLeaseStats(final List records) { + leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder() + .bytes(records.stream() + .mapToInt(record -> record.data().limit()) + .sum()) + .leaseKey(ShardInfo.getLeaseKey(shardInfo)) + .build()); + } + private List deaggregateAnyKplRecords(List records) { if (shard == null) { return aggregatorUtil.deaggregate(records); diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShardConsumer.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShardConsumer.java index a4c0a1e0e..2e519ee19 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShardConsumer.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShardConsumer.java @@ -21,7 +21,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.RejectedExecutionException; -import java.util.function.Function; import com.google.common.annotations.VisibleForTesting; import lombok.AccessLevel; @@ 
-35,8 +34,6 @@ import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput; import software.amazon.kinesis.lifecycle.events.TaskExecutionListenerInput; -import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator; -import software.amazon.kinesis.metrics.MetricsFactory; import software.amazon.kinesis.retrieval.RecordsPublisher; /** @@ -59,12 +56,6 @@ public class ShardConsumer { @NonNull private final Optional logWarningForTaskAfterMillis; - /** - * @deprecated unused; to be removed in a "major" version bump - */ - @Deprecated - private final Function taskMetricsDecorator; - private final int bufferSize; private final TaskExecutionListener taskExecutionListener; private final String streamIdentifier; @@ -95,27 +86,6 @@ public class ShardConsumer { private ProcessRecordsInput shardEndProcessRecordsInput; - @Deprecated - public ShardConsumer( - RecordsPublisher recordsPublisher, - ExecutorService executorService, - ShardInfo shardInfo, - Optional logWarningForTaskAfterMillis, - ShardConsumerArgument shardConsumerArgument, - TaskExecutionListener taskExecutionListener) { - this( - recordsPublisher, - executorService, - shardInfo, - logWarningForTaskAfterMillis, - shardConsumerArgument, - ConsumerStates.INITIAL_STATE, - ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()), - 8, - taskExecutionListener, - LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE); - } - public ShardConsumer( RecordsPublisher recordsPublisher, ExecutorService executorService, @@ -131,36 +101,11 @@ public ShardConsumer( logWarningForTaskAfterMillis, shardConsumerArgument, ConsumerStates.INITIAL_STATE, - ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()), 8, taskExecutionListener, readTimeoutsToIgnoreBeforeWarning); } - @Deprecated - public ShardConsumer( - RecordsPublisher recordsPublisher, - ExecutorService executorService, - ShardInfo shardInfo, - Optional 
logWarningForTaskAfterMillis, - ShardConsumerArgument shardConsumerArgument, - ConsumerState initialState, - Function taskMetricsDecorator, - int bufferSize, - TaskExecutionListener taskExecutionListener) { - this( - recordsPublisher, - executorService, - shardInfo, - logWarningForTaskAfterMillis, - shardConsumerArgument, - initialState, - taskMetricsDecorator, - bufferSize, - taskExecutionListener, - LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE); - } - // // TODO: Make bufferSize configurable // @@ -171,7 +116,6 @@ public ShardConsumer( Optional logWarningForTaskAfterMillis, ShardConsumerArgument shardConsumerArgument, ConsumerState initialState, - Function taskMetricsDecorator, int bufferSize, TaskExecutionListener taskExecutionListener, int readTimeoutsToIgnoreBeforeWarning) { @@ -183,7 +127,6 @@ public ShardConsumer( this.logWarningForTaskAfterMillis = logWarningForTaskAfterMillis; this.taskExecutionListener = taskExecutionListener; this.currentState = initialState; - this.taskMetricsDecorator = taskMetricsDecorator; subscriber = new ShardConsumerSubscriber( recordsPublisher, executorService, bufferSize, this, readTimeoutsToIgnoreBeforeWarning); this.bufferSize = bufferSize; @@ -484,17 +427,18 @@ private void logTaskException(TaskResult taskResult) { } /** - * Requests the shutdown of the this ShardConsumer. This should give the record processor a chance to checkpoint + * Requests the shutdown of the ShardConsumer. This should give the record processor a chance to checkpoint * before being shutdown. * - * @param shutdownNotification - * used to signal that the record processor has been given the chance to shutdown. + * @param shutdownNotification used to signal that the record processor has been given the chance to shut down. 
*/ public void gracefulShutdown(ShutdownNotification shutdownNotification) { if (subscriber != null) { subscriber.cancel(); } - this.shutdownNotification = shutdownNotification; + if (shutdownNotification != null) { + this.shutdownNotification = shutdownNotification; + } markForShutdown(ShutdownReason.REQUESTED); } @@ -542,21 +486,4 @@ public boolean isShutdownRequested() { return shutdownReason != null; } } - - /** - * Default task wrapping function for metrics - * - * @param metricsFactory - * the factory used for reporting metrics - * @return a function that will wrap the task with a metrics reporter - */ - private static Function metricsWrappingFunction(MetricsFactory metricsFactory) { - return (task) -> { - if (task == null) { - return null; - } else { - return new MetricsCollectingTaskDecorator(task, metricsFactory); - } - }; - } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTask.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTask.java index 5356cd23d..b479c8bad 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTask.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTask.java @@ -18,7 +18,12 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseCoordinator; import software.amazon.kinesis.leases.ShardInfo; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; import software.amazon.kinesis.lifecycle.events.ShutdownRequestedInput; import software.amazon.kinesis.processor.RecordProcessorCheckpointer; import 
software.amazon.kinesis.processor.ShardRecordProcessor; @@ -33,23 +38,41 @@ public class ShutdownNotificationTask implements ConsumerTask { private final ShardRecordProcessor shardRecordProcessor; private final RecordProcessorCheckpointer recordProcessorCheckpointer; private final ShutdownNotification shutdownNotification; - // TODO: remove if not used private final ShardInfo shardInfo; + private final LeaseCoordinator leaseCoordinator; @Override public TaskResult call() { + final String leaseKey = ShardInfo.getLeaseKey(shardInfo); + final Lease currentShardLease = leaseCoordinator.getCurrentlyHeldLease(leaseKey); try { try { shardRecordProcessor.shutdownRequested(ShutdownRequestedInput.builder() .checkpointer(recordProcessorCheckpointer) .build()); + attemptLeaseTransfer(currentShardLease); } catch (Exception ex) { return new TaskResult(ex); } - return new TaskResult(null); } finally { - shutdownNotification.shutdownNotificationComplete(); + if (shutdownNotification != null) { + shutdownNotification.shutdownNotificationComplete(); + } else { + // shutdownNotification is null if this is a shard level graceful shutdown instead of a worker level + // one. We need to drop lease like what's done in the shutdownNotificationComplete so we can + // transition to next state. 
+ leaseCoordinator.dropLease(currentShardLease); + } + } + } + + private void attemptLeaseTransfer(Lease lease) + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + if (lease != null && lease.shutdownRequested()) { + if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) { + leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner()); + } } } diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownTask.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownTask.java index 4059719f4..6e84bde6e 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownTask.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/lifecycle/ShutdownTask.java @@ -164,7 +164,6 @@ public TaskResult call() { } else { throwOnApplicationException(leaseKey, leaseLostAction, scope, startTime); } - log.debug("Shutting down retrieval strategy for shard {}.", leaseKey); recordsPublisher.shutdown(); diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/RetrievalConfig.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/RetrievalConfig.java index fdd6c4457..0c635fb45 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/RetrievalConfig.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/RetrievalConfig.java @@ -49,7 +49,7 @@ public class RetrievalConfig { */ public static final String KINESIS_CLIENT_LIB_USER_AGENT = "amazon-kinesis-client-library-java"; - public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "2.6.1-SNAPSHOT"; + public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "3.0.0"; /** * Client used to make calls to Kinesis for records retrieval diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PollingConfig.java 
b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PollingConfig.java index 1fe924d73..efd8b9dd6 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PollingConfig.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PollingConfig.java @@ -26,6 +26,7 @@ import lombok.Setter; import lombok.ToString; import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest; import software.amazon.kinesis.retrieval.DataFetcherProviderConfig; @@ -38,12 +39,15 @@ @Setter @ToString @EqualsAndHashCode +@Slf4j public class PollingConfig implements RetrievalSpecificConfig { public static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofSeconds(30); public static final int DEFAULT_MAX_RECORDS = 10000; + public static final long MIN_IDLE_MILLIS_BETWEEN_READS = 200L; + /** * Configurable functional interface to override the existing DataFetcher. */ @@ -138,9 +142,18 @@ public void setIdleTimeBetweenReadsInMillis(long idleTimeBetweenReadsInMillis) { /** * Set the value for how long the ShardConsumer should sleep in between calls to * {@link KinesisAsyncClient#getRecords(GetRecordsRequest)}. If this is not specified here the value provided in - * {@link RecordsFetcherFactory} will be used. + * {@link RecordsFetcherFactory} will be used. Cannot set value below MIN_IDLE_MILLIS_BETWEEN_READS. */ public PollingConfig idleTimeBetweenReadsInMillis(long idleTimeBetweenReadsInMillis) { + if (idleTimeBetweenReadsInMillis < MIN_IDLE_MILLIS_BETWEEN_READS) { + log.warn( + "idleTimeBetweenReadsInMillis must be greater than or equal to {} but current value is {}." 
+ + " Defaulting to minimum {}.", + MIN_IDLE_MILLIS_BETWEEN_READS, + idleTimeBetweenReadsInMillis, + MIN_IDLE_MILLIS_BETWEEN_READS); + idleTimeBetweenReadsInMillis = MIN_IDLE_MILLIS_BETWEEN_READS; + } usePollingConfigIdleTimeValue = true; this.idleTimeBetweenReadsInMillis = idleTimeBetweenReadsInMillis; return this; diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java index 02e2f7f59..32d767708 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisher.java @@ -61,6 +61,7 @@ import software.amazon.kinesis.retrieval.RecordsPublisher; import software.amazon.kinesis.retrieval.RecordsRetrieved; import software.amazon.kinesis.retrieval.RetryableRetrievalException; +import software.amazon.kinesis.retrieval.ThrottlingReporter; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; import static software.amazon.kinesis.common.DiagnosticUtils.takeDelayedDeliveryActionIfRequired; @@ -109,6 +110,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher { private boolean wasReset = false; private Instant lastEventDeliveryTime = Instant.EPOCH; private final RequestDetails lastSuccessfulRequestDetails = new RequestDetails(); + private final ThrottlingReporter throttlingReporter; @Data @Accessors(fluent = true) @@ -233,6 +235,7 @@ public PrefetchRecordsPublisher( @NonNull final MetricsFactory metricsFactory, @NonNull final String operation, @NonNull final String shardId, + final ThrottlingReporter throttlingReporter, final long awaitTerminationTimeoutMillis) { this.getRecordsRetrievalStrategy = getRecordsRetrievalStrategy; this.maxRecordsPerCall = maxRecordsPerCall; @@ -248,6 +251,7 @@ public 
PrefetchRecordsPublisher( this.idleMillisBetweenCalls = idleMillisBetweenCalls; this.defaultGetRecordsCacheDaemon = new DefaultGetRecordsCacheDaemon(); Validate.notEmpty(operation, "Operation cannot be empty"); + this.throttlingReporter = throttlingReporter; this.operation = operation; this.streamId = this.getRecordsRetrievalStrategy.dataFetcher().getStreamIdentifier(); this.streamAndShardId = this.streamId.serialize() + ":" + shardId; @@ -279,7 +283,8 @@ public PrefetchRecordsPublisher( final long idleMillisBetweenCalls, final MetricsFactory metricsFactory, final String operation, - final String shardId) { + final String shardId, + final ThrottlingReporter throttlingReporter) { this( maxPendingProcessRecordsInput, maxByteSize, @@ -291,6 +296,7 @@ public PrefetchRecordsPublisher( metricsFactory, operation, shardId, + throttlingReporter, DEFAULT_AWAIT_TERMINATION_TIMEOUT_MILLIS); } @@ -555,6 +561,7 @@ private void makeRetrievalAttempt() { recordsRetrieved.lastBatchSequenceNumber); addArrivedRecordsInput(recordsRetrieved); drainQueueForRequests(); + throttlingReporter.success(); } catch (PositionResetException pse) { throw pse; } catch (RetryableRetrievalException rre) { @@ -584,10 +591,11 @@ private void makeRetrievalAttempt() { publisherSession.dataFetcher().restartIterator(); } catch (ProvisionedThroughputExceededException e) { - // Update the lastSuccessfulCall if we get a throttling exception so that we back off idleMillis - // for the next call - lastSuccessfulCall = Instant.now(); - log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e); + log.error( + "{} : ProvisionedThroughputExceededException thrown while fetching records from Kinesis", + streamAndShardId, + e); + throttlingReporter.throttled(); } catch (SdkException e) { log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e); } finally { diff --git 
a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/SimpleRecordsFetcherFactory.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/SimpleRecordsFetcherFactory.java index 2f1dea62d..b53f15761 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/SimpleRecordsFetcherFactory.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/retrieval/polling/SimpleRecordsFetcherFactory.java @@ -24,6 +24,7 @@ import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy; import software.amazon.kinesis.retrieval.RecordsFetcherFactory; import software.amazon.kinesis.retrieval.RecordsPublisher; +import software.amazon.kinesis.retrieval.ThrottlingReporter; @Slf4j @KinesisClientInternalApi @@ -32,6 +33,7 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory { private int maxByteSize = 8 * 1024 * 1024; private int maxRecordsCount = 30000; private long idleMillisBetweenCalls = 1500L; + private int maxConsecutiveThrottles = 5; private DataFetchingStrategy dataFetchingStrategy = DataFetchingStrategy.DEFAULT; @Override @@ -56,7 +58,8 @@ public RecordsPublisher createRecordsFetcher( idleMillisBetweenCalls, metricsFactory, "ProcessTask", - shardId); + shardId, + new ThrottlingReporter(maxConsecutiveThrottles, shardId)); } @Override diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/schemaregistry/SchemaRegistryDecoder.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/schemaregistry/SchemaRegistryDecoder.java index 56742a5e7..cfcfba0ee 100644 --- a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/schemaregistry/SchemaRegistryDecoder.java +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/schemaregistry/SchemaRegistryDecoder.java @@ -7,7 +7,6 @@ import com.amazonaws.services.schemaregistry.common.Schema; import 
com.amazonaws.services.schemaregistry.deserializers.GlueSchemaRegistryDeserializer; import lombok.extern.slf4j.Slf4j; -import software.amazon.kinesis.common.KinesisClientLibraryPackage; import software.amazon.kinesis.retrieval.KinesisClientRecord; /** @@ -15,7 +14,7 @@ */ @Slf4j public class SchemaRegistryDecoder { - private static final String USER_AGENT_APP_NAME = "kcl" + "-" + KinesisClientLibraryPackage.VERSION; + private static final String USER_AGENT_APP_NAME = "kcl" + "-" + "3.0.0"; private final GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer; public SchemaRegistryDecoder(GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer) { diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/Cgroup.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/Cgroup.java new file mode 100644 index 000000000..3564467cc --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/Cgroup.java @@ -0,0 +1,61 @@ +package software.amazon.kinesis.utils; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class Cgroup { + + public static String readSingleLineFile(String path) { + BufferedReader bufferedReader = null; + try { + final File file = new File(path); + if (file.exists()) { + bufferedReader = new BufferedReader(new FileReader(file)); + return bufferedReader.readLine(); + } else { + throw new IllegalArgumentException(String.format("Failed to read file. 
%s does not exist", path)); + } + } catch (final Throwable t) { + if (t instanceof IllegalArgumentException) { + throw (IllegalArgumentException) t; + } + throw new IllegalArgumentException("Failed to read file.", t); + } finally { + try { + if (bufferedReader != null) { + bufferedReader.close(); + } + } catch (Throwable x) { + log.warn("Failed to close bufferedReader ", x); + } + } + } + + /** + * Calculates the number of available cpus from the cpuset + * See https://docs.kernel.org/admin-guide/cgroup-v2.html#cpuset for more information + * "0-7" represents 8 cores + * "0-4,6,8-10" represents 9 cores (cores 0,1,2,3,4 and core 6 and core 8,9,10) + * @param cpuSet a single line from the cgroup cpuset file + * @return the number of available cpus + */ + public static int getAvailableCpusFromEffectiveCpuSet(final String cpuSet) { + final String[] cpuSetArr = cpuSet.split(","); + + int sumCpus = 0; + for (String cpuSetGroup : cpuSetArr) { + if (cpuSetGroup.contains("-")) { + final String[] cpuSetGroupSplit = cpuSetGroup.split("-"); + // Values are inclusive + sumCpus += Integer.parseInt(cpuSetGroupSplit[1]) - Integer.parseInt(cpuSetGroupSplit[0]) + 1; + } else { + sumCpus += 1; + } + } + return sumCpus; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/DdbUtil.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/DdbUtil.java new file mode 100644 index 000000000..f90477260 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/DdbUtil.java @@ -0,0 +1,67 @@ +package software.amazon.kinesis.utils; + +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.Supplier; + +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import 
software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse; +import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement; +import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput; +import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest; +import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsResponse; +import software.amazon.kinesis.common.DdbTableConfig; + +import static java.util.Objects.nonNull; + +@Slf4j +public final class DdbUtil { + + @NonNull + public static Supplier> tableCreator( + final Supplier> keySchemaProvider, + final Supplier> attributeDefinitionProvider, + final DdbTableConfig tableConfig, + final DynamoDbAsyncClient dynamoDbAsyncClient) { + final CreateTableRequest.Builder createTableRequest = CreateTableRequest.builder() + .tableName(tableConfig.tableName()) + .keySchema(keySchemaProvider.get()) + .attributeDefinitions(attributeDefinitionProvider.get()) + .deletionProtectionEnabled(tableConfig.deletionProtectionEnabled()); + + if (nonNull(tableConfig.tags()) && !tableConfig.tags().isEmpty()) { + createTableRequest.tags(tableConfig.tags()); + } + + if (tableConfig.billingMode() == BillingMode.PROVISIONED) { + log.info( + "Creating table {} in provisioned mode with {}wcu and {}rcu", + tableConfig.tableName(), + tableConfig.writeCapacity(), + tableConfig.readCapacity()); + createTableRequest.provisionedThroughput(ProvisionedThroughput.builder() + .readCapacityUnits(tableConfig.readCapacity()) + .writeCapacityUnits(tableConfig.writeCapacity()) + .build()); + } + createTableRequest.billingMode(tableConfig.billingMode()); + return () -> dynamoDbAsyncClient.createTable(createTableRequest.build()); + } + + public static CompletableFuture pitrEnabler( + final DdbTableConfig tableConfig, final DynamoDbAsyncClient dynamoDbAsyncClient) { + if (tableConfig.pointInTimeRecoveryEnabled()) { + final 
UpdateContinuousBackupsRequest request = UpdateContinuousBackupsRequest.builder() + .tableName(tableConfig.tableName()) + .pointInTimeRecoverySpecification(builder -> builder.pointInTimeRecoveryEnabled(true)) + .build(); + return dynamoDbAsyncClient.updateContinuousBackups(request); + } + return CompletableFuture.completedFuture(null); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/ExponentialMovingAverage.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/ExponentialMovingAverage.java new file mode 100644 index 000000000..3b1a70e44 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/ExponentialMovingAverage.java @@ -0,0 +1,31 @@ +package software.amazon.kinesis.utils; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * Uses the formula mentioned below for simple ExponentialMovingAverage + * + * + * Values of alpha close to 1 have less of a smoothing effect and give greater weight to recent changes in the data, + * while values of alpha closer to 0 have a greater smoothing effect and are less responsive to recent changes. 
+ */ +@RequiredArgsConstructor +public class ExponentialMovingAverage { + + private final double alpha; + + @Getter + private double value; + + private boolean initialized = false; + + public void add(final double newValue) { + if (!initialized) { + this.value = newValue; + initialized = true; + } else { + this.value = alpha * newValue + (1 - alpha) * this.value; + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/Statistics.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/Statistics.java new file mode 100644 index 000000000..a01bf3922 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/utils/Statistics.java @@ -0,0 +1,44 @@ +package software.amazon.kinesis.utils; + +import java.util.AbstractMap; +import java.util.List; +import java.util.Map; + +public class Statistics { + + /** + * Calculates the simple mean of the given values + * @param values list of values (double) + * @return mean of the given values, if the {@param values} is empty then returns 0; + */ + public static double calculateSimpleMean(final List values) { + if (values.isEmpty()) { + return 0D; + } + double sum = 0.0; + for (final double i : values) { + sum += i; + } + return sum / values.size(); + } + + /** + * For the given values find the standard deviation (SD). + * For details of SD calculation ref : + * @param values list of values (double) + * @return Map.Entry of mean to standard deviation for {@param values}, if {@param values} is empty then return + * Map.Entry with 0 as mean and 0 as SD. 
+ */ + public static Map.Entry calculateStandardDeviationAndMean(final List values) { + if (values.isEmpty()) { + return new AbstractMap.SimpleEntry<>(0D, 0D); + } + final double mean = calculateSimpleMean(values); + // calculate the standard deviation + double standardDeviation = 0.0; + for (final double num : values) { + standardDeviation += Math.pow(num - mean, 2); + } + return new AbstractMap.SimpleEntry<>(mean, Math.sqrt(standardDeviation / values.size())); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/WorkerMetricsSelector.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/WorkerMetricsSelector.java new file mode 100644 index 000000000..ee298b937 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/WorkerMetricsSelector.java @@ -0,0 +1,92 @@ +package software.amazon.kinesis.worker; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.impl.container.Cgroupv1CpuWorkerMetric; +import software.amazon.kinesis.worker.metric.impl.container.Cgroupv2CpuWorkerMetric; +import software.amazon.kinesis.worker.metric.impl.container.EcsCpuWorkerMetric; +import software.amazon.kinesis.worker.metric.impl.linux.LinuxCpuWorkerMetric; +import software.amazon.kinesis.worker.platform.Ec2Resource; +import software.amazon.kinesis.worker.platform.EcsResource; +import software.amazon.kinesis.worker.platform.EksResource; +import software.amazon.kinesis.worker.platform.OperatingRangeDataProvider; +import software.amazon.kinesis.worker.platform.ResourceMetadataProvider; + +/** + * Class to select appropriate WorkerMetricStats based on the operating range 
provider that is available on the instance. + */ +@Slf4j +@RequiredArgsConstructor +@KinesisClientInternalApi +public class WorkerMetricsSelector { + + private static final OperatingRange DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE = + OperatingRange.builder().maxUtilization(100).build(); + + private final List workerComputePlatforms; + + /** + * Factory method to create an instance of WorkerMetricsSelector. + * + * @return WorkerMetricsSelector instance + */ + public static WorkerMetricsSelector create() { + final List resourceMetadataProviders = new ArrayList<>(); + resourceMetadataProviders.add(EcsResource.create()); + resourceMetadataProviders.add(EksResource.create()); + // ec2 has to be the last one to check + resourceMetadataProviders.add(Ec2Resource.create()); + return new WorkerMetricsSelector(resourceMetadataProviders); + } + + private Optional getOperatingRangeDataProvider() { + for (ResourceMetadataProvider platform : workerComputePlatforms) { + if (platform.isOnPlatform()) { + final ResourceMetadataProvider.ComputePlatform computePlatform = platform.getPlatform(); + log.info("Worker is running on {}", computePlatform); + return platform.getOperatingRangeDataProvider(); + } + } + return Optional.empty(); + } + + /** + * Returns a list of WorkerMetricStats based on the operating range provider the worker uses. 
+ * + * @return List of WorkerMetricStats + */ + public List getDefaultWorkerMetrics() { + final List workerMetrics = new ArrayList<>(); + final Optional optionalProvider = getOperatingRangeDataProvider(); + if (!optionalProvider.isPresent()) { + log.warn("Did not find an operating range metadata provider."); + return workerMetrics; + } + final OperatingRangeDataProvider dataProvider = optionalProvider.get(); + log.info("Worker has operating range metadata provider {} ", dataProvider); + switch (dataProvider) { + case LINUX_PROC: + workerMetrics.add(new LinuxCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE)); + break; + case LINUX_ECS_METADATA_KEY_V4: + workerMetrics.add(new EcsCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE)); + break; + case LINUX_EKS_CGROUP_V2: + workerMetrics.add(new Cgroupv2CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE)); + break; + case LINUX_EKS_CGROUP_V1: + workerMetrics.add(new Cgroupv1CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE)); + break; + default: + break; + } + return workerMetrics; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/OperatingRange.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/OperatingRange.java new file mode 100644 index 000000000..0bc6e0fe2 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/OperatingRange.java @@ -0,0 +1,20 @@ +package software.amazon.kinesis.worker.metric; + +import com.google.common.base.Preconditions; +import lombok.Builder; +import lombok.Data; + +@Data +@Builder +public class OperatingRange { + + /** + * Max utilization percentage allowed for the workerMetrics. 
+ */ + private final int maxUtilization; + + private OperatingRange(final int maxUtilization) { + Preconditions.checkArgument(!(maxUtilization < 0 || maxUtilization > 100), "Invalid maxUtilization value"); + this.maxUtilization = maxUtilization; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/WorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/WorkerMetric.java new file mode 100644 index 000000000..72f4b864b --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/WorkerMetric.java @@ -0,0 +1,52 @@ +package software.amazon.kinesis.worker.metric; + +import com.google.common.base.Preconditions; +import lombok.Builder; +import lombok.Getter; +import lombok.NonNull; + +public interface WorkerMetric { + /** + * WorkerMetricStats short name that is used as attribute name for it in storage. + * @return short name for the WorkerMetricStats + */ + String getShortName(); + + /** + * Current WorkerMetricValue. WorkerMetricValue is a normalized percentage value to its max configured limits. + * E.g., if for a worker max network bandwidth is 10Gbps and current used bandwidth is 2Gbps, then WorkerMetricValue for + * NetworkWorkerMetrics will be 20 (%). + * + * @return WorkerMetricValue between 0 and 100 (both inclusive) + */ + WorkerMetricValue capture(); + + /** + * Gets the operating range for this workerMetrics + * @return Operating range for this workerMetrics + */ + OperatingRange getOperatingRange(); + + /** + * Type of the current WorkerMetricStats. + * @return WorkerMetricType + */ + WorkerMetricType getWorkerMetricType(); + + /** + * WorkerMetricValue model class is used as return type for the capture() method to have strong checks at the build + * time of the object itself. 
+ */ + @Builder + class WorkerMetricValue { + + @Getter + private final Double value; + + private WorkerMetricValue(@NonNull final Double value) { + Preconditions.checkArgument( + !(value < 0 || value > 100), value + " is either less than 0 or greater than 100"); + this.value = value; + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/WorkerMetricType.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/WorkerMetricType.java new file mode 100644 index 000000000..84ccf365b --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/WorkerMetricType.java @@ -0,0 +1,16 @@ +package software.amazon.kinesis.worker.metric; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public enum WorkerMetricType { + CPU("C"), + MEMORY("M"), + NETWORK_IN("NI"), + NETWORK_OUT("NO"), + THROUGHPUT("T"); + + @Getter + private final String shortName; +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv1CpuWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv1CpuWorkerMetric.java new file mode 100644 index 000000000..fc8848ef9 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv1CpuWorkerMetric.java @@ -0,0 +1,128 @@ +package software.amazon.kinesis.worker.metric.impl.container; + +import java.time.Clock; +import java.util.concurrent.TimeUnit; + +import lombok.AccessLevel; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet; +import static 
software.amazon.kinesis.utils.Cgroup.readSingleLineFile; + +/** + * Utilizes Linux Control Groups by reading cpu time and available cpu from cgroup directory.This works for Elastic + * Kubernetes Service (EKS) containers running on Linux instances which use cgroupv1. + * + * EC2 instances must use a Linux instance that uses cgroupv1. Amazon Linux 2 uses cgroupv1. + * Fargate versions 1.4.0 and 1.3.0 use Amazon Linux 2 and can use this. + * + * CPU time is measured in CPU cores time. A container is limited by amount of CPU core time it is allocated. So if over + * a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be 25%. + * + * When this is invoked for the first time, the value returned is always 0 as the prev values are not available + * to calculate the diff. + * In case the file is not present or any other exception occurs, this throws IllegalArgumentException. + */ +@Slf4j +@RequiredArgsConstructor(access = AccessLevel.PACKAGE) +public class Cgroupv1CpuWorkerMetric implements WorkerMetric { + + private static final Object LOCK_OBJECT = new Object(); + private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU; + private static final String CGROUP_ROOT = "/sys/fs/cgroup/"; + private static final String CPU_TIME_FILE = CGROUP_ROOT + "cpu/cpuacct.usage"; + private static final String CPU_CFS_QUOTA_FILE = CGROUP_ROOT + "cpu/cpu.cfs_quota_us"; + private static final String CPU_CFS_PERIOD_FILE = CGROUP_ROOT + "cpu/cpu.cfs_period_us"; + private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset/cpuset.effective_cpus"; + private final OperatingRange operatingRange; + private final String cpuTimeFile; + private final String cfsQuotaFile; + private final String cfsPeriodFile; + private final String effectiveCpuSetFile; + private final Clock clock; + private double cpuLimit = -1; + private long lastCpuUseTimeNanos = 0; + private long lastSystemTimeNanos = 0; + + public 
Cgroupv1CpuWorkerMetric(final OperatingRange operatingRange) { + this( + operatingRange, + CPU_TIME_FILE, + CPU_CFS_QUOTA_FILE, + CPU_CFS_PERIOD_FILE, + EFFECTIVE_CPU_SET_FILE, + Clock.systemUTC()); + } + + @Override + public String getShortName() { + return CPU_WORKER_METRICS_TYPE.getShortName(); + } + + @Override + public WorkerMetricValue capture() { + return WorkerMetricValue.builder().value(calculateCpuUsage()).build(); + } + + private double calculateCpuUsage() { + if (cpuLimit == -1) { + cpuLimit = calculateCpuLimit(); + } + + final long cpuTimeNanos = Long.parseLong(readSingleLineFile(cpuTimeFile)); + final long currentTimeNanos = TimeUnit.MILLISECONDS.toNanos(clock.millis()); + + boolean skip = false; + double cpuCoreTimeUsed; + synchronized (LOCK_OBJECT) { + if (lastCpuUseTimeNanos == 0 && lastSystemTimeNanos == 0) { + // Case where this is a first call so no diff available + skip = true; + } + + final long nanoTimeDiff = currentTimeNanos - lastSystemTimeNanos; + final long cpuUseDiff = cpuTimeNanos - lastCpuUseTimeNanos; + // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be + // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 then that means + // that less than 1 CPU core was used. 
+ cpuCoreTimeUsed = ((double) cpuUseDiff / nanoTimeDiff); + + lastCpuUseTimeNanos = cpuTimeNanos; + lastSystemTimeNanos = currentTimeNanos; + } + + if (skip) { + return 0D; + } else { + // In case of rounding error, treat everything above 100% as 100% + return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0); + } + } + + private double calculateCpuLimit() { + // Documentation on these values: + // https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs + final long cfsQuota = Long.parseLong(readSingleLineFile(cfsQuotaFile)); + final long cfsPeriod = Long.parseLong(readSingleLineFile(cfsPeriodFile)); + if (cfsQuota == -1) { + // If quota is -1, a limit is not set on the container. The container can use all available cores. + return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile)); + } else { + return ((double) cfsQuota) / cfsPeriod; + } + } + + @Override + public OperatingRange getOperatingRange() { + return operatingRange; + } + + @Override + public WorkerMetricType getWorkerMetricType() { + return CPU_WORKER_METRICS_TYPE; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv2CpuWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv2CpuWorkerMetric.java new file mode 100644 index 000000000..32d767170 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv2CpuWorkerMetric.java @@ -0,0 +1,128 @@ +package software.amazon.kinesis.worker.metric.impl.container; + +import java.time.Clock; +import java.util.concurrent.TimeUnit; + +import lombok.AccessLevel; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import 
software.amazon.kinesis.worker.metric.WorkerMetricType; + +import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet; +import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile; + +/** + * Utilizes Linux Control Groups by reading cpu time and available cpu from cgroup directory. This works for Elastic + * Kubernetes Service (EKS) containers running on Linux instances which use cgroupv2. + * + * EC2 instances must use a Linux instance that uses cgroupv2. Amazon Linux 2023 uses cgroupv2. + * + * CPU time is measured in CPU cores time. A container is limited by amount of CPU core time it is allocated. So if over + * a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be 25%. + * + * When this is invoked for the first time, the value returned is always 0 as the prev values are not available + * to calculate the diff. + * In case the file is not present or any other exception occurs, this throws IllegalArgumentException. 
+ */ +@Slf4j +@RequiredArgsConstructor(access = AccessLevel.PACKAGE) +public class Cgroupv2CpuWorkerMetric implements WorkerMetric { + + private static final Object LOCK_OBJECT = new Object(); + private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU; + private static final String CGROUP_ROOT = "/sys/fs/cgroup/"; + private static final String CPU_MAX_FILE = CGROUP_ROOT + "cpu.max"; + private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset.cpus.effective"; + private static final String CPU_STAT_FILE = CGROUP_ROOT + "cpu.stat"; + private final OperatingRange operatingRange; + private final String cpuMaxFile; + private final String effectiveCpuSetFile; + private final String cpuStatFile; + private final Clock clock; + private double cpuLimit = -1; + private long lastCpuUseTimeMicros = 0; + private long lastSystemTimeMicros = 0; + + public Cgroupv2CpuWorkerMetric(final OperatingRange operatingRange) { + this(operatingRange, CPU_MAX_FILE, EFFECTIVE_CPU_SET_FILE, CPU_STAT_FILE, Clock.systemUTC()); + } + + @Override + public String getShortName() { + return CPU_WORKER_METRICS_TYPE.getShortName(); + } + + @Override + public WorkerMetricValue capture() { + return WorkerMetricValue.builder().value(calculateCpuUsage()).build(); + } + + private double calculateCpuUsage() { + if (cpuLimit == -1) { + cpuLimit = calculateCpuLimit(); + } + + // The first line of this file is of the format + // usage_usec $MICROSECONDS + // where $MICROSECONDS is always a number + final String cpuUsageStat = readSingleLineFile(cpuStatFile); + final long cpuTimeMicros = Long.parseLong(cpuUsageStat.split(" ")[1]); + final long currentTimeMicros = TimeUnit.MILLISECONDS.toMicros(clock.millis()); + + boolean skip = false; + double cpuCoreTimeUsed; + synchronized (LOCK_OBJECT) { + if (lastCpuUseTimeMicros == 0 && lastSystemTimeMicros == 0) { + // Case where this is a first call so no diff available + skip = true; + } + + final long microTimeDiff = 
currentTimeMicros - lastSystemTimeMicros; + final long cpuUseDiff = cpuTimeMicros - lastCpuUseTimeMicros; + // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be + // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 then that means + // that less than 1 CPU core was used. + cpuCoreTimeUsed = ((double) cpuUseDiff / microTimeDiff); + + lastCpuUseTimeMicros = cpuTimeMicros; + lastSystemTimeMicros = currentTimeMicros; + } + + if (skip) { + return 0D; + } else { + // In case of rounding error, treat everything above 100% as 100% + return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0); + } + } + + private double calculateCpuLimit() { + // This file contains two values separated by space ($MAX $PERIOD). + // $MAX is either a number or "max" + // $PERIOD is always a number + final String cpuMax = readSingleLineFile(cpuMaxFile); + final String[] cpuMaxArr = cpuMax.split(" "); + final String max = cpuMaxArr[0]; + final String period = cpuMaxArr[1]; + + if (max.equals("max")) { + // if first value in file is "max", a limit is not set on the container. 
The container can use all available + // cores + return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile)); + } else { + return Double.parseDouble(max) / Long.parseLong(period); + } + } + + @Override + public OperatingRange getOperatingRange() { + return operatingRange; + } + + @Override + public WorkerMetricType getWorkerMetricType() { + return CPU_WORKER_METRICS_TYPE; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/EcsCpuWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/EcsCpuWorkerMetric.java new file mode 100644 index 000000000..e95c83dd9 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/container/EcsCpuWorkerMetric.java @@ -0,0 +1,203 @@ +package software.amazon.kinesis.worker.metric.impl.container; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.charset.Charset; +import java.util.Iterator; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.AccessLevel; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +/** + * Queries the Amazon ECS task metadata endpoint version 4 to get CPU metric stats as well as allocated CPU to the ECS task and + * containers to calculate percent CPU utilization. 
This works for all ECS containers running on the following + * platforms: + * + * Fargate agent version 1.4.0 + * EC2 instance running at least 1.39.0 of the Amazon ECS container agent + * + * For more information, see + * https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4.html + */ +@Slf4j +@RequiredArgsConstructor(access = AccessLevel.PACKAGE) +public class EcsCpuWorkerMetric implements WorkerMetric { + + private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU; + private static final String SYS_VAR_ECS_METADATA_URI = "ECS_CONTAINER_METADATA_URI_V4"; + private final OperatingRange operatingRange; + private final String containerStatsUri; + private final String taskMetadataUri; + private final String containerMetadataUri; + private double containerCpuLimit = -1; + private double onlineCpus = -1; + + public EcsCpuWorkerMetric(final OperatingRange operatingRange) { + this.operatingRange = operatingRange; + + final String ecsMetadataRootUri = System.getenv(SYS_VAR_ECS_METADATA_URI); + if (ecsMetadataRootUri != null) { + this.containerStatsUri = ecsMetadataRootUri + "/stats"; + this.taskMetadataUri = ecsMetadataRootUri + "/task"; + this.containerMetadataUri = ecsMetadataRootUri; + } else { + this.containerStatsUri = null; + this.taskMetadataUri = null; + this.containerMetadataUri = null; + } + } + + @Override + public String getShortName() { + return CPU_WORKER_METRICS_TYPE.getShortName(); + } + + @Override + public WorkerMetricValue capture() { + return WorkerMetricValue.builder().value(calculateCpuUsage()).build(); + } + + private double calculateCpuUsage() { + // Read current container metrics + final JsonNode containerStatsRootNode = readEcsMetadata(containerStatsUri); + + final long cpuUsage = containerStatsRootNode + .path("cpu_stats") + .path("cpu_usage") + .path("total_usage") + .asLong(); + final long systemCpuUsage = containerStatsRootNode + .path("cpu_stats") + .path("system_cpu_usage") + 
.asLong(); + final long prevCpuUsage = containerStatsRootNode + .path("precpu_stats") + .path("cpu_usage") + .path("total_usage") + .asLong(); + final long prevSystemCpuUsage = containerStatsRootNode + .path("precpu_stats") + .path("system_cpu_usage") + .asLong(); + + if (containerCpuLimit == -1 && onlineCpus == -1) { + onlineCpus = + containerStatsRootNode.path("cpu_stats").path("online_cpus").asDouble(); + containerCpuLimit = calculateContainerCpuLimit(onlineCpus); + } + + // precpu_stats values will be 0 if it is the first call + if (prevCpuUsage == 0 && prevSystemCpuUsage == 0) { + return 0D; + } + + final long cpuUsageDiff = cpuUsage - prevCpuUsage; + final long systemCpuUsageDiff = systemCpuUsage - prevSystemCpuUsage; + + // Edge case when there is no systemCpu usage, then that means that 100% of the cpu is used. + if (systemCpuUsageDiff == 0) { + return 100D; + } + + // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be + // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 then that means + // that less than 1 CPU core was used. + final double cpuCoreTimeUsed = ((double) cpuUsageDiff) / systemCpuUsageDiff * onlineCpus; + + // This calculated value is cpu utilization percent. This can burst past 100%, but we will take min with 100% + // because only this amount is guaranteed CPU time to the container + return Math.min(100.0, cpuCoreTimeUsed / containerCpuLimit * 100.0); + } + + /** + * All containers in an ECS task can use up to the task level CPU limit. However, CPU is shared among all containers + * in the task according to the relative ratio of CPU shares allocated to each container. + * i.e. 
+ * CPU limit of task is 8 cores + * Container 1 with 10 CPU shares + * Container 2 with 30 CPU shares + * Sum of CPU shares is 40 + * Container 1 can use 25% of the 8 cores in CPU core time, so this function returns 2 + * Container 2 can use 75% of the 8 cores in CPU core time, so this function returns 6 + * @return the CPU core time allocated to the container + */ + private double calculateContainerCpuLimit(double onlineCpus) { + // Read task metadata + final JsonNode taskStatsRootNode = readEcsMetadata(taskMetadataUri); + double taskCpuLimit = calculateTaskCpuLimit(taskStatsRootNode, onlineCpus); + + // Read current container metadata + final String currentContainerId = + readEcsMetadata(containerMetadataUri).path("DockerId").asText(); + final Iterator containersIterator = + taskStatsRootNode.path("Containers").iterator(); + + // The default if this value is not provided is 2 CPU shares (in ECS agent versions >= 1.2.0) + int currentContainerCpuShare = 2; + int containersCpuShareSum = 0; + while (containersIterator.hasNext()) { + final JsonNode containerNode = containersIterator.next(); + final int containerCpuShare = + containerNode.path("Limits").path("CPU").asInt(); + if (containerNode.path("DockerId").asText().equals(currentContainerId)) { + currentContainerCpuShare = containerCpuShare; + } + containersCpuShareSum += containerCpuShare; + } + return ((double) currentContainerCpuShare) / containersCpuShareSum * taskCpuLimit; + } + + private double calculateTaskCpuLimit(JsonNode taskStatsRootNode, double onlineCpus) { + final JsonNode limitsNode = taskStatsRootNode.path("Limits"); + if (limitsNode.isMissingNode()) { + // Neither a memory limit nor cpu limit is set at the task level (possible on EC2 instances) + return onlineCpus; + } + final JsonNode cpuLimitsNode = limitsNode.path("CPU"); + if (cpuLimitsNode.isMissingNode()) { + // When only a memory limit is set at the task level (possible on ec2 instances) + return onlineCpus; + } + return 
cpuLimitsNode.asDouble(); + } + + private JsonNode readEcsMetadata(String uri) { + if (this.containerMetadataUri == null) { + throw new IllegalArgumentException("No ECS metadata endpoint found from environment variables."); + } + + URL url; + try { + url = new URL(uri); + } catch (MalformedURLException e) { + throw new IllegalArgumentException( + "CpuWorkerMetrics is not configured properly. ECS metadata url is malformed", e); + } + try { + final ObjectMapper mapper = new ObjectMapper(); + final JsonNode rootNode = + mapper.readValue(new InputStreamReader(url.openStream(), Charset.defaultCharset()), JsonNode.class); + return rootNode; + } catch (IOException e) { + throw new IllegalArgumentException("Error in parsing ECS metadata", e); + } + } + + @Override + public OperatingRange getOperatingRange() { + return operatingRange; + } + + @Override + public WorkerMetricType getWorkerMetricType() { + return CPU_WORKER_METRICS_TYPE; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/jmx/HeapMemoryAfterGCWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/jmx/HeapMemoryAfterGCWorkerMetric.java new file mode 100644 index 000000000..7d407a6dc --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/jmx/HeapMemoryAfterGCWorkerMetric.java @@ -0,0 +1,108 @@ +package software.amazon.kinesis.worker.metric.impl.jmx; + +import java.lang.management.ManagementFactory; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.management.MBeanServerConnection; +import javax.management.ObjectName; +import javax.management.openmbean.CompositeDataSupport; + +import lombok.RequiredArgsConstructor; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +/** + * Memory WorkerMetricStats 
that reads the heap memory after GC. The way memory usage is calculated that, all the + * available memory pools are read except Eden (as this is allocation buffer) and used memory and total memory is + * computed. + * Then percentage is computed by dividing used memory by total memory. + * + */ +@RequiredArgsConstructor +public class HeapMemoryAfterGCWorkerMetric implements WorkerMetric { + + private static final WorkerMetricType MEMORY_WORKER_METRICS_TYPE = WorkerMetricType.MEMORY; + + private final OperatingRange operatingRange; + + private Set garbageCollectorMxBeans; + private Set memoryPoolNames; + + @Override + public String getShortName() { + return MEMORY_WORKER_METRICS_TYPE.getShortName(); + } + + @Override + public WorkerMetricValue capture() { + return WorkerMetricValue.builder() + .value(getAfterGCMemoryUsage(ManagementFactory.getPlatformMBeanServer())) + .build(); + } + + private double getAfterGCMemoryUsage(final MBeanServerConnection connection) { + try { + if (garbageCollectorMxBeans == null) { + garbageCollectorMxBeans = connection.queryNames( + new ObjectName(ManagementFactory.GARBAGE_COLLECTOR_MXBEAN_DOMAIN_TYPE + ",*"), null); + + memoryPoolNames = new HashSet(); + for (ObjectName on : garbageCollectorMxBeans) { + String[] poolNames = (String[]) connection.getAttribute(on, "MemoryPoolNames"); + // A given MemoryPool may be associated with multiple GarbageCollectors, + // but will appear only once in memoryPoolNames + Collections.addAll(memoryPoolNames, poolNames); + } + } + + // Report on the sum of non-Eden HEAP spaces after the last gc + Long used, max; + long usedKb = 0, totalKb = 0; + + for (String poolName : memoryPoolNames) { + if (!poolName.contains("Eden")) { + // Ignore Eden, since it's just an allocation buffer + ObjectName on = + new ObjectName(ManagementFactory.MEMORY_POOL_MXBEAN_DOMAIN_TYPE + ",name=" + poolName); + String mt = (String) connection.getAttribute(on, "Type"); + if (mt.equals("HEAP")) { + // Paranoia: ignore non-HEAP 
memory pools + CompositeDataSupport data = + (CompositeDataSupport) connection.getAttribute(on, "CollectionUsage"); + + used = (Long) data.get("used"); + usedKb += used / 1024; + + max = (Long) data.get("max"); + // max can be undefined (-1) + // http://docs.oracle.com/javase/7/docs/api/java/lang/management/MemoryUsage.html + totalKb += max == -1 ? 0 : max / 1024; + } + } + + if (totalKb <= 0) { + throw new IllegalArgumentException("Total memory value for JVM must be greater than zero"); + } + + return 100.0 * (double) usedKb / (double) totalKb; + } catch (final Exception e) { + if (e instanceof IllegalArgumentException) { + throw (IllegalArgumentException) e; + } + throw new IllegalArgumentException(e); + } + } + + @Override + public OperatingRange getOperatingRange() { + return operatingRange; + } + + @Override + public WorkerMetricType getWorkerMetricType() { + return MEMORY_WORKER_METRICS_TYPE; + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxCpuWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxCpuWorkerMetric.java new file mode 100644 index 000000000..51721373a --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxCpuWorkerMetric.java @@ -0,0 +1,133 @@ +package software.amazon.kinesis.worker.metric.impl.linux; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; + +import lombok.AccessLevel; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +/** + * Reads CPU usage statistics out of /proc/stat file that is present on the EC2 instances. The value is % utilization + * of the CPU. 
+ * When this is invoked for the first time, the value returned is always 0 as the prev values are not available
+ * to calculate the diff. If the file hasn't changed this also returns 0.
+ * In case the file is not present or any other exception occurs, this throws IllegalArgumentException.
+ */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class LinuxCpuWorkerMetric implements WorkerMetric {

    // Guards the last* snapshot fields below so concurrent capture() calls see a
    // consistent previous sample.
    private static final Object LOCK_OBJECT = new Object();
    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;
    private final OperatingRange operatingRange;
    // Path of the stat file to read; package-private constructor allows overriding in tests.
    private final String statFile;
    // Snapshot of the previous reading, used to compute deltas on the next capture().
    private long lastUsr, lastIow, lastSys, lastIdl, lastTot;
    private String lastLine;

    public LinuxCpuWorkerMetric(final OperatingRange operatingRange) {
        this(operatingRange, "/proc/stat");
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    /**
     * Reads the aggregate CPU line of the stat file and returns the % of time spent
     * non-idle since the previous invocation (0 on the first call or when the file
     * has not changed).
     *
     * @return CPU utilization percentage in the range [0, 100]
     * @throws IllegalArgumentException if the stat file is missing or cannot be parsed
     */
    private double calculateCpuUsage() {
        BufferedReader bufferedReader = null;
        try {

            final File stat = new File(statFile);
            if (stat.exists()) {

                bufferedReader = new BufferedReader(new FileReader(stat));
                // First line is the aggregate "cpu ..." line; fields are cumulative jiffies.
                final String line = bufferedReader.readLine();
                final String[] lineVals = line.split("\\s+");

                // Fields (1-indexed after "cpu"): user, nice, system, idle, iowait.
                long usr = Long.parseLong(lineVals[1]) + Long.parseLong(lineVals[2]);
                long sys = Long.parseLong(lineVals[3]);
                long idl = Long.parseLong(lineVals[4]);
                long iow = Long.parseLong(lineVals[5]);
                long tot = usr + sys + idl + iow;
                long diffIdl = -1;
                long diffTot = -1;

                boolean skip = false;
                synchronized (LOCK_OBJECT) {
                    if (lastUsr == 0 || line.equals(lastLine)) {
                        // Case where this is a first call so no diff available or
                        // /proc/stat file is not updated since last time.
                        skip = true;
                    }

                    diffIdl = Math.abs(idl - lastIdl);
                    diffTot = Math.abs(tot - lastTot);
                    if (diffTot < diffIdl) {
                        log.warn(
                                "diffTot is less than diff_idle. \nPrev cpu line : {} and current cpu line : {} ",
                                lastLine,
                                line);
                        if (iow < lastIow) {
                            // this is case where current iow value less than prev, this can happen in rare cases as per
                            // https://docs.kernel.org/filesystems/proc.html, and when the worker is idle
                            // there is no increase in usr or sys values as well resulting in diffTot < diffIdl as
                            // current tot increases less than current idl
                            // return 0 in this case as this is the case where worker is not doing anything anyways.
                            skip = true;
                        }
                    }
                    // Record the current sample for the next invocation's delta.
                    lastUsr = usr;
                    lastSys = sys;
                    lastIdl = idl;
                    lastIow = iow;
                    lastTot = usr + sys + idl + iow;
                    lastLine = line;
                }

                if (skip) {
                    return 0D;
                }

                // Non-idle fraction of elapsed jiffies, as a percentage.
                return ((double) (diffTot - diffIdl) / (double) diffTot) * 100.0;

            } else {
                throw new IllegalArgumentException(String.format(
                        "LinuxCpuWorkerMetric is not configured properly, file : %s does not exists", this.statFile));
            }
        } catch (final Throwable t) {
            // Surface everything as IllegalArgumentException, preserving the original when
            // it already is one.
            if (t instanceof IllegalArgumentException) {
                throw (IllegalArgumentException) t;
            }
            throw new IllegalArgumentException(
                    "LinuxCpuWorkerMetric failed to read metric stats or not configured properly.", t);
        } finally {
            try {
                if (bufferedReader != null) {
                    bufferedReader.close();
                }
            } catch (Throwable x) {
                // Best effort close; a close failure should not mask the computed result.
                log.warn("Failed to close bufferedReader ", x);
            }
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkInWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkInWorkerMetric.java new file mode 100644 index 000000000..98fefdd26 ---
/dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkInWorkerMetric.java @@ -0,0 +1,42 @@ +package software.amazon.kinesis.worker.metric.impl.linux; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Stopwatch; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetricType;

/**
 * Inbound (received bytes) network WorkerMetric. See {@link LinuxNetworkWorkerMetricBase}
 * for how the stat file is read and converted into a utilization percentage.
 */
public class LinuxNetworkInWorkerMetric extends LinuxNetworkWorkerMetricBase {

    private static final WorkerMetricType NETWORK_IN_WORKER_METRICS_TYPE = WorkerMetricType.NETWORK_IN;

    /** Uses the default network interface ({@code eth0}) and default stat file. */
    public LinuxNetworkInWorkerMetric(final OperatingRange operatingRange, final double maxBandwidthInMB) {
        this(operatingRange, DEFAULT_INTERFACE_NAME, maxBandwidthInMB);
    }

    /** Uses the default stat file with an explicitly named network interface. */
    public LinuxNetworkInWorkerMetric(
            final OperatingRange operatingRange, final String interfaceName, final double maxBandwidthInMB) {
        this(operatingRange, interfaceName, DEFAULT_NETWORK_STAT_FILE, maxBandwidthInMB, Stopwatch.createUnstarted());
    }

    // Test-only constructor allowing the stat file and stopwatch to be injected.
    @VisibleForTesting
    LinuxNetworkInWorkerMetric(
            final OperatingRange operatingRange,
            final String interfaceName,
            final String statFile,
            final double maxBandwidthInMB,
            final Stopwatch stopwatch) {
        super(operatingRange, interfaceName, statFile, maxBandwidthInMB, stopwatch);
    }

    @Override
    protected WorkerMetricType getWorkerMetricsType() {
        return NETWORK_IN_WORKER_METRICS_TYPE;
    }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkOutWorkerMetric.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkOutWorkerMetric.java new file mode 100644 index 000000000..acdcc557b --- /dev/null +++
b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkOutWorkerMetric.java @@ -0,0 +1,42 @@ +package software.amazon.kinesis.worker.metric.impl.linux; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Stopwatch; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetricType;

/**
 * Outbound (transmitted bytes) network WorkerMetric. See {@link LinuxNetworkWorkerMetricBase}
 * for how the stat file is read and converted into a utilization percentage.
 */
public class LinuxNetworkOutWorkerMetric extends LinuxNetworkWorkerMetricBase {

    private static final WorkerMetricType NETWORK_OUT_WORKER_METRICS_TYPE = WorkerMetricType.NETWORK_OUT;

    /** Uses the default network interface ({@code eth0}) and default stat file. */
    public LinuxNetworkOutWorkerMetric(final OperatingRange operatingRange, final double maxBandwidthInMB) {
        this(operatingRange, DEFAULT_INTERFACE_NAME, maxBandwidthInMB);
    }

    /** Uses the default stat file with an explicitly named network interface. */
    public LinuxNetworkOutWorkerMetric(
            final OperatingRange operatingRange, final String interfaceName, final double maxBandwidthInMB) {
        this(operatingRange, interfaceName, DEFAULT_NETWORK_STAT_FILE, maxBandwidthInMB, Stopwatch.createUnstarted());
    }

    // Test-only constructor allowing the stat file and stopwatch to be injected.
    @VisibleForTesting
    LinuxNetworkOutWorkerMetric(
            final OperatingRange operatingRange,
            final String interfaceName,
            final String statFile,
            final double maxBandwidthInMB,
            final Stopwatch stopwatch) {
        super(operatingRange, interfaceName, statFile, maxBandwidthInMB, stopwatch);
    }

    @Override
    protected WorkerMetricType getWorkerMetricsType() {
        return NETWORK_OUT_WORKER_METRICS_TYPE;
    }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkWorkerMetricBase.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkWorkerMetricBase.java new file mode 100644 index 000000000..c99c64ca7 --- /dev/null +++
b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkWorkerMetricBase.java @@ -0,0 +1,188 @@ +package software.amazon.kinesis.worker.metric.impl.linux; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.time.Duration; +import java.util.Map; + +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; +import com.google.common.collect.ImmutableMap; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.WorkerMetricType;

/**
 * Base class for EC2NetworkWorkerMetrics, this reads and parses /proc/net/dev file and look for the specific
 * interface and reads received and transmitted bytes.
 * To get the percentage of bandwidth consumed, the fetch bytes are converted to per second (based on the interval
 * between invocation) and percentage is calculated by dividing it by the maximum bandwidth in MBps.
 *
 * When this is invoked for the first time, the value returned is always 0 as the prev values are not available
 * to calculate the diff.
 * In case the stat file is not present or any other exception occurs, this throws IllegalArgumentException.
 */
@Slf4j
public abstract class LinuxNetworkWorkerMetricBase implements WorkerMetric {

    protected static final String DEFAULT_NETWORK_STAT_FILE = "/proc/net/dev";
    protected static final String DEFAULT_INTERFACE_NAME = "eth0";
    // Guards lastRx/lastTx so concurrent capture() calls see a consistent previous sample.
    private final Object lockObject = new Object();

    private final OperatingRange operatingRange;
    private final String interfaceName;
    private final String statFile;
    private final double maxBandwidthInMBps;
    // Stopwatch to keep track of elapsed time between invocation.
    private final Stopwatch stopwatch;

    public LinuxNetworkWorkerMetricBase(
            final OperatingRange operatingRange,
            final String interfaceName,
            final String statFile,
            final double maxBandwidthInMBps,
            final Stopwatch stopwatch) {
        Preconditions.checkArgument(maxBandwidthInMBps > 0, "maxBandwidthInMBps should be greater than 0.");
        this.operatingRange = operatingRange;
        this.interfaceName = interfaceName;
        this.statFile = statFile;
        this.maxBandwidthInMBps = maxBandwidthInMBps;
        this.stopwatch = stopwatch;
    }

    // Previous cumulative byte counters; -1 marks "no sample yet" (first invocation).
    private long lastRx = -1;
    private long lastTx = -1;

    @Override
    public String getShortName() {
        return getWorkerMetricsType().getShortName();
    }

    @Override
    public OperatingRange getOperatingRange() {
        return this.operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return getWorkerMetricsType();
    }

    /**
     * Reads the stat file and find the total bytes (in and out) and divide it by the time elapsed since last read to
     * get the bytes per second.
     * Converts the bytes per second to MBps and then normalizes it to a percentage of the maximum bandwidth.
     * @return WorkerMetricValue with the % of network bandwidth consumed.
     */
    @Override
    public WorkerMetricValue capture() {
        final double percentageOfMaxBandwidth =
                convertToMBps(calculateNetworkUsage().get(getWorkerMetricsType())) / maxBandwidthInMBps * 100;
        return WorkerMetricValue.builder()
                // If maxBandwidthInMBps is less than utilized (could be wrong configuration),
                // default to 100 % bandwidth utilization.
                .value(Math.min(100, percentageOfMaxBandwidth))
                .build();
    }

    /**
     * Converts a byte count into an MB-per-second rate using the stopwatch-tracked time
     * elapsed since the previous invocation.
     *
     * @param bytes bytes transferred since the previous invocation
     * @return transfer rate in MB per second
     * @throws IllegalArgumentException if zero time elapsed between invocations
     */
    private double convertToMBps(final long bytes) {
        final double elapsedTimeInSecond;
        if (!stopwatch.isRunning()) {
            // stopwatch is not running during the first request only, in this case assume 1 second as elapsed as
            // during the first request even bytes are zero, any value of elapsedTimeInSecond does not have any effect.
            elapsedTimeInSecond = 1.0;
        } else {
            // Specifically, getting nanos and converting to seconds to get the decimal precision.
            elapsedTimeInSecond = (double) stopwatch.elapsed().toNanos()
                    / Duration.ofSeconds(1).toNanos();
        }
        stopwatch.reset().start();
        // Convert bytes to MB
        final double totalDataMB = (double) bytes / (1024 * 1024);
        if (elapsedTimeInSecond == 0) {
            // This should never happen, as getting called twice within 1 nanoSecond is never expected.
            // If this happens something is really wrong.
            throw new IllegalArgumentException("elapsedTimeInSecond is zero which is incorrect");
        }
        return totalDataMB / elapsedTimeInSecond;
    }

    protected abstract WorkerMetricType getWorkerMetricsType();

    /**
     * Returns the absolute bytes in and out since the last invocation of the method.
     * @return Map of WorkerMetricType to bytes
     */
    private Map<WorkerMetricType, Long> calculateNetworkUsage() {
        BufferedReader bufferedReader = null;
        try {
            final File net = new File(statFile);
            if (net.exists()) {
                bufferedReader = new BufferedReader(new FileReader(net));

                // skip over header lines
                bufferedReader.readLine();
                bufferedReader.readLine();

                // find specified interface
                String line = bufferedReader.readLine();
                while (line != null && !line.matches("^\\s*" + interfaceName + ":.*")) {
                    line = bufferedReader.readLine();
                }
                if (line == null) {
                    throw new IllegalArgumentException(
                            "Failed to parse the file and find interface : " + interfaceName);
                }

                int n = line.indexOf(':') + 1;
                line = line.substring(n).trim();
                String[] parts = line.split("\\s+");

                // Column 0 is cumulative received bytes, column 8 cumulative transmitted bytes.
                long rx = Long.parseLong(parts[0]);
                long tx = Long.parseLong(parts[8]);
                long diffRx = -1, diffTx = -1;
                boolean skip = false;
                synchronized (lockObject) {
                    if (lastRx == -1) {
                        // First invocation: no previous sample to diff against.
                        skip = true;
                    } else {
                        diffRx = Math.abs(rx - lastRx);
                        diffTx = Math.abs(tx - lastTx);
                    }
                    lastRx = rx;
                    lastTx = tx;
                }

                if (skip) {
                    return createResponse(0L, 0L);
                }

                return createResponse(diffRx, diffTx);
            } else {
                throw new IllegalArgumentException(String.format(
                        "NetworkWorkerMetrics is not configured properly, file : %s does not exists", this.statFile));
            }
        } catch (final Throwable t) {
            // Surface everything as IllegalArgumentException, preserving the original when
            // it already is one.
            if (t instanceof IllegalArgumentException) {
                throw (IllegalArgumentException) t;
            }
            throw new IllegalArgumentException("Cannot read/parse " + this.statFile, t);
        } finally {
            try {
                if (bufferedReader != null) {
                    bufferedReader.close();
                }
            } catch (Throwable x) {
                log.warn("Failed to close bufferedReader ", x);
            }
        }
    }

    // Packages the two deltas keyed by metric type so subclasses can pick theirs.
    private Map<WorkerMetricType, Long> createResponse(final long diffRx, final long diffTx) {
        return ImmutableMap.of(
                WorkerMetricType.NETWORK_IN, diffRx,
                WorkerMetricType.NETWORK_OUT, diffTx);
    }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStats.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStats.java new file mode 100644 index 000000000..fb26f55c3 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStats.java @@ -0,0 +1,321 @@ +package software.amazon.kinesis.worker.metricstats; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbAttribute; +import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbBean; +import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbIgnore; +import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbPartitionKey; +import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition; +import
software.amazon.awssdk.services.dynamodb.model.KeySchemaElement; +import software.amazon.awssdk.services.dynamodb.model.KeyType; +import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType; +import software.amazon.kinesis.utils.ExponentialMovingAverage; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +import static java.util.Objects.isNull; + +/** + * DataModel for a WorkerMetric, this data model is used to store the current state of a Worker in terms of relevant + * WorkerMetric(CPU, Memory, Network). + * + * workerId : unique worker identifier, this is equivalent to the owner attribute from the lease table. + * lastUpdateTime : wall epoch in seconds when the entry was last updated + * metricStats : Map of WorkerMetric to last N values for it. e.g. entry "CPU" : [10,20,12,10] etc + * operatingRange : Map of WorkerMetric to its operating range. First item in the list of values defines the max limit. + * metricStatsMap : runtime computed WorkerMetric name to its average value map. This field is not stored in ddb + * and is used during Lease assignment only + */ +@Data +@Builder +@DynamoDbBean +@NoArgsConstructor +@AllArgsConstructor(access = AccessLevel.PRIVATE) +@Slf4j +public class WorkerMetricStats { + + static final String KEY_LAST_UPDATE_TIME = "lut"; + static final String KEY_WORKER_ID = "wid"; + + @Getter(onMethod_ = {@DynamoDbPartitionKey, @DynamoDbAttribute(KEY_WORKER_ID)}) + private String workerId; + + @Getter(onMethod_ = {@DynamoDbAttribute(KEY_LAST_UPDATE_TIME)}) + private Long lastUpdateTime; + + @Getter(onMethod_ = {@DynamoDbAttribute("sts")}) + private Map> metricStats; + + @Getter(onMethod_ = {@DynamoDbAttribute("opr")}) + private Map> operatingRange; + + /** + * This map contains the WorkerMetric to its metric stat value. Metric stat value stored in this is exponentially averaged over + * available number of different datapoints. 
+ */ + @Getter(onMethod_ = {@DynamoDbIgnore}) + @EqualsAndHashCode.Exclude + @Builder.Default + private Map metricStatsMap = new HashMap<>(); + + /** + * Alpha value used to compute the exponential moving average for worker metrics values. + */ + @Getter(onMethod_ = {@DynamoDbIgnore}) + @EqualsAndHashCode.Exclude + @Builder.Default + private double emaAlpha = 0.2; + + /** + * Returns true if given {@param workerMetricName} is available for the current worker else false + */ + public boolean containsMetricStat(final String workerMetricName) { + return metricStats.containsKey(workerMetricName); + } + + /** + * Returns the value for given WorkerMetricStats name. + */ + public double getMetricStat(final String workerMetricName) { + return metricStatsMap.computeIfAbsent(workerMetricName, (key) -> computeAverage(metricStats.get(key))); + } + + /** + * Increase the WorkerMetricStats value by given increaseLoadPercentage. This is done during execution of LAM and + * as assignments are happening the current metric stat value is increased based on increaseLoadPercentage. 
+ */ + public void extrapolateMetricStatValuesForAddedThroughput( + final Map workerMetricsToFleetLevelAverageMap, + final double averageThroughput, + final double increaseThroughput, + final double averageLeaseCount) { + + metricStatsMap.replaceAll((key, value) -> extrapolateMetricsValue( + key, + workerMetricsToFleetLevelAverageMap.get(key), + averageThroughput, + increaseThroughput, + averageLeaseCount)); + } + + private double extrapolateMetricsValue( + final String metricName, + final double fleetLevelMetricAverage, + final double averageThroughput, + final double increaseThroughput, + final double averageLeaseCount) { + + if (averageThroughput > 0) { + return metricStatsMap.get(metricName) + increaseThroughput * fleetLevelMetricAverage / averageThroughput; + } else { + return metricStatsMap.get(metricName) + fleetLevelMetricAverage / averageLeaseCount; + } + } + + public boolean willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange( + final Map workerMetricsToFleetLevelAverageMap, + final double averageThroughput, + final double increaseThroughput, + final double averageLeaseCount) { + for (final String metricStatName : metricStats.keySet()) { + final double fleetLevelAverageForMetric = workerMetricsToFleetLevelAverageMap.get(metricStatName); + final double updatedValueToBe = extrapolateMetricsValue( + metricStatName, + fleetLevelAverageForMetric, + averageThroughput, + increaseThroughput, + averageLeaseCount); + + if (updatedValueToBe > fleetLevelAverageForMetric + || updatedValueToBe > operatingRange.get(metricStatName).get(0)) { + return true; + } + } + return false; + } + + /** + * Increase the metric stat value corresponding to the added single lease. This is done during execution of LAM and + * as assignments are happening the load is increase for LAM to determine workers for assignment. 
+ * The increase is done considering that for a WorkerMetric the fleet level average would be met when fleet level + * average leases are assigned to a worker and thus 1 lease addition increases the metric stat value by fleet level + * average of metric stat by averageLeaseCount + */ + public void extrapolateMetricStatValuesForAddedLease( + final Map workerMetricToFleetLevelAverage, final int averageLeaseCount) { + for (Map.Entry workerMetricToMetricStat : metricStatsMap.entrySet()) { + final String workerMetricName = workerMetricToMetricStat.getKey(); + final Double updatedValue = workerMetricToMetricStat.getValue() + + workerMetricToFleetLevelAverage.get(workerMetricName) / averageLeaseCount; + metricStatsMap.replace(workerMetricName, updatedValue); + } + } + + /** + * Determines percentage of load to reach the mean for the worker. In case of multiple worker metrics the metric stat + * value closest to mean is used to determine the percentage value. This value is indication of how much load in + * percentage to current load the worker can take to reach mean value. + * @param workerMetricToFleetLevelAverage : WorkerMetric to fleet level mean value. + * @return percentage to reach mean based on the WorkerMetric closest to its corresponding average. + */ + public double computePercentageToReachAverage(final Map workerMetricToFleetLevelAverage) { + double minDifferencePercentage = Double.MAX_VALUE; + for (final String workerMetricName : metricStats.keySet()) { + final double metricStatValue = getMetricStat(workerMetricName); + final double differenceRatio; + if (metricStatValue == 0D) { + // If metric stat value is 0 that means this worker does not have any load so we assume that this worker + // can take 100% more load than the current to reach average. 
+ differenceRatio = 1; + } else { + differenceRatio = + (workerMetricToFleetLevelAverage.get(workerMetricName) - metricStatValue) / metricStatValue; + } + minDifferencePercentage = Math.min(minDifferencePercentage, differenceRatio); + } + return minDifferencePercentage; + } + + private Double computeAverage(final List values) { + if (values.isEmpty()) { + return 0D; + } + final ExponentialMovingAverage average = new ExponentialMovingAverage(emaAlpha); + // Ignore -1 which denotes the WorkerMetric failure when calculating average, as it possible in past + // one of the value is -1 due to some intermediate failure, and it has recovered since. + values.forEach(value -> { + if (value != -1) { + average.add(value); + } + }); + return average.getValue(); + } + + /** + * Returns true if any of the metric stat values has -1 in last index which represents that the metric stat value + * was not successfully fetched in last attempt by worker. + * + * @return true if any metric stat value has -1 in last index, false otherwise. 
+ */ + public boolean isAnyWorkerMetricFailing() { + boolean response = false; + if (isUsingDefaultWorkerMetric()) { + return response; + } + for (final Map.Entry> resourceStatsEntry : metricStats.entrySet()) { + if (resourceStatsEntry.getValue().isEmpty()) { + continue; + } + final Double lastEntry = resourceStatsEntry + .getValue() + .get(resourceStatsEntry.getValue().size() - 1); + if (lastEntry != null && lastEntry == -1D) { + response = true; + break; + } + } + if (response) { + log.warn("WorkerStats: {} has a WorkerMetric which is failing.", this); + } + return response; + } + + /** + * WorkerMetricStats entry is invalid + * if any of the field from lastUpdateTime, operatingRange, resourcesStats are not present or + * if resourcesStats is empty or + * if any of the WorkerMetrics having resourceStats does not have operatingRange or + * if operating range values are not present or + * if maxUtilization is 0 for any WorkerMetric + * @return true if the entry is valid false otherwise. + */ + public boolean isValidWorkerMetric() { + if (isNull(lastUpdateTime)) { + return false; + } + if (isUsingDefaultWorkerMetric()) { + return true; + } + if (isNull(metricStats) || isNull(operatingRange)) { + return false; + } + for (final Map.Entry> entry : metricStats.entrySet()) { + if (!operatingRange.containsKey(entry.getKey())) { + return false; + } + } + for (final Map.Entry> operatingRangeEntry : operatingRange.entrySet()) { + // If operatingRange for a WorkerMetric is missing or if maxUtilization is 0 then its not valid entry. 
+ if (operatingRangeEntry.getValue().isEmpty() + || operatingRangeEntry.getValue().get(0) == 0) { + return false; + } + } + return true; + } + + public boolean isAnyWorkerMetricAboveAverageUtilizationOrOperatingRange( + final Map workerMetricToFleetLevelAverage) { + for (final String workerMetricName : metricStats.keySet()) { + final double value = getMetricStat(workerMetricName); + if (value > workerMetricToFleetLevelAverage.get(workerMetricName)) { + return true; + } + } + // check if any metric stat value is above operating range. + return workerMetricToFleetLevelAverage.keySet().stream().anyMatch(this::isWorkerMetricAboveOperatingRange); + } + + /** + * If a worker is not using an explicit WorkerMetric such as CPU, Memory, or Network, then it + * is said to be using the default WorkerMetric. Load management then falls back to throughput. + * @return true if the worker is not using an explicit WorkerMetric. + */ + public boolean isUsingDefaultWorkerMetric() { + if ((metricStats == null || metricStats.isEmpty()) && (operatingRange == null || operatingRange.isEmpty())) { + return true; + } + if (metricStats != null) { + return metricStats.entrySet().stream() + .anyMatch(entry -> entry.getKey().equals(WorkerMetricType.THROUGHPUT.name())); + } + return false; + } + + /** + * Evaluates if the given metric stat is above operatingRange for the given WorkerMetric name. 
If the WorkerMetric + * does not exist returns false + * @param workerMetricName WorkerMetric name to evaluate + * @return true if metric stat exists and is above operatingRange for the WorkerMetric + */ + public boolean isWorkerMetricAboveOperatingRange(final String workerMetricName) { + return metricStatsMap.containsKey(workerMetricName) + && metricStatsMap.get(workerMetricName) + > operatingRange.get(workerMetricName).get(0); + } + + public static List getKeySchema() { + return Collections.singletonList(KeySchemaElement.builder() + .attributeName(KEY_WORKER_ID) + .keyType(KeyType.HASH) + .build()); + } + + public static List getAttributeDefinitions() { + return Collections.singletonList(AttributeDefinition.builder() + .attributeName(KEY_WORKER_ID) + .attributeType(ScalarAttributeType.S) + .build()); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsDAO.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsDAO.java new file mode 100644 index 000000000..f7a42c9e1 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsDAO.java @@ -0,0 +1,191 @@ +package software.amazon.kinesis.worker.metricstats; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.core.waiters.WaiterResponse; +import software.amazon.awssdk.enhanced.dynamodb.DynamoDbAsyncTable; +import software.amazon.awssdk.enhanced.dynamodb.DynamoDbEnhancedAsyncClient; +import software.amazon.awssdk.enhanced.dynamodb.Expression; +import software.amazon.awssdk.enhanced.dynamodb.Key; +import software.amazon.awssdk.enhanced.dynamodb.TableSchema; +import 
software.amazon.awssdk.enhanced.dynamodb.model.DeleteItemEnhancedRequest; +import software.amazon.awssdk.enhanced.dynamodb.model.UpdateItemEnhancedRequest; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import software.amazon.awssdk.services.dynamodb.model.TableDescription; +import software.amazon.awssdk.services.dynamodb.model.TableStatus; +import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.utils.DdbUtil; + +import static java.util.Objects.nonNull; +import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture; +import static software.amazon.kinesis.worker.metricstats.WorkerMetricStats.KEY_LAST_UPDATE_TIME; +import static software.amazon.kinesis.worker.metricstats.WorkerMetricStats.KEY_WORKER_ID;

/**
 * Data-access object for the WorkerMetricStats DynamoDB table: creates the table on demand,
 * and supports update, conditional delete, and full-scan read of worker metric entries.
 */
@Slf4j
public class WorkerMetricStatsDAO {
    private final DynamoDbEnhancedAsyncClient dynamoDbEnhancedAsyncClient;
    private final DynamoDbAsyncTable<WorkerMetricStats> table;
    private final DynamoDbAsyncClient dynamoDbAsyncClient;
    private final WorkerMetricsTableConfig tableConfig;
    // Reporter interval; used to detect stale lastUpdateTime values during validation.
    private final Long workerMetricsReporterFrequencyMillis;

    public WorkerMetricStatsDAO(
            final DynamoDbAsyncClient dynamoDbAsyncClient,
            final WorkerMetricsTableConfig tableConfig,
            final Long workerMetricsReporterFrequencyMillis) {
        this.dynamoDbAsyncClient = dynamoDbAsyncClient;
        this.dynamoDbEnhancedAsyncClient = DynamoDbEnhancedAsyncClient.builder()
                .dynamoDbClient(dynamoDbAsyncClient)
                .build();
        this.table = dynamoDbEnhancedAsyncClient.table(
                tableConfig.tableName(), TableSchema.fromBean(WorkerMetricStats.class));
        this.tableConfig = tableConfig;
        this.workerMetricsReporterFrequencyMillis = workerMetricsReporterFrequencyMillis;
    }

    /**
     * Performs initialization of the WorkerMetricStats DAO and table.
     * This will create the table if it doesn't exist.
     */
    public void initialize() throws DependencyException {
        createTableIfDoesNotExist();
    }

    /**
     * Updates the workerMetrics for the provided worker, method ignores the null attributes and overrides
     * the only non-null from {@param workerMetrics}. This is a blocking call.
     *
     * @param workerMetrics : Updated WorkerMetricStats object, resourceStats, workerId and lastUpdateTime are
     *                        required fields from {@param workerMetrics}
     */
    public void updateMetrics(final WorkerMetricStats workerMetrics) {
        validateWorkerMetrics(workerMetrics);
        final UpdateItemEnhancedRequest<WorkerMetricStats> request = UpdateItemEnhancedRequest.builder(
                        WorkerMetricStats.class)
                .item(workerMetrics)
                .ignoreNulls(true)
                .build();
        unwrappingFuture(() -> table.updateItem(request));
    }

    /**
     * Deletes the WorkerMetricStats entry with conditional check on lastUpdateTime, if the worker has come alive and
     * updated the lastUpdateTime then we no longer need to perform the deletion.
     * @param workerMetrics WorkerMetricStats that needs to be deleted.
     * @return true if the entry was deleted, false if the conditional check failed.
     */
    public boolean deleteMetrics(final WorkerMetricStats workerMetrics) {
        Preconditions.checkArgument(nonNull(workerMetrics.getWorkerId()), "WorkerID is not provided");
        Preconditions.checkArgument(nonNull(workerMetrics.getLastUpdateTime()), "LastUpdateTime is not provided");

        final DeleteItemEnhancedRequest request = DeleteItemEnhancedRequest.builder()
                .key(Key.builder().partitionValue(workerMetrics.getWorkerId()).build())
                .conditionExpression(Expression.builder()
                        .expression(String.format("#key = :value AND attribute_exists (%s)", KEY_WORKER_ID))
                        .expressionNames(ImmutableMap.of("#key", KEY_LAST_UPDATE_TIME))
                        .expressionValues(ImmutableMap.of(
                                ":value", AttributeValue.fromN(Long.toString(workerMetrics.getLastUpdateTime()))))
                        .build())
                .build();

        try {
            unwrappingFuture(() -> table.deleteItem(request));
            return true;
        } catch (final ConditionalCheckFailedException e) {
            log.warn(
                    "Failed to delete the WorkerMetricStats due to conditional failure for worker : {}",
                    workerMetrics,
                    e);
            return false;
        }
    }

    // Validates required fields and freshness before writing a WorkerMetricStats entry.
    private void validateWorkerMetrics(final WorkerMetricStats workerMetrics) {
        Preconditions.checkArgument(nonNull(workerMetrics.getMetricStats()), "ResourceMetrics not provided");

        final List<String> entriesWithoutValues = workerMetrics.getMetricStats().entrySet().stream()
                .filter(entry -> entry.getValue() == null || entry.getValue().isEmpty())
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());

        Preconditions.checkArgument(
                entriesWithoutValues.isEmpty(), "Following metric stats dont have any values " + entriesWithoutValues);

        Preconditions.checkArgument(nonNull(workerMetrics.getLastUpdateTime()), "LastUpdateTime field not set");

        // If the LastUpdateTime field is 2x older than the reporter interval, it is considered stale.
        Preconditions.checkArgument(
                Duration.between(Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now())
                                .toMillis()
                        < 2 * workerMetricsReporterFrequencyMillis,
                "LastUpdateTime is more than 2x older than workerMetricsReporterFrequencyMillis");
    }

    /**
     * Performs the scan on the storage and returns list of all workerMetricStats objects.
     *
     * @return : List of all worker metric stats
     */
    public List<WorkerMetricStats> getAllWorkerMetricStats() {
        log.debug("Scanning DDB table {}", table.tableName());
        final List<WorkerMetricStats> workerMetricStats = new ArrayList<>();
        unwrappingFuture(() -> table.scan().items().subscribe(workerMetricStats::add));
        return workerMetricStats;
    }

    // Returns the table description, or null if the table does not exist yet.
    private TableDescription getTableDescription() {
        try {
            final DescribeTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(
                    DescribeTableRequest.builder().tableName(table.tableName()).build()));
            return response.table();
        } catch (final ResourceNotFoundException e) {
            return null;
        }
    }

    // Creates the table when absent, then waits (up to 10 minutes) for it to become ACTIVE
    // and enables point-in-time recovery per the table config.
    private void createTableIfDoesNotExist() throws DependencyException {
        TableDescription tableDescription = getTableDescription();
        if (tableDescription == null) {
            unwrappingFuture(DdbUtil.tableCreator(
                    WorkerMetricStats::getKeySchema,
                    WorkerMetricStats::getAttributeDefinitions,
                    tableConfig,
                    dynamoDbAsyncClient));
            tableDescription = getTableDescription();
            log.info("Table : {} created.", table.tableName());
        } else {
            log.info("Table : {} already existing, skipping creation...", table.tableName());
        }

        if (tableDescription.tableStatus() != TableStatus.ACTIVE) {
            log.info("Waiting for DDB Table: {} to become active", table.tableName());
            try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) {
                final WaiterResponse<DescribeTableResponse> response =
                        unwrappingFuture(() -> waiter.waitUntilTableExists(
                                r -> r.tableName(table.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10))));
                response.matched()
                        .response()
                        .orElseThrow(() -> new DependencyException(new IllegalStateException(
                                "Creating WorkerMetricStats table timed out",
                                response.matched().exception().orElse(null))));
            }

            unwrappingFuture(() -> DdbUtil.pitrEnabler(tableConfig, dynamoDbAsyncClient));
        }
    }
}
diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsManager.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsManager.java new file mode 100644 index 000000000..c8c32be6f --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsManager.java @@ -0,0 +1,227 @@ +package software.amazon.kinesis.worker.metricstats; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Queue; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Queues; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.awssdk.utils.ThreadFactoryBuilder; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; +import software.amazon.kinesis.worker.metric.WorkerMetric; + +/** + * WorkerMetricStatsManager is a class that manages the collection of raw WorkerMetricStats values for the list of WorkerMetricStats + * periodically and store in
a bounded in-memory queue. + This class runs a periodic thread at every {@link #inMemoryStatsCaptureThreadFrequencyMillis} interval which + captures each WorkerMetricStats's raw value and stores them in {@link #workerMetricsToRawHighFreqValuesMap} for each. + When computeMetrics is invoked, the method drains the in-memory raw values queue for each WorkerMetricStats and computes the + average and stores the computed average in #computedAverageMetrics for each WorkerMetricStats. + For each WorkerMetricStats last {@link #maxMetricStatsCount} values are captured in {@link #computedAverageMetrics} + + This class is thread safe. + */ +@Slf4j +@KinesisClientInternalApi +public final class WorkerMetricStatsManager { + + /** + * 6 digit after decimal + */ + private static final int DEFAULT_AVERAGE_VALUES_DIGIT_AFTER_DECIMAL = 6; + + private static final String METRICS_OPERATION_WORKER_STATS_REPORTER = "WorkerMetricStatsReporter"; + static final String METRICS_IN_MEMORY_REPORTER_FAILURE = "InMemoryMetricStatsReporterFailure"; + // 1 value per sec gives 5 minutes worth of past data for 300 count which is sufficient. + // In case of reporter running more frequently than 5 minutes the queue will not reach this value anyway. + private static final int HIGH_FREQUENCY_STATS_COUNT = 300; + private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L; + + private final ScheduledExecutorService scheduledExecutorService; + /** + * Max count of values per WorkerMetricStats that is recorded in the storage. + */ + private final int maxMetricStatsCount; + /** + * List of WorkerMetricStats configured for the application, the values from these will be recorded in the storage. + */ + private final List workerMetricList; + /** + * Map of WorkerMetricStats to its trailing (#maxMetricStatsCount) values. 
+ */ + @Getter(AccessLevel.PACKAGE) + private final Map> computedAverageMetrics; + /** + * Map of the WorkerMetricStats to its raw values since the last flush to storage was done. + */ + @Getter(AccessLevel.PACKAGE) + private final Map> workerMetricsToRawHighFreqValuesMap; + /** + * Frequency for capturing raw WorkerMetricsValues in millis. + */ + private final long inMemoryStatsCaptureThreadFrequencyMillis; + + private final MetricsFactory metricsFactory; + private ScheduledFuture managerProcessFuture; + + public WorkerMetricStatsManager( + final int maxMetricStatsCount, + final List workerMetricList, + final MetricsFactory metricsFactory, + long inMemoryStatsCaptureThreadFrequencyMillis) { + // Set thread as daemon to not block VM from exit. + this.scheduledExecutorService = Executors.newScheduledThreadPool( + 1, + new ThreadFactoryBuilder() + .daemonThreads(true) + .threadNamePrefix("worker-metrics-manager") + .build()); + this.maxMetricStatsCount = maxMetricStatsCount; + this.workerMetricList = workerMetricList; + this.computedAverageMetrics = new HashMap<>(); + this.workerMetricsToRawHighFreqValuesMap = new HashMap<>(); + this.metricsFactory = metricsFactory; + this.inMemoryStatsCaptureThreadFrequencyMillis = inMemoryStatsCaptureThreadFrequencyMillis; + init(); + } + + private void init() { + for (final WorkerMetric workerMetric : workerMetricList) { + computedAverageMetrics.put(workerMetric, EvictingQueue.create(maxMetricStatsCount)); + workerMetricsToRawHighFreqValuesMap.put( + workerMetric, Queues.synchronizedQueue(EvictingQueue.create(HIGH_FREQUENCY_STATS_COUNT))); + } + log.info( + "Completed initialization with maxMetricStatsCount : {} and total WorkerMetricStats : {}", + maxMetricStatsCount, + workerMetricList.size()); + } + + public void startManager() { + managerProcessFuture = scheduledExecutorService.scheduleWithFixedDelay( + this::recordWorkerMetrics, 0, inMemoryStatsCaptureThreadFrequencyMillis, TimeUnit.MILLISECONDS); + log.info("Started manager 
process..."); + } + + public void stopManager() { + if (managerProcessFuture != null) { + managerProcessFuture.cancel(false); + } + if (!scheduledExecutorService.isShutdown()) { + scheduledExecutorService.shutdown(); + try { + if (scheduledExecutorService.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + scheduledExecutorService.shutdownNow(); + } + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + log.warn("Interrupted when shutting down the scheduler, forcing shutdown", e); + scheduledExecutorService.shutdownNow(); + } + } + } + + private void recordWorkerMetrics() { + for (final WorkerMetric workerMetric : workerMetricList) { + final Optional value = fetchWorkerMetricsValue(workerMetric); + value.ifPresent(aDouble -> + workerMetricsToRawHighFreqValuesMap.get(workerMetric).add(aDouble)); + } + } + + private Optional fetchWorkerMetricsValue(final WorkerMetric workerMetric) { + try { + final Double value = workerMetric.capture().getValue(); + return Optional.of(value); + } catch (final Throwable throwable) { + log.error( + "WorkerMetricStats {} failure : ", + workerMetric.getWorkerMetricType().name(), + throwable); + final MetricsScope scope = + MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION_WORKER_STATS_REPORTER); + try { + scope.addData(METRICS_IN_MEMORY_REPORTER_FAILURE, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY); + } finally { + MetricsUtil.endScope(scope); + } + return Optional.empty(); + } + } + + /** + * Computes the metric stats for each WorkerMetricStats by averaging the values in inMemoryQueue and returns last + * {@link WorkerMetricStatsManager#maxMetricStatsCount } averaged values for each WorkerMetricStats. + * + * In the case of empty inMemoryQueue, computedStats has -1 value to denote that specific WorkerMetricStats has failed. + * @return Map of WorkerMetricStats shortName to averaged {@link WorkerMetricStatsManager#maxMetricStatsCount } values. 
+ */ + public synchronized Map> computeMetrics() { + final Map> result = new HashMap<>(); + workerMetricsToRawHighFreqValuesMap.forEach((workerMetrics, statsQueue) -> { + final List currentWorkerMetricsStats = drainQueue(statsQueue); + + final Queue computedMetrics = computedAverageMetrics.get(workerMetrics); + + if (currentWorkerMetricsStats.isEmpty()) { + // In case currentWorkerMetricsStats is empty that means values from workerMetrics were not captured due + // to some + // reason, and thus there are no recent values, compute the value to be -1 to denote workerMetrics + // failure + computedMetrics.add(-1D); + } else { + computedMetrics.add(computeAverage(currentWorkerMetricsStats)); + } + + result.put(workerMetrics.getShortName(), new ArrayList<>(computedMetrics)); + }); + return result; + } + + /** + * Gets the operating range for each WorkerMetricStats that is registered. + * @return Map of WorkerMetricStats to a list currently containing only the max utilization value. 
+ */ + public Map> getOperatingRange() { + final Map> operatingRange = new HashMap<>(); + workerMetricList.forEach( + workerMetrics -> operatingRange.put(workerMetrics.getShortName(), ImmutableList.of((long) + workerMetrics.getOperatingRange().getMaxUtilization()))); + return operatingRange; + } + + private static List drainQueue(final Queue queue) { + final List elements = new ArrayList<>(); + final int queueLength = queue.size(); + for (int i = 0; i < queueLength; ++i) { + elements.add(queue.poll()); + } + return elements; + } + + private Double computeAverage(final List values) { + final double average = + values.stream().mapToDouble(Double::doubleValue).average().orElse(0D); + return BigDecimal.valueOf(average) + .setScale(DEFAULT_AVERAGE_VALUES_DIGIT_AFTER_DECIMAL, RoundingMode.HALF_UP) + .doubleValue(); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsReporter.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsReporter.java new file mode 100644 index 000000000..340bf1ddc --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/metricstats/WorkerMetricStatsReporter.java @@ -0,0 +1,68 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.worker.metricstats; + +import java.time.Instant; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.metrics.MetricsUtil; + +/** + * Reporter that is periodically executed to report WorkerMetricStats. It collects + * the in memory metric stats and writes into the DDB WorkerMetricStats table. + */ +@Slf4j +@RequiredArgsConstructor +@KinesisClientInternalApi +public class WorkerMetricStatsReporter implements Runnable { + private final MetricsFactory metricsFactory; + private final String workerIdentifier; + private final WorkerMetricStatsManager workerMetricsManager; + private final WorkerMetricStatsDAO workerMetricsDAO; + + @Override + public void run() { + final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, "WorkerMetricStatsReporter"); + final long startTime = System.currentTimeMillis(); + boolean success = false; + try { + /* + * OperatingRange value fetched during the initialization and is same afterwards. It's possible + * to update OperatingRange only in first call and then skip, but we do not want to do that to avoid + * case where a worker can have a failure for some time and thus does not update the workerMetrics entry + * and LeaseAssigmentManager cleans it and then worker ends updating entry without operating range. 
+ */ + final WorkerMetricStats workerMetrics = WorkerMetricStats.builder() + .workerId(workerIdentifier) + .metricStats(workerMetricsManager.computeMetrics()) + .operatingRange(workerMetricsManager.getOperatingRange()) + .lastUpdateTime(Instant.now().getEpochSecond()) + .build(); + workerMetricsDAO.updateMetrics(workerMetrics); + success = true; + } catch (final Exception e) { + log.error("Failed to update worker metric stats for worker : {}", workerIdentifier, e); + } finally { + MetricsUtil.addWorkerIdentifier(scope, workerIdentifier); + MetricsUtil.addSuccessAndLatency(scope, success, startTime, MetricsLevel.SUMMARY); + MetricsUtil.endScope(scope); + } + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/Ec2Resource.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/Ec2Resource.java new file mode 100644 index 000000000..bbc3dbfa5 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/Ec2Resource.java @@ -0,0 +1,111 @@ +package software.amazon.kinesis.worker.platform; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Optional; +import java.util.stream.Collectors; + +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.VisibleForTesting; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_PROC; + +/** + * Provides resource metadata for EC2. 
+ */ +@KinesisClientInternalApi +@Slf4j +public class Ec2Resource implements ResourceMetadataProvider { + // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/retrieve-iid.html + private static final String IMDS_URL = "http://169.254.169.254/latest/dynamic/instance-identity/document"; + private static final String TOKEN_URL = "http://169.254.169.254/latest/api/token"; + private static final int EC2_INSTANCE_METADATA_TIMEOUT_MILLIS = 5000; + + private final UrlOpener identityDocumentUrl; + private final UrlOpener tokenUrl; + + @VisibleForTesting + Ec2Resource(UrlOpener identityDocumentUrl, UrlOpener tokenUrl) { + this.identityDocumentUrl = identityDocumentUrl; + this.tokenUrl = tokenUrl; + } + + /** + * Factory method to create an instance of Ec2Resource. + * + * @return Ec2Resource instance + */ + public static Ec2Resource create() { + try { + return new Ec2Resource(new UrlOpener(new URL(IMDS_URL)), new UrlOpener(new URL(TOKEN_URL))); + } catch (MalformedURLException e) { + // It should not throw unless it's unit testing. + throw new IllegalArgumentException(e); + } + } + + private boolean isEc2() { + try { + final HttpURLConnection connection = identityDocumentUrl.openConnection(); + connection.setRequestMethod("GET"); + // IMDS v2 requires IMDS token + connection.setRequestProperty("X-aws-ec2-metadata-token", fetchImdsToken()); + connection.setConnectTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS); + connection.setReadTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS); + if (connection.getResponseCode() == 200) { + return true; + } + } catch (Exception e) { + // TODO: probably need to add retries as well. 
+ log.error("Unable to retrieve instance metadata", e); + } + return false; + } + + private String fetchImdsToken() { + try { + final HttpURLConnection connection = tokenUrl.openConnection(); + connection.setRequestMethod("PUT"); + connection.setRequestProperty("X-aws-ec2-metadata-token-ttl-seconds", "600"); + connection.setConnectTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS); + connection.setReadTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS); + if (connection.getResponseCode() == 200) { + return new BufferedReader(new InputStreamReader(tokenUrl.getInputStream(connection))) + .lines() + .collect(Collectors.joining()); + } + } catch (Exception e) { + log.warn( + "Unable to retrieve IMDS token. It could mean that the instance is not EC2 or is using IMDS V1", e); + } + return null; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isOnPlatform() { + return isEc2(); + } + + /** + * {@inheritDoc} + */ + @Override + public ComputePlatform getPlatform() { + return ComputePlatform.EC2; + } + + /** + * {@inheritDoc} + */ + @Override + public Optional getOperatingRangeDataProvider() { + return Optional.of(LINUX_PROC).filter(OperatingRangeDataProvider::isProvider); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/EcsResource.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/EcsResource.java new file mode 100644 index 000000000..5bb526f6a --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/EcsResource.java @@ -0,0 +1,59 @@ +package software.amazon.kinesis.worker.platform; + +import java.util.Map; +import java.util.Optional; + +import org.jetbrains.annotations.VisibleForTesting; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_ECS_METADATA_KEY_V4; + +/** + * Provides resource metadata for ECS. 
+ */ +@KinesisClientInternalApi +public class EcsResource implements ResourceMetadataProvider { + static final String ECS_METADATA_KEY_V3 = "ECS_CONTAINER_METADATA_URI"; + static final String ECS_METADATA_KEY_V4 = "ECS_CONTAINER_METADATA_URI_V4"; + + private final Map sysEnv; + + @VisibleForTesting + EcsResource(Map sysEnv) { + this.sysEnv = sysEnv; + } + + /** + * Factory method to create an instance of EcsResource. + * + * @return an instance of EcsResource + */ + public static EcsResource create() { + return new EcsResource(System.getenv()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isOnPlatform() { + return !sysEnv.getOrDefault(ECS_METADATA_KEY_V3, "").isEmpty() + || !sysEnv.getOrDefault(ECS_METADATA_KEY_V4, "").isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public ComputePlatform getPlatform() { + return ComputePlatform.ECS; + } + + /** + * {@inheritDoc} + */ + @Override + public Optional getOperatingRangeDataProvider() { + return Optional.of(LINUX_ECS_METADATA_KEY_V4).filter(OperatingRangeDataProvider::isProvider); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/EksResource.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/EksResource.java new file mode 100644 index 000000000..ac3d76a46 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/EksResource.java @@ -0,0 +1,61 @@ +package software.amazon.kinesis.worker.platform; + +import java.io.File; +import java.util.Optional; +import java.util.stream.Stream; + +import org.jetbrains.annotations.VisibleForTesting; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_EKS_CGROUP_V1; +import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_EKS_CGROUP_V2; + +/** + * Provides resource metadata for EKS. 
+ */ +@KinesisClientInternalApi +public class EksResource implements ResourceMetadataProvider { + private static final String K8S_TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"; + private final String k8sTokenPath; + + @VisibleForTesting + EksResource(String k8sTokenPath) { + this.k8sTokenPath = k8sTokenPath; + } + + /** + * Factory method to create an instance of EksResource. + * + * @return an instance of EksResource + */ + public static EksResource create() { + return new EksResource(K8S_TOKEN_PATH); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isOnPlatform() { + return new File(this.k8sTokenPath).exists(); + } + + /** + * {@inheritDoc} + */ + @Override + public ComputePlatform getPlatform() { + return ComputePlatform.EKS; + } + + /** + * {@inheritDoc} + */ + @Override + public Optional getOperatingRangeDataProvider() { + // It is only possible that either cgroupv1 or cgroupv2 is mounted + return Stream.of(LINUX_EKS_CGROUP_V2, LINUX_EKS_CGROUP_V1) + .filter(OperatingRangeDataProvider::isProvider) + .findFirst(); + } +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/OperatingRangeDataProvider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/OperatingRangeDataProvider.java new file mode 100644 index 000000000..abccf783a --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/OperatingRangeDataProvider.java @@ -0,0 +1,73 @@ +package software.amazon.kinesis.worker.platform; + +import java.io.File; + +import static software.amazon.kinesis.worker.platform.EcsResource.ECS_METADATA_KEY_V4; + +/** + * Enum representing the different operating range metadata providers. 
+ */ +public enum OperatingRangeDataProvider { + LINUX_EKS_CGROUP_V1 { + @Override + public boolean isProvider() { + if (!OperatingRangeDataProvider.isLinux()) { + return false; + } + // Check if the cgroup v2 specific file does NOT exist + final File cgroupV2File = new File("/sys/fs/cgroup/cgroup.controllers"); + if (cgroupV2File.exists()) { + return false; + } + + // Check for common cgroup v1 directories like memory or cpu + final File memoryCgroup = new File("/sys/fs/cgroup/memory"); + final File cpuCgroup = new File("/sys/fs/cgroup/cpu"); + + return memoryCgroup.exists() || cpuCgroup.exists(); + } + }, + LINUX_EKS_CGROUP_V2 { + @Override + public boolean isProvider() { + if (!OperatingRangeDataProvider.isLinux()) { + return false; + } + + // Check if the cgroup v2 specific file exists + final File cgroupV2File = new File("/sys/fs/cgroup/cgroup.controllers"); + + return cgroupV2File.exists(); + } + }, + LINUX_ECS_METADATA_KEY_V4 { + @Override + public boolean isProvider() { + if (!OperatingRangeDataProvider.isLinux()) { + return false; + } + return !System.getenv().getOrDefault(ECS_METADATA_KEY_V4, "").isEmpty(); + } + }, + LINUX_PROC { + @Override + public boolean isProvider() { + if (!OperatingRangeDataProvider.isLinux()) { + return false; + } + // Check if /proc directory exists (common in Linux environments) + return new File("/proc").exists(); + } + }; + + private static boolean isLinux() { + return System.getProperty("os.name").toLowerCase().contains("linux"); + } + + /** + * Abstract method to check if the provider is supported on the current platform. + * + * @return true if the provider is supported, false otherwise. 
+ */ + public abstract boolean isProvider(); +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/ResourceMetadataProvider.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/ResourceMetadataProvider.java new file mode 100644 index 000000000..28619c190 --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/ResourceMetadataProvider.java @@ -0,0 +1,42 @@ +package software.amazon.kinesis.worker.platform; + +import java.util.Optional; + +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +/** + * Interface for providing resource metadata for worker. + */ +@KinesisClientInternalApi +public interface ResourceMetadataProvider { + /** + * Enum representing the different compute platforms. + */ + enum ComputePlatform { + EC2, + ECS, + EKS, + UNKNOWN + } + + /** + * Check if the worker is running on the specific platform. + * + * @return true if the worker is running on the specific platform, false otherwise. + */ + boolean isOnPlatform(); + + /** + * Get the name of the compute platform. + * + * @return the platform represent by the class. + */ + ComputePlatform getPlatform(); + + /** + * Get the operating range data provider. + * + * @return the operating range data provider. 
+ */ + Optional getOperatingRangeDataProvider(); +} diff --git a/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/UrlOpener.java b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/UrlOpener.java new file mode 100644 index 000000000..80dfbb0ef --- /dev/null +++ b/amazon-kinesis-client/src/main/java/software/amazon/kinesis/worker/platform/UrlOpener.java @@ -0,0 +1,39 @@ +package software.amazon.kinesis.worker.platform; + +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; + +import lombok.RequiredArgsConstructor; +import software.amazon.kinesis.annotations.KinesisClientInternalApi; + +/** + * Utility class to open a URL and get the input stream. + */ +@RequiredArgsConstructor +@KinesisClientInternalApi +class UrlOpener { + private final URL url; + + /** + * Open the URL and return the connection. + * + * @return a HttpURLConnection. + * @throws IOException if a connection cannot be established. + */ + public HttpURLConnection openConnection() throws IOException { + return (HttpURLConnection) url.openConnection(); + } + + /** + * Get the input stream from the connection. + * + * @param connection the connection to get the input stream from. + * @return the InputStream for the data. + * @throws IOException if an error occurs while getting the input stream. 
+ */ + public InputStream getInputStream(HttpURLConnection connection) throws IOException { + return connection.getInputStream(); + } +} diff --git a/amazon-kinesis-client/src/main/proto/messages.proto b/amazon-kinesis-client/src/main/proto/messages.proto index eebb32b14..28e81cd67 100644 --- a/amazon-kinesis-client/src/main/proto/messages.proto +++ b/amazon-kinesis-client/src/main/proto/messages.proto @@ -20,4 +20,4 @@ message AggregatedRecord { repeated string partition_key_table = 1; repeated string explicit_hash_key_table = 2; repeated Record records = 3; -} +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/README.md b/amazon-kinesis-client/src/test/data/ecstestdata/README.md new file mode 100644 index 000000000..f7e3fe990 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/README.md @@ -0,0 +1 @@ +Sample test ECS metadata for Amazon ECS task metadata v4. For more information, see https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4-examples.html \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/root b/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/root @@ -0,0 +1,44 @@ +{ + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + 
"com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/stats b/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/stats new file mode 100644 index 000000000..6e6e88258 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:61:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 200000000, + "online_cpus": 2, + "throttling_data": { + "periods": 
0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 0, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 0, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + "hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + "rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + "total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": { + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/task b/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/task new file mode 100644 index 000000000..88344d11f --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noPrecpuStats/task @@ -0,0 +1,56 @@ +{ + "Cluster": "default", + "TaskARN": 
"arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 4, + "Memory": 128 + }, + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/root b/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/root @@ -0,0 +1,44 @@ +{ + 
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/stats b/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/stats new file mode 100644 index 000000000..668c5ef67 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:61:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { 
+ "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 100000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + "hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + "rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + "total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": 
{ + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/task b/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/task new file mode 100644 index 000000000..88344d11f --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noSystemCpuUsage/task @@ -0,0 +1,56 @@ +{ + "Cluster": "default", + "TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 4, + "Memory": 128 + }, + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + 
"NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/root b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/root @@ -0,0 +1,44 @@ +{ + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + 
], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/stats b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/stats new file mode 100644 index 000000000..f0f201820 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:51:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 200000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 100000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + 
"hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + "rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + "total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": { + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/task b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/task new file mode 100644 index 000000000..82a555cbf --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer/task @@ -0,0 +1,55 @@ +{ + "Cluster": "default", + "TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "Memory": 128 + }, + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": 
"ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/root b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/root @@ -0,0 +1,44 @@ +{ + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": 
"arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/stats b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/stats new file mode 100644 index 000000000..f0f201820 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:51:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 
182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 200000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 100000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + "hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + "rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + "total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": { + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/task 
b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/task new file mode 100644 index 000000000..2826bf6dd --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitOneContainer/task @@ -0,0 +1,52 @@ +{ + "Cluster": "default", + "TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git 
a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/root b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/root @@ -0,0 +1,44 @@ +{ + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/stats 
b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/stats new file mode 100644 index 000000000..b0fa34df5 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:61:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 200000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 100000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + "hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + "rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + 
"total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": { + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/task b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/task new file mode 100644 index 000000000..1e520961b --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/noTaskCpuLimitTwoContainers/task @@ -0,0 +1,96 @@ +{ + "Cluster": "default", + "TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + 
"com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + }, + { + "DockerId": "ee08638adaaf009d78c248913f629e38299471d45fe7dc944d1039077e3424ca", + "Name": "curl", + "DockerName": "ecs-curltest-26-curl-a0e7dba5aca6d8cb2e00", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 30, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:06.326590752Z", + "StartedAt": "2020-10-02T00:43:06.767535449Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/158d1c8083dd49d6b527399fd6414f5c" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/abb51bdd-11b4-467f-8f6c-adcfe1fe059d", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + 
"MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/root b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/root @@ -0,0 +1,44 @@ +{ + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": 
"ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/stats b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/stats new file mode 100644 index 000000000..b0fa34df5 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:61:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 200000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 100000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + "hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + 
"rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + "total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": { + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/task b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/task new file mode 100644 index 000000000..88344d11f --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitOneContainer/task @@ -0,0 +1,56 @@ +{ + "Cluster": "default", + "TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 4, + "Memory": 128 + }, + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + 
"ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/root b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/root new file mode 100644 index 000000000..fba97724c --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/root @@ -0,0 +1,44 @@ +{ + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "curl", + "DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "24" + }, + "DesiredStatus": "RUNNING", + 
"KnownStatus": "RUNNING", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:15:07.620912337Z", + "StartedAt": "2020-10-02T00:15:08.062559351Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.100" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:9e:32:c7:48:85", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/stats b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/stats new file mode 100644 index 000000000..b0fa34df5 --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/stats @@ -0,0 +1,130 @@ +{ + "read": "2020-10-02T00:61:13.410254284Z", + "preread": "2020-10-02T00:51:12.406202398Z", + "pids_stats": { + "current": 3 + }, + "blkio_stats": { + "io_service_bytes_recursive": [ + + ], + "io_serviced_recursive": [ + + ], + "io_queue_recursive": [ + + ], + "io_service_time_recursive": [ + + ], + "io_wait_time_recursive": [ + + ], + "io_merged_recursive": [ + + ], + "io_time_recursive": [ + + ], + "sectors_recursive": [ + + ] + }, + "num_procs": 0, + "storage_stats": { + + }, + "cpu_stats": { + "cpu_usage": { + "total_usage": 150000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 200000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 
+ } + }, + "precpu_stats": { + "cpu_usage": { + "total_usage": 100000000, + "percpu_usage": [ + 182359190, + 178608875 + ], + "usage_in_kernelmode": 40000000, + "usage_in_usermode": 290000000 + }, + "system_cpu_usage": 100000000, + "online_cpus": 2, + "throttling_data": { + "periods": 0, + "throttled_periods": 0, + "throttled_time": 0 + } + }, + "memory_stats": { + "usage": 1806336, + "max_usage": 6299648, + "stats": { + "active_anon": 606208, + "active_file": 0, + "cache": 0, + "dirty": 0, + "hierarchical_memory_limit": 134217728, + "hierarchical_memsw_limit": 268435456, + "inactive_anon": 0, + "inactive_file": 0, + "mapped_file": 0, + "pgfault": 4185, + "pgmajfault": 0, + "pgpgin": 2926, + "pgpgout": 2778, + "rss": 606208, + "rss_huge": 0, + "total_active_anon": 606208, + "total_active_file": 0, + "total_cache": 0, + "total_dirty": 0, + "total_inactive_anon": 0, + "total_inactive_file": 0, + "total_mapped_file": 0, + "total_pgfault": 4185, + "total_pgmajfault": 0, + "total_pgpgin": 2926, + "total_pgpgout": 2778, + "total_rss": 606208, + "total_rss_huge": 0, + "total_unevictable": 0, + "total_writeback": 0, + "unevictable": 0, + "writeback": 0 + }, + "limit": 134217728 + }, + "name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01", + "id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af", + "networks": { + "eth0": { + "rx_bytes": 84, + "rx_packets": 2, + "rx_errors": 0, + "rx_dropped": 0, + "tx_bytes": 84, + "tx_packets": 2, + "tx_errors": 0, + "tx_dropped": 0 + } + }, + "network_rate_stats": { + "rx_bytes_per_sec": 0, + "tx_bytes_per_sec": 0 + } +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/task b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/task new file mode 100644 index 000000000..1e520961b --- /dev/null +++ b/amazon-kinesis-client/src/test/data/ecstestdata/taskCpuLimitTwoContainers/task @@ -0,0 +1,96 @@ +{ + "Cluster": "default", + "TaskARN": 
"arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "Family": "curltest", + "ServiceName": "MyService", + "Revision": "26", + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "PullStartedAt": "2020-10-02T00:43:06.202617438Z", + "PullStoppedAt": "2020-10-02T00:43:06.31288465Z", + "AvailabilityZone": "us-west-2d", + "VPCID": "vpc-1234567890abcdef0", + "LaunchType": "EC2", + "Containers": [ + { + "DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66", + "Name": "~internal~ecs~pause", + "DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00", + "Image": "amazon/amazon-ecs-pause:0.1.0", + "ImageID": "", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + "com.amazonaws.ecs.container-name": "~internal~ecs~pause", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RESOURCES_PROVISIONED", + "KnownStatus": "RESOURCES_PROVISIONED", + "Limits": { + "CPU": 50, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:05.602352471Z", + "StartedAt": "2020-10-02T00:43:06.076707576Z", + "Type": "CNI_PAUSE", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + }, + { + "DockerId": "ee08638adaaf009d78c248913f629e38299471d45fe7dc944d1039077e3424ca", + "Name": "curl", + "DockerName": "ecs-curltest-26-curl-a0e7dba5aca6d8cb2e00", + "Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest", + "ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553", + "Labels": { + "com.amazonaws.ecs.cluster": "default", + 
"com.amazonaws.ecs.container-name": "curl", + "com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c", + "com.amazonaws.ecs.task-definition-family": "curltest", + "com.amazonaws.ecs.task-definition-version": "26" + }, + "DesiredStatus": "RUNNING", + "KnownStatus": "RUNNING", + "Limits": { + "CPU": 30, + "Memory": 128 + }, + "CreatedAt": "2020-10-02T00:43:06.326590752Z", + "StartedAt": "2020-10-02T00:43:06.767535449Z", + "Type": "NORMAL", + "LogDriver": "awslogs", + "LogOptions": { + "awslogs-create-group": "true", + "awslogs-group": "/ecs/metadata", + "awslogs-region": "us-west-2", + "awslogs-stream": "ecs/curl/158d1c8083dd49d6b527399fd6414f5c" + }, + "ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/abb51bdd-11b4-467f-8f6c-adcfe1fe059d", + "Networks": [ + { + "NetworkMode": "awsvpc", + "IPv4Addresses": [ + "10.0.2.61" + ], + "AttachmentIndex": 0, + "MACAddress": "0e:10:e2:01:bd:91", + "IPv4SubnetCIDRBlock": "10.0.2.0/24", + "PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal", + "SubnetGatewayIpv4Address": "10.0.2.1/24" + } + ] + } + ] +} \ No newline at end of file diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/checkpoint/ShardShardRecordProcessorCheckpointerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/checkpoint/ShardShardRecordProcessorCheckpointerTest.java index a198dcef0..aaf6cf642 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/checkpoint/ShardShardRecordProcessorCheckpointerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/checkpoint/ShardShardRecordProcessorCheckpointerTest.java @@ -47,7 +47,7 @@ public class ShardShardRecordProcessorCheckpointerTest { private String shardId = "shardId-123"; /** - * @throws java.lang.Exception + * @throws Exception */ @Before public void setup() throws Exception { diff --git 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/CoordinatorStateDAOTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/CoordinatorStateDAOTest.java new file mode 100644 index 000000000..b2dbcfc23 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/CoordinatorStateDAOTest.java @@ -0,0 +1,620 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.function.Consumer; + +import com.amazonaws.services.dynamodbv2.AcquireLockOptions; +import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClient; +import com.amazonaws.services.dynamodbv2.LockItem; +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.amazonaws.services.dynamodbv2.local.shared.access.AmazonDynamoDBLocal; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import software.amazon.awssdk.core.internal.waiters.DefaultWaiterResponse; +import software.amazon.awssdk.core.waiters.WaiterResponse; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue; +import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import software.amazon.awssdk.services.dynamodb.model.ScanRequest; +import 
software.amazon.awssdk.services.dynamodb.model.ScanResponse; +import software.amazon.awssdk.services.dynamodb.model.TableDescription; +import software.amazon.awssdk.services.dynamodb.model.TableStatus; +import software.amazon.awssdk.services.dynamodb.model.Tag; +import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest; +import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter; +import software.amazon.kinesis.common.FutureUtils; +import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig; +import software.amazon.kinesis.coordinator.migration.ClientVersion; +import software.amazon.kinesis.coordinator.migration.MigrationState; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.coordinator.CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME; +import static software.amazon.kinesis.coordinator.CoordinatorState.LEADER_HASH_KEY; +import static software.amazon.kinesis.coordinator.migration.MigrationState.CLIENT_VERSION_ATTRIBUTE_NAME; +import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY; + +@Slf4j +public class CoordinatorStateDAOTest { + private static final String WORKER_ID = "CoordinatorStateDAOTestWorker"; + private final AmazonDynamoDBLocal embeddedDdb = DynamoDBEmbedded.create(); + private final DynamoDbAsyncClient dynamoDbAsyncClient = embeddedDdb.dynamoDbAsyncClient(); + private String tableNameForTest; + + @Test + public void testProvisionedTableCreation_DefaultTableName() + throws ExecutionException, InterruptedException, DependencyException { + /* 
Test setup - create class under test **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, + getCoordinatorStateConfig( + "testProvisionedTableCreation", + ProvisionedThroughput.builder() + .writeCapacityUnits(30L) + .readCapacityUnits(15L) + .build())); + + /* Test step - initialize to create the table **/ + doaUnderTest.initialize(); + + /* Verify - table with correct configuration is created */ + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName("testProvisionedTableCreation-CoordinatorState") + .build()) + .get(); + + Assertions.assertEquals( + 15L, + response.table().provisionedThroughput().readCapacityUnits().longValue()); + Assertions.assertEquals( + 30L, + response.table().provisionedThroughput().writeCapacityUnits().longValue()); + } + + @Test + public void testTableCreationWithDeletionProtection_assertDeletionProtectionEnabled() + throws DependencyException, ExecutionException, InterruptedException { + + final CoordinatorStateTableConfig config = getCoordinatorStateConfig( + "testTableCreationWithDeletionProtection", + ProvisionedThroughput.builder() + .writeCapacityUnits(30L) + .readCapacityUnits(15L) + .build()); + config.deletionProtectionEnabled(true); + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO(dynamoDbAsyncClient, config); + + doaUnderTest.initialize(); + + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName("testTableCreationWithDeletionProtection-CoordinatorState") + .build()) + .get(); + + Assertions.assertTrue(response.table().deletionProtectionEnabled()); + } + + /** + * DynamoDBLocal does not support PITR and tags and thus this test is using mocks. 
+ */ + @Test + public void testTableCreationWithTagsAndPitr_assertTags() throws DependencyException { + final DynamoDbAsyncWaiter waiter = mock(DynamoDbAsyncWaiter.class); + final WaiterResponse waiterResponse = DefaultWaiterResponse.builder() + .response(dummyDescribeTableResponse(TableStatus.ACTIVE)) + .attemptsExecuted(1) + .build(); + when(waiter.waitUntilTableExists(any(Consumer.class), any(Consumer.class))) + .thenReturn(CompletableFuture.completedFuture((WaiterResponse) waiterResponse)); + final DynamoDbAsyncClient dbAsyncClient = mock(DynamoDbAsyncClient.class); + when(dbAsyncClient.waiter()).thenReturn(waiter); + when(dbAsyncClient.createTable(any(CreateTableRequest.class))) + .thenReturn(CompletableFuture.completedFuture(CreateTableResponse.builder() + .tableDescription( + dummyDescribeTableResponse(TableStatus.CREATING).table()) + .build())); + when(dbAsyncClient.updateContinuousBackups(any(UpdateContinuousBackupsRequest.class))) + .thenReturn(CompletableFuture.completedFuture(null)); + when(dbAsyncClient.describeTable(any(DescribeTableRequest.class))) + .thenThrow(ResourceNotFoundException.builder().build()) + .thenReturn(CompletableFuture.completedFuture(dummyDescribeTableResponse(TableStatus.ACTIVE))); + + final ArgumentCaptor createTableRequestArgumentCaptor = + ArgumentCaptor.forClass(CreateTableRequest.class); + final ArgumentCaptor updateContinuousBackupsRequestArgumentCaptor = + ArgumentCaptor.forClass(UpdateContinuousBackupsRequest.class); + + final CoordinatorStateTableConfig config = getCoordinatorStateConfig( + "testTableCreationWithTagsAndPitr", + ProvisionedThroughput.builder() + .writeCapacityUnits(30L) + .readCapacityUnits(15L) + .build()); + config.tableName("testTableCreationWithTagsAndPitr"); + config.pointInTimeRecoveryEnabled(true); + config.tags( + Collections.singleton(Tag.builder().key("Key").value("Value").build())); + + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO(dbAsyncClient, config); + 
doaUnderTest.initialize(); + + verify(dbAsyncClient).createTable(createTableRequestArgumentCaptor.capture()); + verify(dbAsyncClient).updateContinuousBackups(updateContinuousBackupsRequestArgumentCaptor.capture()); + Assertions.assertEquals( + 1, createTableRequestArgumentCaptor.getValue().tags().size()); + + Assertions.assertEquals( + "Key", createTableRequestArgumentCaptor.getValue().tags().get(0).key()); + Assertions.assertEquals( + "Value", + createTableRequestArgumentCaptor.getValue().tags().get(0).value()); + Assertions.assertTrue(updateContinuousBackupsRequestArgumentCaptor + .getAllValues() + .get(0) + .pointInTimeRecoverySpecification() + .pointInTimeRecoveryEnabled()); + } + + private static DescribeTableResponse dummyDescribeTableResponse(final TableStatus tableStatus) { + return DescribeTableResponse.builder() + .table(TableDescription.builder().tableStatus(tableStatus).build()) + .build(); + } + + @Test + public void testPayPerUseTableCreation_DefaultTableName() + throws ExecutionException, InterruptedException, DependencyException { + /* Test setup - create class under test **/ + final CoordinatorConfig c = new CoordinatorConfig("testPayPerUseTableCreation"); + c.coordinatorStateTableConfig().billingMode(BillingMode.PAY_PER_REQUEST); + + final CoordinatorStateDAO doaUnderTest = + new CoordinatorStateDAO(dynamoDbAsyncClient, c.coordinatorStateTableConfig()); + + /* Test step - initialize to create the table **/ + doaUnderTest.initialize(); + + /* Verify - table with correct configuration is created */ + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName("testPayPerUseTableCreation-CoordinatorState") + .build()) + .get(); + + Assertions.assertEquals( + BillingMode.PAY_PER_REQUEST, + response.table().billingModeSummary().billingMode()); + } + + @Test + public void testProvisionedTableCreation_CustomTableName() + throws ExecutionException, InterruptedException, DependencyException { + /* 
Test setup - create class under test **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, + getCoordinatorStateConfig( + "TestApplicationName", + BillingMode.PROVISIONED, + ProvisionedThroughput.builder() + .readCapacityUnits(10L) + .writeCapacityUnits(20L) + .build(), + "MyCustomTableName-testProvisionedTableCreation")); + + /* Test step - initialize to create the table **/ + doaUnderTest.initialize(); + + /* Verify - table with correct configuration is created */ + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName("MyCustomTableName-testProvisionedTableCreation") + .build()) + .get(); + + Assertions.assertEquals( + 10L, + response.table().provisionedThroughput().readCapacityUnits().longValue()); + Assertions.assertEquals( + 20L, + response.table().provisionedThroughput().writeCapacityUnits().longValue()); + } + + @Test + public void testPayPerUseTableCreation_CustomTableName() + throws ExecutionException, InterruptedException, DependencyException { + /* Test setup - create class under test **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, + getCoordinatorStateConfig( + "TestApplicationName", + BillingMode.PAY_PER_REQUEST, + null, + "MyCustomTableName-testPayPerUseTableCreation")); + + /* Test step - initialize to create the table **/ + doaUnderTest.initialize(); + + /* Verify - table with correct configuration is created */ + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName("MyCustomTableName-testPayPerUseTableCreation") + .build()) + .get(); + + Assertions.assertEquals( + BillingMode.PAY_PER_REQUEST, + response.table().billingModeSummary().billingMode()); + } + + @Test + public void testCreatingLeaderAndMigrationKey() + throws ProvisionedThroughputException, InvalidStateException, DependencyException, InterruptedException, + IOException { 
+ /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, getCoordinatorStateConfig("testCreatingLeaderAndMigrationKey")); + doaUnderTest.initialize(); + + /* Test steps - create migration item, DDB lease election lock item, and another item with different schema **/ + createCoordinatorState("key1"); + + final MigrationState migrationState = new MigrationState(MIGRATION_HASH_KEY, WORKER_ID) + .update(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X, WORKER_ID); + doaUnderTest.createCoordinatorStateIfNotExists(migrationState); + + final AmazonDynamoDBLockClient dynamoDBLockClient = new AmazonDynamoDBLockClient(doaUnderTest + .getDDBLockClientOptionsBuilder() + .withOwnerName("TEST_WORKER") + .withCreateHeartbeatBackgroundThread(true) + .build()); + final Optional optionalItem = dynamoDBLockClient.tryAcquireLock( + AcquireLockOptions.builder(LEADER_HASH_KEY).build()); + Assertions.assertTrue(optionalItem.isPresent(), "Lock was not acquired"); + + final AmazonDynamoDBLockClient worker2DynamoDBLockClient = new AmazonDynamoDBLockClient(doaUnderTest + .getDDBLockClientOptionsBuilder() + .withOwnerName("TEST_WORKER_2") + .withCreateHeartbeatBackgroundThread(true) + .build()); + final Optional worker2OptionalItem = worker2DynamoDBLockClient.tryAcquireLock( + AcquireLockOptions.builder(LEADER_HASH_KEY).build()); + Assertions.assertFalse(worker2OptionalItem.isPresent(), "Second worker was able to acquire the lock"); + + /* Verify - both items are present with the corresponding content */ + final ScanResponse response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan( + ScanRequest.builder().tableName(tableNameForTest).build())); + log.info("response {}", response); + + Assertions.assertEquals(3, response.scannedCount(), "incorrect item count"); + response.items().forEach(item -> { + final String key = + item.get(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME).s(); + if 
(MIGRATION_HASH_KEY.equals(key)) { + // Make sure the record has not changed due to using + // ddb lock client + Assertions.assertEquals( + ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X.toString(), + item.get(CLIENT_VERSION_ATTRIBUTE_NAME).s()); + } else if (LEADER_HASH_KEY.equals(key)) { + Assertions.assertEquals("TEST_WORKER", item.get("ownerName").s()); + } else if ("key1".equals(key)) { + Assertions.assertEquals(4, item.size()); + Assertions.assertEquals("key1_strVal", item.get("key1-StrAttr").s()); + Assertions.assertEquals( + 100, Integer.valueOf(item.get("key1-IntAttr").n())); + Assertions.assertEquals(true, item.get("key1-BoolAttr").bool()); + } + }); + + dynamoDBLockClient.close(); + worker2DynamoDBLockClient.close(); + } + + @Test + public void testListCoordinatorState() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = + new CoordinatorStateDAO(dynamoDbAsyncClient, getCoordinatorStateConfig("testListCoordinatorState")); + doaUnderTest.initialize(); + + /* Test step - create a few coordinatorState items with different schema and invoke the test to list items */ + createCoordinatorState("key1"); + createCoordinatorState("key2"); + createCoordinatorState("key3"); + createCoordinatorState("key4"); + createMigrationState(); + + final List stateList = doaUnderTest.listCoordinatorState(); + + /* Verify **/ + Assertions.assertEquals(5, stateList.size()); + stateList.forEach(state -> { + final String keyValue = state.getKey(); + if ("Migration3.0".equals(keyValue)) { + Assertions.assertTrue(state instanceof MigrationState); + final MigrationState migrationState = (MigrationState) state; + Assertions.assertEquals(ClientVersion.CLIENT_VERSION_3X, migrationState.getClientVersion()); + return; + } + Assertions.assertEquals(3, state.getAttributes().size()); + Assertions.assertEquals( + keyValue + "_strVal", + 
state.getAttributes().get(keyValue + "-StrAttr").s()); + Assertions.assertEquals( + 100, + Integer.valueOf( + state.getAttributes().get(keyValue + "-IntAttr").n())); + Assertions.assertEquals( + true, state.getAttributes().get(keyValue + "-BoolAttr").bool()); + }); + } + + @Test + public void testCreateCoordinatorState_ItemNotExists() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, getCoordinatorStateConfig("testCreatingLeaderAndMigrationKey")); + doaUnderTest.initialize(); + + /* Test step - create a few coordinatorState items with different schema and invoke the test to list items */ + final CoordinatorState s1 = CoordinatorState.builder() + .key("key1") + .attributes(new HashMap() { + { + put("abc", AttributeValue.fromS("abc")); + put("xyz", AttributeValue.fromS("xyz")); + } + }) + .build(); + final boolean result = doaUnderTest.createCoordinatorStateIfNotExists(s1); + + /* Verify - insert succeeded and item matches **/ + Assertions.assertTrue(result); + final CoordinatorState stateFromDdb = doaUnderTest.getCoordinatorState("key1"); + Assertions.assertEquals(s1, stateFromDdb); + } + + @Test + public void testCreateCoordinatorState_ItemExists() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, getCoordinatorStateConfig("testCreatingLeaderAndMigrationKey")); + doaUnderTest.initialize(); + createCoordinatorState("key1"); + + /* Test step - create a few coordinatorState items with different schema and invoke the test to list items */ + final CoordinatorState s1 = CoordinatorState.builder() + .key("key1") + .attributes(new HashMap() { + { + put("abc", AttributeValue.fromS("abc")); + put("xyz", 
AttributeValue.fromS("xyz")); + } + }) + .build(); + final boolean result = doaUnderTest.createCoordinatorStateIfNotExists(s1); + + /* Verify - insert succeeded and item matches **/ + Assertions.assertFalse(result); + final CoordinatorState stateFromDdb = doaUnderTest.getCoordinatorState("key1"); + Assertions.assertNotEquals(s1, stateFromDdb); + } + + @Test + public void testUpdateCoordinatorStateWithExpectation_Success() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, getCoordinatorStateConfig("testCreatingLeaderAndMigrationKey")); + doaUnderTest.initialize(); + createCoordinatorState("key1"); + + /* Test step - update the state */ + final CoordinatorState updatedState = CoordinatorState.builder() + .key("key1") + .attributes(new HashMap() { + { + put("key1-StrAttr", AttributeValue.fromS("key1_strVal")); + put("key1-IntAttr", AttributeValue.fromN("200")); + put("key1-BoolAttr", AttributeValue.fromBool(false)); + } + }) + .build(); + + final boolean updated = doaUnderTest.updateCoordinatorStateWithExpectation( + updatedState, new HashMap() { + { + put( + "key1-StrAttr", + ExpectedAttributeValue.builder() + .value(AttributeValue.fromS("key1_strVal")) + .build()); + } + }); + + /* Verify - update succeeded **/ + Assertions.assertTrue(updated); + } + + @Test + public void testUpdateCoordinatorStateWithExpectation_ConditionFailed() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, getCoordinatorStateConfig("testCreatingLeaderAndMigrationKey")); + doaUnderTest.initialize(); + final MigrationState state = createMigrationState(); + + /* Test step - update the state with mismatched condition */ + final 
MigrationState updatedState = state.copy().update(ClientVersion.CLIENT_VERSION_2X, WORKER_ID); + + boolean updated = doaUnderTest.updateCoordinatorStateWithExpectation( + updatedState, updatedState.getDynamoClientVersionExpectation()); + + /* Verify - update failed **/ + Assertions.assertFalse(updated); + + /* Verify - update succeeded **/ + final MigrationState currentState = (MigrationState) doaUnderTest.getCoordinatorState("Migration3.0"); + updated = doaUnderTest.updateCoordinatorStateWithExpectation( + updatedState, currentState.getDynamoClientVersionExpectation()); + Assertions.assertTrue(updated); + final GetItemResponse response = dynamoDbAsyncClient + .getItem(GetItemRequest.builder() + .tableName(tableNameForTest) + .key(new HashMap() { + { + put("key", AttributeValue.fromS("Migration3.0")); + } + }) + .build()) + .join(); + Assertions.assertEquals( + ClientVersion.CLIENT_VERSION_2X.name(), + response.item().get("cv").s()); + Assertions.assertEquals(WORKER_ID, response.item().get("mb").s()); + Assertions.assertEquals( + String.valueOf(updatedState.getModifiedTimestamp()), + response.item().get("mts").n()); + Assertions.assertEquals(1, response.item().get("h").l().size()); + Assertions.assertEquals( + state.getClientVersion().name(), + response.item().get("h").l().get(0).m().get("cv").s()); + Assertions.assertEquals( + state.getModifiedBy(), + response.item().get("h").l().get(0).m().get("mb").s()); + Assertions.assertEquals( + String.valueOf(state.getModifiedTimestamp()), + response.item().get("h").l().get(0).m().get("mts").n()); + + log.info("Response {}", response); + } + + @Test + public void testUpdateCoordinatorStateWithExpectation_NonExistentKey() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + /* Test setup - create class under test and initialize **/ + final CoordinatorStateDAO doaUnderTest = new CoordinatorStateDAO( + dynamoDbAsyncClient, getCoordinatorStateConfig("testCreatingLeaderAndMigrationKey")); + 
doaUnderTest.initialize(); + + /* Test step - update with new state object */ + final MigrationState updatedState = + new MigrationState("Migration3.0", WORKER_ID).update(ClientVersion.CLIENT_VERSION_2X, WORKER_ID); + + boolean updated = doaUnderTest.updateCoordinatorStateWithExpectation(updatedState, null); + + /* Verify - update failed **/ + Assertions.assertFalse(updated); + } + + private CoordinatorStateTableConfig getCoordinatorStateConfig(final String applicationName) { + return getCoordinatorStateConfig(applicationName, BillingMode.PAY_PER_REQUEST, null, null); + } + + private CoordinatorStateTableConfig getCoordinatorStateConfig( + final String applicationName, final ProvisionedThroughput throughput) { + return getCoordinatorStateConfig(applicationName, BillingMode.PROVISIONED, throughput, null); + } + + private CoordinatorStateTableConfig getCoordinatorStateConfig( + final String applicationName, + final BillingMode mode, + final ProvisionedThroughput throughput, + final String tableName) { + final CoordinatorConfig c = new CoordinatorConfig(applicationName); + c.coordinatorStateTableConfig().billingMode(mode); + if (tableName != null) { + c.coordinatorStateTableConfig().tableName(tableName); + } + if (mode == BillingMode.PROVISIONED) { + c.coordinatorStateTableConfig() + .writeCapacity(throughput.writeCapacityUnits()) + .readCapacity(throughput.readCapacityUnits()); + } + + tableNameForTest = c.coordinatorStateTableConfig().tableName(); + + return c.coordinatorStateTableConfig(); + } + + private void createCoordinatorState(final String keyValue) { + dynamoDbAsyncClient + .putItem(PutItemRequest.builder() + .tableName(tableNameForTest) + .item(new HashMap() { + { + put("key", AttributeValue.fromS(keyValue)); + put(keyValue + "-StrAttr", AttributeValue.fromS(keyValue + "_strVal")); + put(keyValue + "-IntAttr", AttributeValue.fromN("100")); + put(keyValue + "-BoolAttr", AttributeValue.fromBool(true)); + } + }) + .build()) + .join(); + } + + private 
MigrationState createMigrationState() { + final HashMap item = new HashMap() { + { + put("key", AttributeValue.fromS("Migration3.0")); + put("cv", AttributeValue.fromS(ClientVersion.CLIENT_VERSION_3X.toString())); + put("mb", AttributeValue.fromS("DUMMY_WORKER")); + put("mts", AttributeValue.fromN(String.valueOf(System.currentTimeMillis()))); + } + }; + + dynamoDbAsyncClient + .putItem(PutItemRequest.builder() + .tableName(tableNameForTest) + .item(item) + .build()) + .join(); + + item.remove("key"); + + return MigrationState.deserialize("Migration3.0", item); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDeciderTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDeciderTest.java index bbed04d3a..0a505f516 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDeciderTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DeterministicShuffleShardSyncLeaderDeciderTest.java @@ -33,6 +33,8 @@ import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.NullMetricsFactory; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; import static org.junit.Assert.assertFalse; @@ -48,6 +50,7 @@ public class DeterministicShuffleShardSyncLeaderDeciderTest { private static final String LEASE_KEY = "lease_key"; private static final String LEASE_OWNER = "lease_owner"; private static final String WORKER_ID = "worker-id"; + private static final MetricsFactory NULL_METRICS_FACTORY = new NullMetricsFactory(); private DeterministicShuffleShardSyncLeaderDecider leaderDecider; @@ -66,7 +69,7 @@ public class 
DeterministicShuffleShardSyncLeaderDeciderTest { public void setup() { numShardSyncWorkers = 1; leaderDecider = new DeterministicShuffleShardSyncLeaderDecider( - leaseRefresher, scheduledExecutorService, numShardSyncWorkers, readWriteLock); + leaseRefresher, scheduledExecutorService, numShardSyncWorkers, readWriteLock, NULL_METRICS_FACTORY); when(readWriteLock.readLock()).thenReturn(mock(ReentrantReadWriteLock.ReadLock.class)); when(readWriteLock.writeLock()).thenReturn(mock(ReentrantReadWriteLock.WriteLock.class)); @@ -122,7 +125,7 @@ public void testElectedLeadersAsPerExpectedShufflingOrder() throws Exception { public void testElectedLeadersAsPerExpectedShufflingOrderWhenUniqueWorkersLessThanMaxLeaders() { this.numShardSyncWorkers = 5; // More than number of unique lease owners leaderDecider = new DeterministicShuffleShardSyncLeaderDecider( - leaseRefresher, scheduledExecutorService, numShardSyncWorkers, readWriteLock); + leaseRefresher, scheduledExecutorService, numShardSyncWorkers, readWriteLock, NULL_METRICS_FACTORY); List leases = getLeases(3, false /*emptyLeaseOwner */, false /* duplicateLeaseOwner */, true /* activeLeases */); Set expectedLeaders = getExpectedLeaders(leases); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializerTest.java new file mode 100644 index 000000000..6d9cfb985 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DynamicMigrationComponentsInitializerTest.java @@ -0,0 +1,369 @@ +package software.amazon.kinesis.coordinator; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.function.BiFunction; 
+import java.util.function.Supplier; + +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode; +import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager; +import software.amazon.kinesis.coordinator.migration.ClientVersion; +import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider; +import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.NullMetricsFactory; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStats; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyObject; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.RETURNS_MOCKS; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.reset; 
+import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@Slf4j +public class DynamicMigrationComponentsInitializerTest { + + private DynamicMigrationComponentsInitializer migrationInitializer; + + private final MetricsFactory mockMetricsFactory = new NullMetricsFactory(); + private final LeaseRefresher mockLeaseRefresher = mock(LeaseRefresher.class, Mockito.RETURNS_MOCKS); + private final CoordinatorStateDAO mockCoordinatorStateDAO = mock(CoordinatorStateDAO.class, Mockito.RETURNS_MOCKS); + private final ScheduledExecutorService mockWorkerMetricsScheduler = + mock(ScheduledExecutorService.class, RETURNS_MOCKS); + private final WorkerMetricStatsDAO mockWorkerMetricsDAO = mock(WorkerMetricStatsDAO.class, RETURNS_MOCKS); + private final WorkerMetricStatsManager mockWorkerMetricsManager = + mock(WorkerMetricStatsManager.class, RETURNS_MOCKS); + private final ScheduledExecutorService mockLamThreadPool = mock(ScheduledExecutorService.class, RETURNS_MOCKS); + private final LeaseAssignmentManager mockLam = mock(LeaseAssignmentManager.class, RETURNS_MOCKS); + private final BiFunction mockLamCreator = + mock(LeaseAssignmentManagerSupplier.class); + private final MigrationAdaptiveLeaderDecider mockMigrationAdaptiveLeaderDecider = + mock(MigrationAdaptiveLeaderDecider.class); + private final Supplier mockAdaptiveLeaderDeciderCreator = + mock(MigrationAdaptiveLeaderDeciderSupplier.class); + private final DeterministicShuffleShardSyncLeaderDecider mockDeterministicLeaderDecider = + mock(DeterministicShuffleShardSyncLeaderDecider.class); + private final Supplier mockDeterministicLeaderDeciderCreator = + mock(DeterministicShuffleShardSyncLeaderDeciderSupplier.class); + private final DynamoDBLockBasedLeaderDecider mockDdbLockLeaderDecider = mock(DynamoDBLockBasedLeaderDecider.class); + private final Supplier mockDdbLockBasedLeaderDeciderCreator = + mock(DynamoDBLockBasedLeaderDeciderSupplier.class); + private final String workerIdentifier = 
"TEST_WORKER_ID"; + private final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig = + new WorkerUtilizationAwareAssignmentConfig(); + final MigrationAdaptiveLeaseAssignmentModeProvider mockConsumer = + mock(MigrationAdaptiveLeaseAssignmentModeProvider.class); + + private static final String APPLICATION_NAME = "TEST_APPLICATION"; + + @BeforeEach + public void setup() { + workerUtilizationAwareAssignmentConfig.workerMetricsTableConfig(new WorkerMetricsTableConfig(APPLICATION_NAME)); + when(mockLamCreator.apply(any(), anyObject())).thenReturn(mockLam); + when(mockAdaptiveLeaderDeciderCreator.get()).thenReturn(mockMigrationAdaptiveLeaderDecider); + when(mockDdbLockBasedLeaderDeciderCreator.get()).thenReturn(mockDdbLockLeaderDecider); + when(mockDeterministicLeaderDeciderCreator.get()).thenReturn(mockDeterministicLeaderDecider); + + migrationInitializer = new DynamicMigrationComponentsInitializer( + mockMetricsFactory, + mockLeaseRefresher, + mockCoordinatorStateDAO, + mockWorkerMetricsScheduler, + mockWorkerMetricsDAO, + mockWorkerMetricsManager, + mockLamThreadPool, + mockLamCreator, + mockAdaptiveLeaderDeciderCreator, + mockDeterministicLeaderDeciderCreator, + mockDdbLockBasedLeaderDeciderCreator, + workerIdentifier, + workerUtilizationAwareAssignmentConfig, + mockConsumer); + } + + @Test + public void testInitialize_ClientVersion3_X() throws DependencyException { + // Test initializing to verify correct leader decider is created + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_3X); + + verify(mockWorkerMetricsManager).startManager(); + verify(mockDdbLockBasedLeaderDeciderCreator).get(); + verify(mockAdaptiveLeaderDeciderCreator, never()).get(); + verify(mockDeterministicLeaderDeciderCreator, never()).get(); + verify(mockLamCreator).apply(eq(mockLamThreadPool), eq(migrationInitializer.leaderDecider())); + + // verify LeaseAssignmentModeChange consumer initialization + verify(mockConsumer).initialize(eq(false), 
eq(LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT)); + + when(mockLeaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(anyLong(), anyLong())) + .thenReturn(true); + + // test initialization from state machine + migrationInitializer.initializeClientVersionFor3x(ClientVersion.CLIENT_VERSION_INIT); + verify(mockWorkerMetricsDAO).initialize(); + verify(mockWorkerMetricsScheduler).scheduleAtFixedRate(any(), anyLong(), anyLong(), any()); + // verify that GSI will be created if it doesn't exist + verify(mockLeaseRefresher).createLeaseOwnerToLeaseKeyIndexIfNotExists(); + // and it will block for the creation + verify(mockLeaseRefresher).waitUntilLeaseOwnerToLeaseKeyIndexExists(anyLong(), anyLong()); + verify(mockDdbLockLeaderDecider).initialize(); + verify(mockLam).start(); + } + + /** + * exactly same as above except: + * 1. migration adaptive leader decider will be created in addition to ddb lock leader decider. + * 2. dynamicModeChangeSupportNeeded is returned as true to LeaseAssignmentModeChange notification consumer + * 3. 
gsi creation is not triggered + */ + @Test + public void testInitialize_ClientVersion_3_xWithRollback() throws DependencyException { + // Test initializing to verify correct leader decider is created + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK); + + verify(mockWorkerMetricsManager).startManager(); + + verify(mockDdbLockBasedLeaderDeciderCreator).get(); + verify(mockAdaptiveLeaderDeciderCreator).get(); + verify(mockDeterministicLeaderDeciderCreator, never()).get(); + verify(mockLamCreator).apply(eq(mockLamThreadPool), eq(migrationInitializer.leaderDecider())); + + // verify LeaseAssignmentModeChange consumer initialization + verify(mockConsumer).initialize(eq(true), eq(LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT)); + + // test initialization from state machine + migrationInitializer.initializeClientVersionFor3xWithRollback(ClientVersion.CLIENT_VERSION_INIT); + + verify(mockWorkerMetricsDAO).initialize(); + verify(mockWorkerMetricsScheduler).scheduleAtFixedRate(any(), anyLong(), anyLong(), any()); + verify(mockLeaseRefresher, never()).createLeaseOwnerToLeaseKeyIndexIfNotExists(); + verify(mockLeaseRefresher, never()).waitUntilLeaseOwnerToLeaseKeyIndexExists(anyLong(), anyLong()); + verify(mockMigrationAdaptiveLeaderDecider).updateLeaderDecider(mockDdbLockLeaderDecider); + verify(mockLam).start(); + } + + @ParameterizedTest + @CsvSource({"CLIENT_VERSION_UPGRADE_FROM_2X", "CLIENT_VERSION_2X"}) + public void testInitialize_ClientVersion_All2_X(final ClientVersion clientVersion) throws DependencyException { + // Test initializing to verify correct leader decider is created + migrationInitializer.initialize(clientVersion); + + verify(mockWorkerMetricsManager).startManager(); + + verify(mockDdbLockBasedLeaderDeciderCreator, never()).get(); + verify(mockAdaptiveLeaderDeciderCreator).get(); + verify(mockDeterministicLeaderDeciderCreator).get(); + verify(mockLamCreator).apply(eq(mockLamThreadPool), 
eq(migrationInitializer.leaderDecider())); + + // verify LeaseAssignmentModeChange consumer initialization + verify(mockConsumer).initialize(eq(true), eq(LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)); + + // test initialization from state machine + if (clientVersion == ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X) { + migrationInitializer.initializeClientVersionForUpgradeFrom2x(ClientVersion.CLIENT_VERSION_INIT); + // start worker stats and create gsi without waiting + verify(mockWorkerMetricsDAO).initialize(); + verify(mockWorkerMetricsScheduler).scheduleAtFixedRate(any(), anyLong(), anyLong(), any()); + verify(mockLeaseRefresher).createLeaseOwnerToLeaseKeyIndexIfNotExists(); + verify(mockLeaseRefresher, never()).waitUntilLeaseOwnerToLeaseKeyIndexExists(anyLong(), anyLong()); + } else { + migrationInitializer.initializeClientVersionFor2x(ClientVersion.CLIENT_VERSION_INIT); + verify(mockWorkerMetricsDAO, never()).initialize(); + verify(mockWorkerMetricsScheduler, never()).scheduleAtFixedRate(any(), anyLong(), anyLong(), any()); + verify(mockLeaseRefresher, never()).createLeaseOwnerToLeaseKeyIndexIfNotExists(); + verify(mockLeaseRefresher, never()).waitUntilLeaseOwnerToLeaseKeyIndexExists(anyLong(), anyLong()); + } + + verify(mockMigrationAdaptiveLeaderDecider).updateLeaderDecider(mockDeterministicLeaderDecider); + verify(mockLam, never()).start(); + } + + @Test + public void testShutdown() throws InterruptedException, DependencyException { + when(mockLamThreadPool.awaitTermination(anyLong(), any())).thenReturn(true); + when(mockWorkerMetricsScheduler.awaitTermination(anyLong(), any())).thenReturn(true); + + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X); + migrationInitializer.shutdown(); + + verify(mockLamThreadPool).shutdown(); + verify(mockWorkerMetricsScheduler).shutdown(); + + verify(mockLam).stop(); + // leader decider is not shutdown from DynamicMigrationComponentsInitializer + // scheduler does the shutdown + // 
verify(migrationInitializer.leaderDecider()).shutdown(); + verify(mockWorkerMetricsManager).stopManager(); + } + + @Test + public void initializationFails_WhenGsiIsNotActiveIn3_X() throws DependencyException { + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_3X); + // test initialization from state machine + + assertThrows( + DependencyException.class, + () -> migrationInitializer.initializeClientVersionFor3x(ClientVersion.CLIENT_VERSION_INIT)); + } + + @Test + public void initializationDoesNotFail_WhenGsiIsNotActiveIn3_XWithRollback() throws DependencyException { + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK); + // test initialization from state machine + + assertDoesNotThrow( + () -> migrationInitializer.initializeClientVersionFor3xWithRollback(ClientVersion.CLIENT_VERSION_INIT)); + } + + @Test + public void testComponentsInitialization_AfterFlip() throws DependencyException { + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X); + migrationInitializer.initializeClientVersionForUpgradeFrom2x(ClientVersion.CLIENT_VERSION_INIT); + + // Test flip + migrationInitializer.initializeClientVersionFor3xWithRollback(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X); + + // verify + verify(mockLam).start(); + verify(mockConsumer).updateLeaseAssignmentMode(eq(LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT)); + verify(mockDdbLockBasedLeaderDeciderCreator).get(); + verify(mockMigrationAdaptiveLeaderDecider).updateLeaderDecider(eq(mockDdbLockLeaderDecider)); + } + + @Test + public void testComponentsInitialization_AfterRollForward() throws DependencyException { + final ScheduledFuture mockFuture = mock(ScheduledFuture.class); + + doReturn(mockFuture) + .when(mockWorkerMetricsScheduler) + .scheduleAtFixedRate(any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class)); + + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_2X); + 
migrationInitializer.initializeClientVersionFor2x(ClientVersion.CLIENT_VERSION_INIT); + + // test roll-forward + reset(mockWorkerMetricsScheduler); + reset(mockLeaseRefresher); + migrationInitializer.initializeClientVersionForUpgradeFrom2x(ClientVersion.CLIENT_VERSION_2X); + + // verify + verify(mockWorkerMetricsScheduler) + .scheduleAtFixedRate(any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class)); + verify(mockLeaseRefresher).createLeaseOwnerToLeaseKeyIndexIfNotExists(); + verify(mockLeaseRefresher, never()).waitUntilLeaseTableExists(anyLong(), anyLong()); + } + + @Test + public void testComponentsInitialization_Rollback_BeforeFlip() throws DependencyException { + final ScheduledFuture mockFuture = mock(ScheduledFuture.class); + doReturn(mockFuture) + .when(mockWorkerMetricsScheduler) + .scheduleAtFixedRate(any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class)); + + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X); + migrationInitializer.initializeClientVersionForUpgradeFrom2x(ClientVersion.CLIENT_VERSION_INIT); + + // test rollback before flip + migrationInitializer.initializeClientVersionFor2x(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X); + + // verify + verify(mockFuture).cancel(anyBoolean()); + } + + @Test + public void testComponentsInitialization_Rollback_AfterFlip() throws DependencyException { + final ScheduledFuture mockFuture = mock(ScheduledFuture.class); + doReturn(mockFuture) + .when(mockWorkerMetricsScheduler) + .scheduleAtFixedRate(any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class)); + + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK); + migrationInitializer.initializeClientVersionFor3xWithRollback(ClientVersion.CLIENT_VERSION_INIT); + + // test rollback before flip + migrationInitializer.initializeClientVersionFor2x(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK); + + // verify + verify(mockFuture).cancel(anyBoolean()); + 
verify(mockConsumer).updateLeaseAssignmentMode(eq(LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)); + verify(mockLam).stop(); + verify(mockDeterministicLeaderDeciderCreator).get(); + verify(mockMigrationAdaptiveLeaderDecider).updateLeaderDecider(mockDeterministicLeaderDecider); + } + + @Test + public void testWorkerMetricsReporting() throws DependencyException { + final ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Runnable.class); + final ScheduledFuture mockFuture = mock(ScheduledFuture.class); + doReturn(mockFuture) + .when(mockWorkerMetricsScheduler) + .scheduleAtFixedRate(argumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + when(mockWorkerMetricsManager.getOperatingRange()).thenReturn(new HashMap>() { + { + put("CPU", Collections.singletonList(80L)); + } + }); + when(mockWorkerMetricsManager.computeMetrics()).thenReturn(new HashMap>() { + { + put("CPU", Arrays.asList(90.0, 85.0, 77.0, 91.0, 82.0)); + } + }); + + migrationInitializer.initialize(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK); + migrationInitializer.initializeClientVersionFor3xWithRollback(ClientVersion.CLIENT_VERSION_INIT); + + // run the worker stats reporting thread + argumentCaptor.getValue().run(); + + // verify + final ArgumentCaptor statsCaptor = ArgumentCaptor.forClass(WorkerMetricStats.class); + verify(mockWorkerMetricsDAO).updateMetrics(statsCaptor.capture()); + Assertions.assertEquals(workerIdentifier, statsCaptor.getValue().getWorkerId()); + Assertions.assertEquals( + 80L, statsCaptor.getValue().getOperatingRange().get("CPU").get(0)); + Assertions.assertEquals( + 90.0, statsCaptor.getValue().getMetricStats().get("CPU").get(0)); + Assertions.assertEquals( + 77.0, statsCaptor.getValue().getMetricStats().get("CPU").get(2)); + } + + private abstract static class DynamoDBLockBasedLeaderDeciderSupplier + implements Supplier {} + + private abstract static class DeterministicShuffleShardSyncLeaderDeciderSupplier + implements Supplier {} + + private 
abstract static class MigrationAdaptiveLeaderDeciderSupplier + implements Supplier {} + + private abstract static class LeaseAssignmentManagerSupplier + implements BiFunction {} +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DynamoDBAsyncToSyncClientAdapterTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DynamoDBAsyncToSyncClientAdapterTest.java new file mode 100644 index 000000000..bbccdc966 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/DynamoDBAsyncToSyncClientAdapterTest.java @@ -0,0 +1,202 @@ +package software.amazon.kinesis.coordinator; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CompletableFuture; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeAction; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate; +import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest; +import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableResponse; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import 
software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.PutItemResponse; +import software.amazon.awssdk.services.dynamodb.model.TableDescription; +import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest; +import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class DynamoDBAsyncToSyncClientAdapterTest { + + private static final String TEST_TABLE_NAME = "TestTable"; + + @Mock + private DynamoDbAsyncClient mockAsyncClient; + + private DynamoDbAsyncToSyncClientAdapter adapter; + + @Before + public void setup() { + MockitoAnnotations.initMocks(this); + adapter = new DynamoDbAsyncToSyncClientAdapter(mockAsyncClient); + } + + @Test + public void testGetItem() { + final Map key = new HashMap<>(); + key.put("id", AttributeValue.builder().s("1").build()); + final Map item = new HashMap<>(key); + item.put("data", AttributeValue.builder().s("test data").build()); + final GetItemRequest request = + GetItemRequest.builder().key(key).tableName(TEST_TABLE_NAME).build(); + final GetItemResponse expectedResponse = + GetItemResponse.builder().item(item).build(); + when(mockAsyncClient.getItem(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final GetItemResponse actualResponse = adapter.getItem(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).getItem(request); + } + + @Test + public void testPutItem() { + final Map item = new HashMap<>(); + item.put("id", AttributeValue.builder().s("1").build()); + item.put("data", AttributeValue.builder().s("test data").build()); + final PutItemRequest request = + 
PutItemRequest.builder().tableName(TEST_TABLE_NAME).item(item).build(); + final PutItemResponse expectedResponse = PutItemResponse.builder().build(); + when(mockAsyncClient.putItem(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final PutItemResponse actualResponse = adapter.putItem(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).putItem(request); + } + + @Test + public void testUpdateItem() { + final Map key = new HashMap<>(); + key.put("id", AttributeValue.builder().s("1").build()); + + final Map updates = new HashMap<>(); + updates.put( + "data", + AttributeValueUpdate.builder() + .value(AttributeValue.builder().s("updated data").build()) + .action(AttributeAction.PUT) + .build()); + + final UpdateItemRequest request = UpdateItemRequest.builder() + .tableName(TEST_TABLE_NAME) + .key(key) + .attributeUpdates(updates) + .build(); + + final UpdateItemResponse expectedResponse = UpdateItemResponse.builder().build(); + + when(mockAsyncClient.updateItem(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final UpdateItemResponse actualResponse = adapter.updateItem(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).updateItem(request); + } + + @Test + public void testDeleteItem() { + final Map key = new HashMap<>(); + key.put("id", AttributeValue.builder().s("1").build()); + final DeleteItemResponse expectedResponse = DeleteItemResponse.builder().build(); + final DeleteItemRequest request = + DeleteItemRequest.builder().tableName(TEST_TABLE_NAME).key(key).build(); + when(mockAsyncClient.deleteItem(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final DeleteItemResponse actualResponse = adapter.deleteItem(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).deleteItem(request); + } + + @Test + public void testCreateTable() { + final CreateTableRequest request = + 
CreateTableRequest.builder().tableName(TEST_TABLE_NAME).build(); + final CreateTableResponse expectedResponse = CreateTableResponse.builder() + .tableDescription( + TableDescription.builder().tableName(TEST_TABLE_NAME).build()) + .build(); + when(mockAsyncClient.createTable(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final CreateTableResponse actualResponse = adapter.createTable(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).createTable(request); + } + + @Test + public void testDescribeTable() { + final DescribeTableRequest request = + DescribeTableRequest.builder().tableName(TEST_TABLE_NAME).build(); + final DescribeTableResponse expectedResponse = DescribeTableResponse.builder() + .table(TableDescription.builder().tableName(TEST_TABLE_NAME).build()) + .build(); + when(mockAsyncClient.describeTable(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final DescribeTableResponse actualResponse = adapter.describeTable(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).describeTable(request); + } + + @Test + public void testDeleteTable() { + final DeleteTableRequest request = + DeleteTableRequest.builder().tableName(TEST_TABLE_NAME).build(); + final DeleteTableResponse expectedResponse = DeleteTableResponse.builder() + .tableDescription( + TableDescription.builder().tableName(TEST_TABLE_NAME).build()) + .build(); + when(mockAsyncClient.deleteTable(request)).thenReturn(CompletableFuture.completedFuture(expectedResponse)); + + final DeleteTableResponse actualResponse = adapter.deleteTable(request); + + assertEquals(expectedResponse, actualResponse); + verify(mockAsyncClient).deleteTable(request); + } + + @Test + public void testException() { + final GetItemRequest request = GetItemRequest.builder() + .tableName(TEST_TABLE_NAME) + .key(new HashMap() { + { + put("key", AttributeValue.fromS("anyKey")); + } + }) + .build(); + final 
ProvisionedThroughputExceededException exception = ProvisionedThroughputExceededException.builder() + .message("Test exception") + .build(); + when(mockAsyncClient.getItem(request)).thenReturn(CompletableFuture.supplyAsync(() -> { + throw exception; + })); + + try { + adapter.getItem(request); + fail("Expected RuntimeException"); + } catch (final ProvisionedThroughputExceededException e) { + assertEquals(exception, e); + } + verify(mockAsyncClient).getItem(request); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManagerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManagerTest.java index 1e6be18f3..e5522ff14 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManagerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/PeriodicShardSyncManagerTest.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; import java.util.stream.Collectors; @@ -78,21 +79,25 @@ public class PeriodicShardSyncManagerTest { @Mock Map streamToShardSyncTaskManagerMap; + @Mock + ScheduledExecutorService mockScheduledExecutor; + @Before public void setup() { streamIdentifier = StreamIdentifier.multiStreamInstance("123456789012:stream:456"); periodicShardSyncManager = new PeriodicShardSyncManager( "worker", - leaderDecider, leaseRefresher, currentStreamConfigMap, shardSyncTaskManagerProvider, streamToShardSyncTaskManagerMap, + mockScheduledExecutor, true, new NullMetricsFactory(), 2 * 60 * 1000, 3, new AtomicBoolean(true)); + periodicShardSyncManager.start(leaderDecider); } @Test diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/SchedulerTest.java 
b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/SchedulerTest.java index f5e81d4f4..366d73d12 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/SchedulerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/SchedulerTest.java @@ -27,12 +27,15 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.RejectedExecutionException; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.amazonaws.services.dynamodbv2.local.shared.access.AmazonDynamoDBLocal; import com.google.common.base.Joiner; import com.google.common.collect.Sets; import io.reactivex.rxjava3.plugins.RxJavaPlugins; @@ -67,6 +70,8 @@ import software.amazon.kinesis.leases.LeaseCleanupManager; import software.amazon.kinesis.leases.LeaseCoordinator; import software.amazon.kinesis.leases.LeaseManagementConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig; import software.amazon.kinesis.leases.LeaseManagementFactory; import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.MultiStreamLease; @@ -79,7 +84,6 @@ import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; import software.amazon.kinesis.lifecycle.LifecycleConfig; import software.amazon.kinesis.lifecycle.ShardConsumer; -import software.amazon.kinesis.lifecycle.TaskResult; import software.amazon.kinesis.lifecycle.events.InitializationInput; import software.amazon.kinesis.lifecycle.events.LeaseLostInput; import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput; @@ -109,6 +113,7 @@ import static 
org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyLong; import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.eq; import static org.mockito.Matchers.same; @@ -157,8 +162,8 @@ public class SchedulerTest { @Mock private KinesisAsyncClient kinesisClient; - @Mock - private DynamoDbAsyncClient dynamoDBClient; + private final AmazonDynamoDBLocal embedded = DynamoDBEmbedded.create(); + private DynamoDbAsyncClient dynamoDBClient = embedded.dynamoDbAsyncClient(); @Mock private CloudWatchAsyncClient cloudWatchClient; @@ -207,21 +212,23 @@ public void setup() { .parentShardPollIntervalMillis(100L) .workerStateChangeListener(workerStateChangeListener); leaseManagementConfig = new LeaseManagementConfig( - tableName, dynamoDBClient, kinesisClient, streamName, workerIdentifier) - .leaseManagementFactory(new TestKinesisLeaseManagementFactory(false, false)); + tableName, applicationName, dynamoDBClient, kinesisClient, workerIdentifier) + .leaseManagementFactory(new TestKinesisLeaseManagementFactory(false, false)) + .workerUtilizationAwareAssignmentConfig(new WorkerUtilizationAwareAssignmentConfig() + .disableWorkerMetrics(true) + .workerMetricsTableConfig(new WorkerMetricsTableConfig(applicationName))); lifecycleConfig = new LifecycleConfig(); metricsConfig = new MetricsConfig(cloudWatchClient, namespace); processorConfig = new ProcessorConfig(shardRecordProcessorFactory); retrievalConfig = new RetrievalConfig(kinesisClient, streamName, applicationName).retrievalFactory(retrievalFactory); when(leaseCoordinator.leaseRefresher()).thenReturn(dynamoDBLeaseRefresher); - when(shardSyncTaskManager.shardDetector()).thenReturn(shardDetector); - when(shardSyncTaskManager.hierarchicalShardSyncer()).thenReturn(new HierarchicalShardSyncer()); - when(shardSyncTaskManager.callShardSyncTask()).thenReturn(new TaskResult(null)); + 
when(leaseCoordinator.workerIdentifier()).thenReturn(workerIdentifier); + when(dynamoDBLeaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(anyLong(), anyLong())) + .thenReturn(true); when(retrievalFactory.createGetRecordsCache( any(ShardInfo.class), any(StreamConfig.class), any(MetricsFactory.class))) .thenReturn(recordsPublisher); - when(shardDetector.streamIdentifier()).thenReturn(mock(StreamIdentifier.class)); when(kinesisClient.serviceClientConfiguration()) .thenReturn(KinesisServiceClientConfiguration.builder() .region(TEST_REGION) @@ -350,7 +357,7 @@ public final void testInitializationFailureWithRetries() throws Exception { doNothing().when(leaseCoordinator).initialize(); when(dynamoDBLeaseRefresher.isLeaseTableEmpty()).thenThrow(new RuntimeException()); leaseManagementConfig = new LeaseManagementConfig( - tableName, dynamoDBClient, kinesisClient, streamName, workerIdentifier) + tableName, applicationName, dynamoDBClient, kinesisClient, workerIdentifier) .leaseManagementFactory(new TestKinesisLeaseManagementFactory(false, true)); scheduler = new Scheduler( checkpointConfig, @@ -371,7 +378,7 @@ public final void testInitializationFailureWithRetriesWithConfiguredMaxInitializ final int maxInitializationAttempts = 5; coordinatorConfig.maxInitializationAttempts(maxInitializationAttempts); leaseManagementConfig = new LeaseManagementConfig( - tableName, dynamoDBClient, kinesisClient, streamName, workerIdentifier) + tableName, applicationName, dynamoDBClient, kinesisClient, workerIdentifier) .leaseManagementFactory(new TestKinesisLeaseManagementFactory(false, true)); scheduler = new Scheduler( checkpointConfig, @@ -395,7 +402,8 @@ public final void testInitializationFailureWithRetriesWithConfiguredMaxInitializ public final void testMultiStreamInitialization() { retrievalConfig = new RetrievalConfig(kinesisClient, multiStreamTracker, applicationName) .retrievalFactory(retrievalFactory); - leaseManagementConfig = new LeaseManagementConfig(tableName, 
dynamoDBClient, kinesisClient, workerIdentifier) + leaseManagementConfig = new LeaseManagementConfig( + tableName, applicationName, dynamoDBClient, kinesisClient, workerIdentifier) .leaseManagementFactory(new TestKinesisLeaseManagementFactory(true, true)); scheduler = new Scheduler( checkpointConfig, @@ -416,7 +424,8 @@ public final void testMultiStreamInitialization() { public final void testMultiStreamInitializationWithFailures() { retrievalConfig = new RetrievalConfig(kinesisClient, multiStreamTracker, applicationName) .retrievalFactory(retrievalFactory); - leaseManagementConfig = new LeaseManagementConfig(tableName, dynamoDBClient, kinesisClient, workerIdentifier) + leaseManagementConfig = new LeaseManagementConfig( + tableName, applicationName, dynamoDBClient, kinesisClient, workerIdentifier) .leaseManagementFactory(new TestKinesisLeaseManagementFactory(true, true)); scheduler = new Scheduler( checkpointConfig, @@ -1111,13 +1120,6 @@ public final void testMultiStreamNewStreamsAreSyncedAndStaleStreamsAreDeletedAft processorConfig, retrievalConfig)); when(scheduler.shouldSyncStreamsNow()).thenReturn(true); - when(multiStreamTracker.formerStreamsLeasesDeletionStrategy()) - .thenReturn(new AutoDetectionAndDeferredDeletionStrategy() { - @Override - public Duration waitPeriodToDeleteFormerStreams() { - return Duration.ZERO; - } - }); Set syncedStreams = scheduler.checkAndSyncStreamShardsAndLeases(); Set expectedSyncedStreams = IntStream.concat(IntStream.range(1, 3), IntStream.range(5, 7)) .mapToObj(streamId -> StreamIdentifier.multiStreamInstance( @@ -1145,7 +1147,6 @@ public final void testInitializationWaitsWhenLeaseTableIsEmpty() throws Exceptio processorConfig, retrievalConfig); - doNothing().when(leaseCoordinator).initialize(); when(dynamoDBLeaseRefresher.isLeaseTableEmpty()).thenReturn(true); long startTime = System.currentTimeMillis(); @@ -1171,7 +1172,6 @@ public final void testInitializationDoesntWaitWhenLeaseTableIsNotEmpty() throws processorConfig, 
retrievalConfig); - doNothing().when(leaseCoordinator).initialize(); when(dynamoDBLeaseRefresher.isLeaseTableEmpty()).thenReturn(false); long startTime = System.currentTimeMillis(); @@ -1249,6 +1249,7 @@ public void testSyncLeaseAsThisIsInitialAppBootstrapEvenThoughStreamMapContainsA multiStreamTracker .streamConfigList() .forEach(s -> scheduler.currentStreamConfigMap().put(s.streamIdentifier(), s)); + scheduler.initialize(); scheduler.runProcessLoop(); verify(scheduler).syncStreamsFromLeaseTableOnAppInit(any()); assertTrue(scheduler.currentStreamConfigMap().size() != 0); @@ -1257,6 +1258,7 @@ public void testSyncLeaseAsThisIsInitialAppBootstrapEvenThoughStreamMapContainsA @Test public void testNotRefreshForNewStreamAfterLeaderFlippedTheShouldInitialize() { prepareMultiStreamScheduler(createDummyStreamConfigList(1, 6)); + scheduler.initialize(); // flip the shouldInitialize flag scheduler.runProcessLoop(); verify(scheduler, times(1)).syncStreamsFromLeaseTableOnAppInit(any()); @@ -1682,6 +1684,12 @@ public LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory) { return leaseCoordinator; } + @Override + public LeaseCoordinator createLeaseCoordinator( + MetricsFactory metricsFactory, ConcurrentMap shardInfoShardConsumerMap) { + return leaseCoordinator; + } + @Override public ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory) { return shardSyncTaskManager; @@ -1702,8 +1710,6 @@ public ShardSyncTaskManager createShardSyncTaskManager( when(shardSyncTaskManager.shardDetector()).thenReturn(shardDetector); final HierarchicalShardSyncer hierarchicalShardSyncer = new HierarchicalShardSyncer(); when(shardSyncTaskManager.hierarchicalShardSyncer()).thenReturn(hierarchicalShardSyncer); - when(shardDetector.streamIdentifier()).thenReturn(streamConfig.streamIdentifier()); - when(shardSyncTaskManager.callShardSyncTask()).thenReturn(new TaskResult(null)); if (shardSyncFirstAttemptFailure) { when(shardDetector.listShards()) .thenThrow(new 
RuntimeException("Service Exception")) diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManagerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManagerTest.java new file mode 100644 index 000000000..930fdcd79 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/assignment/LeaseAssignmentManagerTest.java @@ -0,0 +1,1276 @@ +package software.amazon.kinesis.coordinator.assignment; + +import java.time.Duration; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import lombok.var; +import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; +import software.amazon.awssdk.enhanced.dynamodb.TableSchema; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.kinesis.common.DdbTableConfig; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseManagementConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig; +import software.amazon.kinesis.leases.LeaseRefresher; +import 
software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher; +import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer; +import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.metrics.NullMetricsFactory; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStats; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; + +import static java.util.Objects.nonNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber.TRIM_HORIZON; + +class LeaseAssignmentManagerTest { + + private static final String TEST_LEADER_WORKER_ID = "workerId"; + private static final String TEST_TAKE_WORKER_ID = "workerIdTake"; + private static final String TEST_YIELD_WORKER_ID = "workerIdYield"; + + private static final String LEASE_TABLE_NAME = "leaseTable"; + private static final String WORKER_METRICS_TABLE_NAME = "workerMetrics"; + private final DynamoDbAsyncClient dynamoDbAsyncClient = + DynamoDBEmbedded.create().dynamoDbAsyncClient(); + private LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig = + 
LeaseManagementConfig.GracefulLeaseHandoffConfig.builder() + .isGracefulLeaseHandoffEnabled(false) + .build(); + // TODO : Use DynamoDBLockBasedLeaderDecider with LocalDDBClient when other CR is merged. + private LeaderDecider mockLeaderDecider; + private ScheduledExecutorService scheduledExecutorService; + private ScheduledFuture scheduledFuture; + private Runnable leaseAssignmentManagerRunnable; + private final LeaseRefresher leaseRefresher = new DynamoDBLeaseRefresher( + LEASE_TABLE_NAME, + dynamoDbAsyncClient, + new DynamoDBLeaseSerializer(), + true, + TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); + private WorkerMetricStatsDAO workerMetricsDAO; + + @BeforeEach + void setup() throws ProvisionedThroughputException, DependencyException { + final WorkerMetricsTableConfig config = new WorkerMetricsTableConfig("applicationName"); + config.tableName(WORKER_METRICS_TABLE_NAME); + workerMetricsDAO = new WorkerMetricStatsDAO(dynamoDbAsyncClient, config, 10000L); + workerMetricsDAO.initialize(); + mockLeaderDecider = Mockito.mock(LeaderDecider.class); + scheduledExecutorService = Mockito.mock(ScheduledExecutorService.class); + scheduledFuture = Mockito.mock(ScheduledFuture.class); + when(mockLeaderDecider.isLeader(any())).thenReturn(true); + when(scheduledExecutorService.scheduleWithFixedDelay( + any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class))) + .thenAnswer(invocation -> { + Object[] args = invocation.getArguments(); + this.leaseAssignmentManagerRunnable = (Runnable) args[0]; + return scheduledFuture; + }); + when(scheduledFuture.cancel(anyBoolean())).thenReturn(true); + leaseRefresher.createLeaseTableIfNotExists(); + } + + @Test + void 
performAssignment_yieldAndTakeWorker_validateNewLeaseAssignedToTakeWorker() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("something")); + + leaseAssignmentManagerRunnable.run(); + + assertEquals(TEST_TAKE_WORKER_ID, leaseRefresher.listLeases().get(0).leaseOwner()); + } + + @Test + void performAssignment_workerWithFailingWorkerMetric_assertLeaseNotAssignedToWorkerWithFailingWorkerMetric() + throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + final String workerWithFailingWorkerMetricId = "WorkerIdOfFailingWorkerMetric"; + + workerMetricsDAO.updateMetrics(createWorkerWithFailingWorkerMetric(workerWithFailingWorkerMetricId)); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("something1")); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("something2")); + + leaseAssignmentManagerRunnable.run(); + assertEquals( + 0, + leaseRefresher.listLeases().stream() + .filter(lease -> workerWithFailingWorkerMetricId.equals(lease.leaseOwner())) + .collect(Collectors.toSet()) + .size()); + } + + @Test + void performAssignment_workerWithFailingWorkerMetricInPast_assertLeaseAssignment() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + final String workerWithFailingWorkerMetricId = "WorkerIdOfFailingWorkerMetric"; + + workerMetricsDAO.updateMetrics(createWorkerWithFailingWorkerMetricInPast(workerWithFailingWorkerMetricId)); + 
leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("something1")); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("something2")); + + leaseAssignmentManagerRunnable.run(); + assertEquals( + 2, + leaseRefresher.listLeases().stream() + .filter(lease -> workerWithFailingWorkerMetricId.equals(lease.leaseOwner())) + .collect(Collectors.toSet()) + .size()); + } + + @Test + void performAssignment_noThroughputToWorker_assertOneLeaseTaken() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + // 10 leases are assigned to yield worker, all have zero throughput + for (int i = 0; i < 10; ++i) { + final Lease lease = createDummyLease("lease" + i, TEST_YIELD_WORKER_ID); + lease.throughputKBps(0D); + populateLeasesInLeaseTable(lease); + } + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 9, + leaseRefresher.listLeases().stream() + .filter(lease -> TEST_YIELD_WORKER_ID.equals(lease.leaseOwner())) + .collect(Collectors.toSet()) + .size()); + // Only 1 lease is expected to be taken as during zero throughput we fall back to taking 1 lease. 
+ assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> TEST_TAKE_WORKER_ID.equals(lease.leaseOwner())) + .collect(Collectors.toSet()) + .size()); + } + + @Test + void performAssignment_moreLeasesThanMaxConfigured_assertSomeUnassignedLeases() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config = + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20); + createLeaseAssignmentManager(config, 100L, System::nanoTime, 2); + + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("lease1")); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("lease2")); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("lease3")); + + leaseAssignmentManagerRunnable.run(); + assertEquals( + 2L, + leaseRefresher.listLeases().stream() + .filter(lease -> nonNull(lease.leaseOwner())) + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + } + + @Test + void performAssignment_unAssignedAndExpiredLeasesBothAvailable_validateUnAssignedLeaseAssignedFirst() + throws Exception { + + final Supplier mockNanoTimeProvider = Mockito.mock(Supplier.class); + when(mockNanoTimeProvider.get()) + .thenReturn(Duration.ofMillis(100).toNanos()) + .thenReturn(Duration.ofMillis(110).toNanos()); + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(9D, 20), 1L, mockNanoTimeProvider, Integer.MAX_VALUE); + + leaseRefresher.createLeaseIfNotExists(createDummyLease("expiredLease", "random-owner")); + + // No assignment will happen as no workers not existing, but will start tracking lease for expiry + leaseAssignmentManagerRunnable.run(); + + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + 
leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("unAssignedLease")); + + leaseAssignmentManagerRunnable.run(); + + assertNotEquals( + TEST_TAKE_WORKER_ID, leaseRefresher.getLease("expiredLease").leaseOwner()); + assertEquals( + TEST_TAKE_WORKER_ID, leaseRefresher.getLease("unAssignedLease").leaseOwner()); + } + + @Test + void performAssignment_workerNotAboveReBalanceThresholdButAboveOperatingRange_asserReBalance() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyWorkerMetrics("Worker1", 41, 50)); + workerMetricsDAO.updateMetrics(createDummyWorkerMetrics("Worker2", 59, 50)); + + final Lease lease1 = createDummyLease("lease1", "Worker2"); + lease1.throughputKBps(1000); + final Lease lease2 = createDummyLease("lease2", "Worker2"); + lease2.throughputKBps(1); + populateLeasesInLeaseTable(lease1, lease2); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals("Worker1")) + .count()); + } + + @Test + void performAssignment_inActiveWorkerWithLowUtilizationAvailable_verifyLeaseNotAssigned() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final String inActiveWorkerId = "InActiveWorker"; + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + // workerMetricsDAO has validation to allow expired WorkerMetricStats writes, so write directly to table + // for test + writeToWorkerMetricsTables(createInActiveWorkerWithNoUtilization(inActiveWorkerId)); + + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(9D, 20), 1L, System::nanoTime, Integer.MAX_VALUE); + + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("leaseKey1")); + 
leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("leaseKey2")); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 2L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_YIELD_WORKER_ID)) + .count()); + assertEquals( + 0L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(inActiveWorkerId)) + .count()); + } + + @Test + void performAssignment_takeAndYieldWorkers_verifyThroughoutTaken() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 10), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + final Lease lease1 = createDummyLease("lease1", TEST_YIELD_WORKER_ID); + lease1.throughputKBps(100D); + final Lease lease2 = createDummyLease("lease2", TEST_YIELD_WORKER_ID); + lease2.throughputKBps(200D); + final Lease lease3 = createDummyLease("lease3", TEST_YIELD_WORKER_ID); + lease3.throughputKBps(300D); + final Lease lease4 = createDummyLease("lease4", TEST_YIELD_WORKER_ID); + lease4.throughputKBps(400D); + + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4); + + leaseAssignmentManagerRunnable.run(); + + // Yield worker has total of 1000KBps throughput assigned, based on the average will take around 17% of + // throughput + assertTrue(leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .mapToDouble(Lease::throughputKBps) + .sum() + >= 100D); + } + + @Test + void performAssignment_takeOvershootingLease_verifySmallestLeaseTaken() throws Exception { + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + // 3 leases. 
the yield worker has 90% CPU util so each lease is contributing around 30% of the total cpu. + // The taker worker is sitting at 50%. So any one of the leases will cause throughput overshoot + final Lease lease1 = createDummyLease("lease1", TEST_YIELD_WORKER_ID); + lease1.throughputKBps(300D); + final Lease lease2 = createDummyLease("lease2", TEST_YIELD_WORKER_ID); + lease2.throughputKBps(299D); + final Lease lease3 = createDummyLease("lease3", TEST_YIELD_WORKER_ID); + lease3.throughputKBps(301D); + final Lease lease4 = createDummyLease("lease4", TEST_TAKE_WORKER_ID); + lease4.throughputKBps(3000D); + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4); + + // 1. test with the config set to false. No lease should be picked + final var config = getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 10); + config.allowThroughputOvershoot(false); + createLeaseAssignmentManager(config, Duration.ofHours(1).toMillis(), System::nanoTime, Integer.MAX_VALUE); + + leaseAssignmentManagerRunnable.run(); + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + + // 2. test with config set to true. 
Take one lease + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 10), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + leaseAssignmentManagerRunnable.run(); + + final List leaseKeysAssignedToTakeWorker = leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .map(Lease::leaseKey) + .collect(Collectors.toList()); + assertEquals(2, leaseKeysAssignedToTakeWorker.size()); + assertTrue(leaseKeysAssignedToTakeWorker.contains("lease4")); + assertTrue(leaseKeysAssignedToTakeWorker.contains("lease2")); + } + + @Test + void performAssignment_takeAndYieldWorkerWithSeveralLeases_verifyBalancingBetweenLeases() throws Exception { + + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 10), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID + "1")); + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID + "2")); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + final Lease lease1 = createDummyLease("lease1", TEST_YIELD_WORKER_ID + "1"); + final Lease lease2 = createDummyLease("lease2", TEST_YIELD_WORKER_ID + "1"); + lease2.throughputKBps(1000); + + final Lease lease3 = createDummyLease("lease3", TEST_YIELD_WORKER_ID + "2"); + final Lease lease4 = createDummyLease("lease4", TEST_YIELD_WORKER_ID + "2"); + lease4.throughputKBps(1000); + + final Lease lease5 = createDummyLease("lease5", TEST_TAKE_WORKER_ID); + + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4, lease5); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 3L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + assertEquals( + 2L, + leaseRefresher.listLeases().stream() + .filter(lease -> 
lease.leaseOwner().equals(TEST_YIELD_WORKER_ID + "1") + || lease.leaseOwner().equals(TEST_YIELD_WORKER_ID + "2")) + .count()); + assertTrue(leaseRefresher.listLeases().stream() + .anyMatch(lease -> lease.leaseOwner().equals(TEST_YIELD_WORKER_ID + "1"))); + assertTrue(leaseRefresher.listLeases().stream() + .anyMatch(lease -> lease.leaseOwner().equals(TEST_YIELD_WORKER_ID + "2"))); + } + + @Test + void performAssignment_varianceBalanceFreq3_asserLoadBalancingEvery3Iteration() throws Exception { + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config = + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 10); + config.varianceBalancingFrequency(3); + createLeaseAssignmentManager(config, Duration.ofHours(1).toMillis(), System::nanoTime, Integer.MAX_VALUE); + + setupConditionForVarianceBalancing(); + // 1sh Run, expect re-balance + leaseAssignmentManagerRunnable.run(); + assertEquals( + 3L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + + setupConditionForVarianceBalancing(); + // 2nd Run, expect no re-balance + leaseAssignmentManagerRunnable.run(); + assertEquals( + 1L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + + setupConditionForVarianceBalancing(); + // 3nd Run, expect no re-balance + leaseAssignmentManagerRunnable.run(); + assertEquals( + 1L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + + setupConditionForVarianceBalancing(); + // 4th Run, expect re-balance + leaseAssignmentManagerRunnable.run(); + assertEquals( + 3L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + + setupConditionForVarianceBalancing(); + // 5th Run, expect no re-balance + leaseAssignmentManagerRunnable.run(); + assertEquals( + 1L, + leaseRefresher.listLeases().stream() + 
.filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + } + + private void setupConditionForVarianceBalancing() throws Exception { + + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID + "1")); + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID + "2")); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + final Lease lease1 = createDummyLease("lease1", TEST_YIELD_WORKER_ID + "1"); + final Lease lease2 = createDummyLease("lease2", TEST_YIELD_WORKER_ID + "1"); + lease2.throughputKBps(1000); + + final Lease lease3 = createDummyLease("lease3", TEST_YIELD_WORKER_ID + "2"); + final Lease lease4 = createDummyLease("lease4", TEST_YIELD_WORKER_ID + "2"); + lease4.throughputKBps(1000); + + final Lease lease5 = createDummyLease("lease5", TEST_TAKE_WORKER_ID); + + leaseRefresher.deleteLease(lease1); + leaseRefresher.deleteLease(lease2); + leaseRefresher.deleteLease(lease3); + leaseRefresher.deleteLease(lease4); + leaseRefresher.deleteLease(lease5); + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4, lease5); + } + + @Test + void performAssignment_withLeaderSwitchOver_assertAssignmentOnlyAfterBeingLeader() throws Exception { + when(mockLeaderDecider.isLeader(anyString())).thenReturn(false).thenReturn(true); + + final Supplier mockNanoTimeProvider = Mockito.mock(Supplier.class); + when(mockNanoTimeProvider.get()) + .thenReturn(Duration.ofMillis(100).toNanos()) + .thenReturn(Duration.ofMillis(110).toNanos()) + .thenReturn(Duration.ofMillis(120).toNanos()); + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 1L, + mockNanoTimeProvider, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("unAssignedLease")); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease-1", 
"someOwner")); + + // First time call is made, the worker is not leader, no assignment + leaseAssignmentManagerRunnable.run(); + + assertFalse(leaseRefresher.listLeases().stream() + .anyMatch(lease -> + lease.leaseOwner() != null && lease.leaseOwner().equals(TEST_TAKE_WORKER_ID))); + + // Second time call is made, the worker is leader, for unAssignedLease assignment is done + leaseAssignmentManagerRunnable.run(); + + assertEquals( + TEST_TAKE_WORKER_ID, leaseRefresher.getLease("unAssignedLease").leaseOwner()); + assertEquals("someOwner", leaseRefresher.getLease("lease-1").leaseOwner()); + + // Third time call is made, the worker is leader, for expiredLease assignment is done + leaseAssignmentManagerRunnable.run(); + + assertEquals( + TEST_TAKE_WORKER_ID, leaseRefresher.getLease("unAssignedLease").leaseOwner()); + assertEquals(TEST_TAKE_WORKER_ID, leaseRefresher.getLease("lease-1").leaseOwner()); + } + + @Test + void performAssignment_underUtilizedWorker_assertBalancingAndUnassignedLeaseAssignmentToSameWorker() + throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 10), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + final Lease leaseY1 = createDummyLease("leaseY1", TEST_YIELD_WORKER_ID); + leaseY1.throughputKBps(1000); + final Lease leaseT2 = createDummyUnAssignedLease("leaseT1"); + + populateLeasesInLeaseTable(leaseY1, leaseT2); + + populateLeasesInLeaseTable(createDummyLease("leaseY2", TEST_YIELD_WORKER_ID)); + populateLeasesInLeaseTable(createDummyLease("leaseY3", TEST_YIELD_WORKER_ID)); + populateLeasesInLeaseTable(createDummyLease("leaseY5", TEST_YIELD_WORKER_ID)); + populateLeasesInLeaseTable(createDummyLease("leaseY6", TEST_YIELD_WORKER_ID)); + 
populateLeasesInLeaseTable(createDummyLease("leaseY7", TEST_YIELD_WORKER_ID)); + populateLeasesInLeaseTable(createDummyLease("leaseY8", TEST_YIELD_WORKER_ID)); + populateLeasesInLeaseTable(createDummyLease("leaseY9", TEST_YIELD_WORKER_ID)); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 4L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + } + + @Test + void performAssignment_workerWithHotWorkerMetricButNotAboveAverage_validateRebalance() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final String randomWorkerId = "randomWorkerId"; + // Setting reBalance threshold as INT_MAX which means no reBalance due to variance in utilization ratio + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createWorkerWithHotWorkerMetricStats(randomWorkerId)); + final WorkerMetricStats takeWorkerStats = createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID); + takeWorkerStats.setMetricStats(ImmutableMap.of("C", ImmutableList.of(40D, 40D))); + workerMetricsDAO.updateMetrics(takeWorkerStats); + + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey1", randomWorkerId)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey2", randomWorkerId)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey3", randomWorkerId)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey4", randomWorkerId)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey5", randomWorkerId)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey6", randomWorkerId)); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 5, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(randomWorkerId)) + .count()); + assertEquals( + 1, + 
leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + } + + @Test + void performAssignment_yieldWorkerWithSingleLease_assertReBalance() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID)); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + final Lease lease1 = createDummyLease("lease1", TEST_YIELD_WORKER_ID); + lease1.throughputKBps(5); + final Lease lease2 = createDummyLease("lease2", TEST_TAKE_WORKER_ID); + lease2.throughputKBps(30); + populateLeasesInLeaseTable(lease1, lease2); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 2L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_TAKE_WORKER_ID)) + .count()); + + assertEquals( + 0L, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(TEST_YIELD_WORKER_ID)) + .count()); + } + + @Test + void performAssignment_continuousFailure_assertLeadershipRelease() throws Exception { + final Supplier mockFailingNanoTimeProvider = Mockito.mock(Supplier.class); + when(mockFailingNanoTimeProvider.get()).thenThrow(new RuntimeException("IAmAlwaysFailing")); + + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + Duration.ofHours(1).toMillis(), + mockFailingNanoTimeProvider, + Integer.MAX_VALUE); + + leaseAssignmentManagerRunnable.run(); + verify(mockLeaderDecider, times(0)).releaseLeadershipIfHeld(); + leaseAssignmentManagerRunnable.run(); + verify(mockLeaderDecider, times(0)).releaseLeadershipIfHeld(); + leaseAssignmentManagerRunnable.run(); + // After 3 failures, leadership is expected to be released. 
+ verify(mockLeaderDecider, times(1)).releaseLeadershipIfHeld(); + } + + private void populateLeasesInLeaseTable(Lease... leases) throws Exception { + for (Lease lease : leases) { + leaseRefresher.createLeaseIfNotExists(lease); + } + } + + @Test + void startStopValidation_sanity() + throws InterruptedException, ProvisionedThroughputException, InvalidStateException, DependencyException { + final LeaseAssignmentManager leaseAssignmentManager = createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + Duration.ofHours(1).toMillis(), + System::nanoTime, + Integer.MAX_VALUE); + + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("something")); + + leaseAssignmentManagerRunnable.run(); + + when(scheduledFuture.isDone()).thenReturn(true); + leaseAssignmentManager.stop(); + + verify(scheduledFuture).cancel(anyBoolean()); + // Validate the assignment did happen + assertEquals(TEST_TAKE_WORKER_ID, leaseRefresher.listLeases().get(0).leaseOwner()); + } + + @Test + void performAssignment_staleWorkerMetricsEntries_assertCleaning() { + LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config = + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20); + + config.staleWorkerMetricsEntryCleanupDuration(Duration.ofHours(60)); + createLeaseAssignmentManager(config, Duration.ofHours(1).toMillis(), System::nanoTime, Integer.MAX_VALUE); + // Non expired workerMetrics + writeToWorkerMetricsTables(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + + final WorkerMetricStats expiredWorkerStats = createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID); + expiredWorkerStats.setLastUpdateTime( + Instant.now().minus(100, ChronoUnit.HOURS).getEpochSecond()); + // expired workerMetrics + writeToWorkerMetricsTables(expiredWorkerStats); + + leaseAssignmentManagerRunnable.run(); + + assertEquals(1, 
workerMetricsDAO.getAllWorkerMetricStats().size()); + assertEquals( + TEST_TAKE_WORKER_ID, + workerMetricsDAO.getAllWorkerMetricStats().get(0).getWorkerId()); + } + + @Test + void performAssignment_testRetryBehavior() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + + final WorkerMetricStatsDAO mockedWorkerMetricsDAO = Mockito.mock(WorkerMetricStatsDAO.class); + final LeaseRefresher mockedLeaseRefresher = Mockito.mock(LeaseRefresher.class); + + when(mockedLeaseRefresher.listLeasesParallely(any(), anyInt())).thenThrow(new RuntimeException()); + when(mockedWorkerMetricsDAO.getAllWorkerMetricStats()).thenThrow(new RuntimeException()); + + final LeaseAssignmentManager leaseAssignmentManager = new LeaseAssignmentManager( + mockedLeaseRefresher, + mockedWorkerMetricsDAO, + mockLeaderDecider, + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + TEST_LEADER_WORKER_ID, + 100L, + new NullMetricsFactory(), + scheduledExecutorService, + System::nanoTime, + Integer.MAX_VALUE, + LeaseManagementConfig.GracefulLeaseHandoffConfig.builder() + .isGracefulLeaseHandoffEnabled(false) + .build()); + + leaseAssignmentManager.start(); + + leaseAssignmentManagerRunnable.run(); + + verify(mockedLeaseRefresher, times(2)).listLeasesParallely(any(), anyInt()); + verify(mockedWorkerMetricsDAO, times(2)).getAllWorkerMetricStats(); + } + + @Test + void performAssignment_invalidLeaseInTable_validateAssignmentDoesNotFail() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("validLeaseKey")); + + // This lease fails to deserialize as it does not have required parameters still the assignment + // does not fail and gracefully handle this. 
+        createAndPutBadLeaseEntryInTable("badLeaseKey");
+
+        leaseAssignmentManagerRunnable.run();
+
+        assertEquals(
+                TEST_TAKE_WORKER_ID, leaseRefresher.getLease("validLeaseKey").leaseOwner());
+    }
+
+    @Test
+    void performAssignment_invalidWorkerMetricsEntry_validateAssignmentDoesNotFail() throws Exception {
+        createLeaseAssignmentManager(
+                getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20),
+                100L,
+                System::nanoTime,
+                Integer.MAX_VALUE);
+        workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID));
+        leaseRefresher.createLeaseIfNotExists(createDummyUnAssignedLease("leaseKey"));
+
+        // This worker metrics entry fails to deserialize as it does not have required parameters; still the assignment
+        // does not fail and gracefully handles this.
+        createAndPutBadWorkerMetricsEntryInTable("badWorkerId");
+
+        leaseAssignmentManagerRunnable.run();
+
+        assertEquals(TEST_TAKE_WORKER_ID, leaseRefresher.getLease("leaseKey").leaseOwner());
+    }
+
+    @Test
+    void performAssignment_testAssignmentHandlingForDifferentPendingCheckpointStatesLeases() throws Exception {
+        final long leaseDurationMillis = 100;
+        final long currentTimeMillis = 10000;
+
+        final Supplier mockNanoTimeProvider = Mockito.mock(Supplier.class);
+        when(mockNanoTimeProvider.get())
+                .thenReturn(Duration.ofMillis(currentTimeMillis).toNanos());
+
+        gracefulLeaseHandoffConfig =
+                LeaseManagementConfig.GracefulLeaseHandoffConfig.builder().build();
+        createLeaseAssignmentManager(
+                getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20),
+                leaseDurationMillis,
+                mockNanoTimeProvider,
+                Integer.MAX_VALUE);
+
+        workerMetricsDAO.updateMetrics(createDummyYieldWorkerMetrics(TEST_YIELD_WORKER_ID));
+        workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID));
+
+        final Lease lease1 = createDummyLease("lease1", TEST_YIELD_WORKER_ID);
+        lease1.throughputKBps(5D);
+        final Lease lease2 = createDummyLease("lease2", TEST_YIELD_WORKER_ID);
+        lease2.throughputKBps(30D);
+
        populateLeasesInLeaseTable(lease1, lease2);
+
+        // 1. Check one lease is marked for pending shutdown
+        leaseAssignmentManagerRunnable.run();
+        assertEquals(
+                1,
+                leaseRefresher.listLeases().stream()
+                        .filter(l -> TEST_YIELD_WORKER_ID.equals(l.checkpointOwner()))
+                        .count());
+        final Lease l1 = leaseRefresher.listLeases().stream()
+                .filter(l -> TEST_YIELD_WORKER_ID.equals(l.checkpointOwner()))
+                .findFirst()
+                .get();
+
+        // 2. This is a no-op because pending checkpoint is not expired
+        leaseAssignmentManagerRunnable.run();
+        assertEquals(
+                1,
+                leaseRefresher.listLeases().stream()
+                        .filter(l -> TEST_YIELD_WORKER_ID.equals(l.checkpointOwner()))
+                        .count());
+        final Lease l2 = leaseRefresher.listLeases().stream()
+                .filter(l -> TEST_YIELD_WORKER_ID.equals(l.checkpointOwner()))
+                .findFirst()
+                .get();
+        assertEquals(l1.leaseKey(), l2.leaseKey());
+
+        // 3. Fast-forward the time to expire the lease. There should be no lease in pending checkpoint state
+        final long newTimeMillis = leaseDurationMillis
+                + gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis()
+                + TimeUnit.NANOSECONDS.toMillis(currentTimeMillis)
+                + 100000;
+        when(mockNanoTimeProvider.get())
+                .thenReturn(Duration.ofMillis(newTimeMillis).toNanos());
+        // Renew lease2 as time is fast-forwarded, so lease2 (assigned to the yield worker) does not get
+        // assigned to takeWorker. Otherwise lease1, assigned to takeWorker, would be assigned back to the yield
+        // worker, since after the assignment of the expired lease takeWorker is higher on CPU.
+ leaseRefresher.renewLease(lease2); + leaseAssignmentManagerRunnable.run(); + assertEquals( + 0, + leaseRefresher.listLeases().stream() + .filter(l -> l.checkpointOwner() != null) + .count()); + } + + @Test + void performAssignment_expiredLeasesButPendingCheckpointNotExpiredLease_validateItIsAssigned() throws Exception { + final Supplier mockNanoTimeProvider = Mockito.mock(Supplier.class); + when(mockNanoTimeProvider.get()) + .thenReturn(Duration.ofMillis(100).toNanos()) + .thenReturn(Duration.ofMillis(110).toNanos()); + gracefulLeaseHandoffConfig = LeaseManagementConfig.GracefulLeaseHandoffConfig.builder() + .gracefulLeaseHandoffTimeoutMillis(30000) + .build(); + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(9D, 20), 1L, mockNanoTimeProvider, Integer.MAX_VALUE); + + final Lease expireLease = createDummyLease("expiredLease", "random-owner"); + // LAM will add a timeout one pending checkpoint when it sees this attribute. + expireLease.checkpointOwner("another-random-owner"); + leaseRefresher.createLeaseIfNotExists(expireLease); + + // run one so lease is tracked but since there is no worker participating in lease assignment yet we don't + // assign it. 
+ leaseAssignmentManagerRunnable.run(); + assertNotEquals( + TEST_TAKE_WORKER_ID, leaseRefresher.getLease("expiredLease").leaseOwner()); + + // add a host now + workerMetricsDAO.updateMetrics(createDummyTakeWorkerMetrics(TEST_TAKE_WORKER_ID)); + leaseAssignmentManagerRunnable.run(); + assertEquals( + TEST_TAKE_WORKER_ID, leaseRefresher.getLease("expiredLease").leaseOwner()); + } + + @Test + void loadInMemoryStorageView_testDefaultWorkerMetricTakeLeasesUtilRatioCalculation() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + final WorkerMetricStats workerMetrics1 = createDummyDefaultWorkerMetrics("worker1"); + final WorkerMetricStats workerMetrics2 = createDummyTakeWorkerMetrics("worker2"); + + workerMetricsDAO.updateMetrics(workerMetrics1); + workerMetricsDAO.updateMetrics(workerMetrics2); + + populateLeasesInLeaseTable( + createDummyUnAssignedLease("lease1"), + createDummyUnAssignedLease("lease2"), + createDummyUnAssignedLease("lease3"), + createDummyLease("lease6", workerMetrics2.getWorkerId())); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 3, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .count()); + + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .count()); + } + + @Test + void loadInMemoryStorageView_assertNoLeasesTakenFromOptimallyUtilizedDefaultWorkerMetricWorker() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + final WorkerMetricStats workerMetrics1 = createDummyDefaultWorkerMetrics("worker1"); + final WorkerMetricStats workerMetrics2 = createDummyTakeWorkerMetrics("worker2"); + + workerMetricsDAO.updateMetrics(workerMetrics1); + 
workerMetricsDAO.updateMetrics(workerMetrics2); + + final Lease lease1 = createDummyLease("lease1", workerMetrics1.getWorkerId()); + lease1.throughputKBps(100D); + final Lease lease2 = createDummyLease("lease2", workerMetrics1.getWorkerId()); + lease2.throughputKBps(200D); + final Lease lease3 = createDummyLease("lease3", workerMetrics2.getWorkerId()); + lease3.throughputKBps(300D); + final Lease lease4 = createDummyLease("lease4", workerMetrics2.getWorkerId()); + lease4.throughputKBps(400D); + + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 2, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .count()); + + assertEquals( + 2, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .count()); + } + + @Test + void loadInMemoryStorageView_assertNoLeasesTakenWhenDefaultWorkerMetricAndCPUWorkerMetricWorkersAreOverloaded() + throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + final WorkerMetricStats workerMetrics1 = createDummyDefaultWorkerMetrics("worker1"); + final WorkerMetricStats workerMetrics2 = createDummyYieldWorkerMetrics("worker2"); + + workerMetricsDAO.updateMetrics(workerMetrics1); + workerMetricsDAO.updateMetrics(workerMetrics2); + + final Lease lease1 = createDummyLease("lease1", workerMetrics1.getWorkerId()); + final Lease lease2 = createDummyLease("lease2", workerMetrics2.getWorkerId()); + + populateLeasesInLeaseTable(lease1, lease2); + + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .count()); + + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> 
lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .count()); + } + + // @Test + void loadInMemoryStorageView_assertLeasesAreTakenWhenDefaultWorkerMetricWorkerIsOverloaded() throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + final WorkerMetricStats workerMetrics1 = createDummyDefaultWorkerMetrics("worker1"); + final WorkerMetricStats workerMetrics2 = createDummyTakeWorkerMetrics("worker2"); + + workerMetricsDAO.updateMetrics(workerMetrics1); + workerMetricsDAO.updateMetrics(workerMetrics2); + + final Lease lease1 = createDummyLease("lease1", workerMetrics1.getWorkerId()); + lease1.throughputKBps(1000D); + final Lease lease2 = createDummyLease("lease2", workerMetrics1.getWorkerId()); + lease2.throughputKBps(1000D); + final Lease lease3 = createDummyLease("lease3", workerMetrics2.getWorkerId()); + lease3.throughputKBps(1D); + final Lease lease4 = createDummyLease("lease4", workerMetrics2.getWorkerId()); + lease4.throughputKBps(1D); + + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4); + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .count()); + + assertEquals( + 3, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .count()); + + assertEquals( + 1000D, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .mapToDouble(Lease::throughputKBps) + .sum()); + + assertEquals( + 1002D, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .mapToDouble(Lease::throughputKBps) + .sum()); + } + + // @Test + void loadInMemoryStorageView_assertLeasesAreBalancedWhenDefaultWorkerMetricWorkerIsOverloadedWithMultipleRuns() + 
throws Exception { + createLeaseAssignmentManager( + getWorkerUtilizationAwareAssignmentConfig(Double.MAX_VALUE, 20), + 100L, + System::nanoTime, + Integer.MAX_VALUE); + + final WorkerMetricStats workerMetrics1 = createDummyDefaultWorkerMetrics("worker1"); + final WorkerMetricStats workerMetrics2 = createDummyTakeWorkerMetrics("worker2"); + + workerMetricsDAO.updateMetrics(workerMetrics1); + workerMetricsDAO.updateMetrics(workerMetrics2); + + final Lease lease1 = createDummyLease("lease1", workerMetrics1.getWorkerId()); + lease1.throughputKBps(1000D); + final Lease lease2 = createDummyLease("lease2", workerMetrics1.getWorkerId()); + lease2.throughputKBps(1000D); + final Lease lease3 = createDummyLease("lease3", workerMetrics2.getWorkerId()); + lease3.throughputKBps(1D); + final Lease lease4 = createDummyLease("lease4", workerMetrics2.getWorkerId()); + lease4.throughputKBps(1D); + + populateLeasesInLeaseTable(lease1, lease2, lease3, lease4); + leaseAssignmentManagerRunnable.run(); + leaseAssignmentManagerRunnable.run(); + leaseAssignmentManagerRunnable.run(); + + assertEquals( + 1, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .count()); + + assertEquals( + 3, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .count()); + + assertEquals( + 1000D, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics1.getWorkerId())) + .mapToDouble(Lease::throughputKBps) + .sum()); + + assertEquals( + 1002D, + leaseRefresher.listLeases().stream() + .filter(lease -> lease.leaseOwner().equals(workerMetrics2.getWorkerId())) + .mapToDouble(Lease::throughputKBps) + .sum()); + } + + private void createAndPutBadWorkerMetricsEntryInTable(final String workerId) { + final PutItemRequest putItemRequest = PutItemRequest.builder() + .tableName(WORKER_METRICS_TABLE_NAME) + .item(ImmutableMap.of( + "wid", 
AttributeValue.builder().s(workerId).build())) + .build(); + + dynamoDbAsyncClient.putItem(putItemRequest); + } + + private void createAndPutBadLeaseEntryInTable(final String leaseKey) { + final PutItemRequest putItemRequest = PutItemRequest.builder() + .tableName(LEASE_TABLE_NAME) + .item(ImmutableMap.of( + "leaseKey", AttributeValue.builder().s(leaseKey).build())) + .build(); + + dynamoDbAsyncClient.putItem(putItemRequest); + } + + private LeaseAssignmentManager createLeaseAssignmentManager( + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config, + final Long leaseDurationMillis, + final Supplier nanoTimeProvider, + final int maxLeasesPerWorker) { + + final LeaseAssignmentManager leaseAssignmentManager = new LeaseAssignmentManager( + leaseRefresher, + workerMetricsDAO, + mockLeaderDecider, + config, + TEST_LEADER_WORKER_ID, + leaseDurationMillis, + new NullMetricsFactory(), + scheduledExecutorService, + nanoTimeProvider, + maxLeasesPerWorker, + gracefulLeaseHandoffConfig); + leaseAssignmentManager.start(); + return leaseAssignmentManager; + } + + @NotNull + private static LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig + getWorkerUtilizationAwareAssignmentConfig(final double maxThroughput, final int reBalanceThreshold) { + final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config = + new LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig(); + config.maxThroughputPerHostKBps(maxThroughput); + config.workerMetricsReporterFreqInMillis(Duration.ofHours(1).toMillis()); + config.reBalanceThresholdPercentage(reBalanceThreshold); + config.dampeningPercentage(80); + config.staleWorkerMetricsEntryCleanupDuration(Duration.ofDays(1000)); + config.varianceBalancingFrequency(1); + return config; + } + + private Lease createDummyUnAssignedLease(final String leaseKey) { + final Lease lease = new Lease(); + lease.leaseKey(leaseKey); + lease.checkpoint(TRIM_HORIZON); + return lease; + } + + private Lease 
createDummyLease(final String leaseKey, final String leaseOwner) { + final Lease lease = createDummyUnAssignedLease(leaseKey); + lease.leaseOwner(leaseOwner); + lease.leaseCounter(123L); + lease.throughputKBps(10D); + return lease; + } + + private WorkerMetricStats createDummyDefaultWorkerMetrics(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of()) + .build(); + } + + private WorkerMetricStats createDummyYieldWorkerMetrics(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", ImmutableList.of(90D, 90D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L))) + .build(); + } + + private WorkerMetricStats createDummyTakeWorkerMetrics(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", ImmutableList.of(50D, 50D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L))) + .build(); + } + + private WorkerMetricStats createDummyWorkerMetrics( + final String workerId, final double value, final long operatingRangeMax) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", ImmutableList.of(value, value))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(operatingRangeMax))) + .build(); + } + + private WorkerMetricStats createWorkerWithFailingWorkerMetric(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", 
ImmutableList.of(50D, -1D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L))) + .build(); + } + + private WorkerMetricStats createWorkerWithFailingWorkerMetricInPast(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", ImmutableList.of(-1D, 50D, 50D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L))) + .build(); + } + + private WorkerMetricStats createWorkerWithHotWorkerMetricStats(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", ImmutableList.of(90D, 90D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(50L))) + .build(); + } + + private WorkerMetricStats createInActiveWorkerWithNoUtilization(final String workerId) { + // Setting 0 as update time means worker is always expired. 
+ return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(0L) + .metricStats(ImmutableMap.of("C", ImmutableList.of(5D, 5D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L))) + .build(); + } + + private void writeToWorkerMetricsTables(final WorkerMetricStats workerMetrics) { + dynamoDbAsyncClient + .putItem(PutItemRequest.builder() + .tableName(WORKER_METRICS_TABLE_NAME) + .item(TableSchema.fromBean(WorkerMetricStats.class).itemToMap(workerMetrics, false)) + .build()) + .join(); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitorTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitorTest.java new file mode 100644 index 000000000..e83189a41 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/ClientVersionChangeMonitorTest.java @@ -0,0 +1,157 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator.migration; + +import java.util.Random; +import java.util.concurrent.ScheduledExecutorService; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.migration.ClientVersionChangeMonitor.ClientVersionChangeCallback; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.NullMetricsFactory; + +import static org.mockito.ArgumentMatchers.anyObject; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY; + +public class ClientVersionChangeMonitorTest { + private ClientVersionChangeMonitor monitorUnderTest; + + private final MetricsFactory nullMetricsFactory = new NullMetricsFactory(); + private final CoordinatorStateDAO mockCoordinatorStateDAO = + Mockito.mock(CoordinatorStateDAO.class, Mockito.RETURNS_MOCKS); + private final ScheduledExecutorService mockScheduler = + Mockito.mock(ScheduledExecutorService.class, Mockito.RETURNS_MOCKS); + private final ClientVersionChangeCallback mockCallback = + Mockito.mock(ClientVersionChangeCallback.class, Mockito.RETURNS_MOCKS); + private final Random mockRandom = Mockito.mock(Random.class, Mockito.RETURNS_MOCKS); + + @BeforeEach + public void setup() { + 
when(mockRandom.nextDouble()).thenReturn(0.0); + } + + @ParameterizedTest + @CsvSource({ + "CLIENT_VERSION_2X, CLIENT_VERSION_UPGRADE_FROM_2X", + "CLIENT_VERSION_3X_WITH_ROLLBACK, CLIENT_VERSION_2X", + "CLIENT_VERSION_UPGRADE_FROM_2X, CLIENT_VERSION_3X_WITH_ROLLBACK", + "CLIENT_VERSION_3X_WITH_ROLLBACK, CLIENT_VERSION_3X" + }) + public void testMonitor(final ClientVersion currentClientVersion, final ClientVersion changedClientVersion) + throws Exception { + monitorUnderTest = new ClientVersionChangeMonitor( + nullMetricsFactory, + mockCoordinatorStateDAO, + mockScheduler, + mockCallback, + currentClientVersion, + mockRandom); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler).scheduleWithFixedDelay(argumentCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final MigrationState initialState = + new MigrationState(MIGRATION_HASH_KEY, "DUMMY_WORKER").update(currentClientVersion, "DUMMY_WORKER"); + final MigrationState changedState = initialState.copy().update(changedClientVersion, "DUMMY_WORKER2"); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)) + .thenReturn(initialState) + .thenReturn(changedState); + argumentCaptor.getValue().run(); + + verify(mockCallback, never()).accept(anyObject()); + + argumentCaptor.getValue().run(); + + final ArgumentCaptor stateCaptor = ArgumentCaptor.forClass(MigrationState.class); + verify(mockCallback, times(1)).accept(stateCaptor.capture()); + + Assertions.assertEquals(changedClientVersion, stateCaptor.getValue().getClientVersion()); + } + + @Test + public void testCallIsInvokedOnlyOnceIfSuccessful() throws Exception { + monitorUnderTest = new ClientVersionChangeMonitor( + nullMetricsFactory, + mockCoordinatorStateDAO, + mockScheduler, + mockCallback, + ClientVersion.CLIENT_VERSION_2X, + mockRandom); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Runnable.class); 
+ verify(mockScheduler).scheduleWithFixedDelay(argumentCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final MigrationState state = new MigrationState(MIGRATION_HASH_KEY, "DUMMY_WORKER") + .update(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X, "DUMMY_WORKER"); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(state); + + argumentCaptor.getValue().run(); + verify(mockCallback, times(1)).accept(anyObject()); + reset(mockCallback); + + argumentCaptor.getValue().run(); + verify(mockCallback, never()).accept(anyObject()); + } + + @Test + public void testCallIsInvokedAgainIfFailed() throws Exception { + monitorUnderTest = new ClientVersionChangeMonitor( + nullMetricsFactory, + mockCoordinatorStateDAO, + mockScheduler, + mockCallback, + ClientVersion.CLIENT_VERSION_2X, + mockRandom); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler).scheduleWithFixedDelay(argumentCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final MigrationState state = new MigrationState(MIGRATION_HASH_KEY, "DUMMY_WORKER") + .update(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X, "DUMMY_WORKER"); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(state); + + doThrow(new InvalidStateException("test exception")).when(mockCallback).accept(any()); + + argumentCaptor.getValue().run(); + verify(mockCallback, times(1)).accept(anyObject()); + reset(mockCallback); + + argumentCaptor.getValue().run(); + verify(mockCallback, times(1)).accept(anyObject()); + reset(mockCallback); + + argumentCaptor.getValue().run(); + verify(mockCallback, times(0)).accept(anyObject()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitorTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitorTest.java new file mode 100644 index 
000000000..8eecdebdf --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/MigrationReadyMonitorTest.java @@ -0,0 +1,602 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package software.amazon.kinesis.coordinator.migration; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Random; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.MultiStreamLease; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.NullMetricsFactory; 
+import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStats; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@Slf4j +public class MigrationReadyMonitorTest { + private static final String WORKER_ID = "MigrationReadyMonitorTestWorker0"; + private static final String DUMMY_STREAM_NAME = "DummyStreamName"; + private static final long WORKER_METRICS_EXPIRY_SECONDS = 60L; + + private MigrationReadyMonitor monitorUnderTest; + + private final MetricsFactory nullMetricsFactory = new NullMetricsFactory(); + private final Callable mockTimeProvider = mock(Callable.class, Mockito.RETURNS_MOCKS); + + private final LeaderDecider mockLeaderDecider = mock(LeaderDecider.class, Mockito.RETURNS_MOCKS); + + private final WorkerMetricStatsDAO mockWorkerMetricsDao = mock(WorkerMetricStatsDAO.class, Mockito.RETURNS_MOCKS); + + private final LeaseRefresher mockLeaseRefresher = mock(LeaseRefresher.class, Mockito.RETURNS_MOCKS); + + private final ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class, Mockito.RETURNS_MOCKS); + + private final Runnable mockRunnableCallback = mock(Runnable.class, Mockito.RETURNS_MOCKS); + + @BeforeEach + public void setup() { + monitorUnderTest = new MigrationReadyMonitor( + nullMetricsFactory, + mockTimeProvider, + mockLeaderDecider, + WORKER_ID, + mockWorkerMetricsDao, + WORKER_METRICS_EXPIRY_SECONDS, + mockLeaseRefresher, + mockScheduler, + mockRunnableCallback, + 0 /* stabilization duration - 0, to let the callback happen right away 
*/); + } + + @ParameterizedTest + @ValueSource(strings = {"WORKER_READY_CONDITION_MET", "WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY"}) + public void verifyNonLeaderDoesNotPerformMigrationChecks(final TestDataType testDataType) throws Exception { + final TestData data = TEST_DATA_MAP.get(testDataType); + when(mockTimeProvider.call()).thenReturn(1000L); + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(false); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()).thenReturn(data.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + runnableArgumentCaptor.getValue().run(); + + verify(mockRunnableCallback, never()).run(); + } + + @ParameterizedTest + @ValueSource(strings = {"WORKER_READY_CONDITION_MET", "WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY"}) + public void verifyLeaderPerformMigrationChecks(final TestDataType testDataType) throws Exception { + final TestData data = TEST_DATA_MAP.get(testDataType); + when(mockTimeProvider.call()).thenReturn(1000L); + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()).thenReturn(data.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + runnableArgumentCaptor.getValue().run(); + + verify(mockRunnableCallback, 
times(1)).run(); + } + + @ParameterizedTest + @CsvSource({ + "false, WORKER_READY_CONDITION_MET", + "false, WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY", + "true, WORKER_READY_CONDITION_NOT_MET_WITH_ZERO_WORKER_STATS", + "true, WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_WORKER_STATS", + "true, WORKER_READY_CONDITION_NOT_MET_WITH_ALL_INACTIVE_WORKER_STATS", + "true, WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_INACTIVE_WORKER_STATS", + "true, WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_NO_WORKER_STATS", + "true, WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_INACTIVE_WORKER_STATS", + "true, WORKER_READY_CONDITION_NOT_MET_MULTISTREAM_MODE_SANITY" + }) + public void testReadyConditionNotMetDoesNotInvokeCallback(final boolean gsiReady, final TestDataType testDataType) + throws Exception { + final TestData data = TEST_DATA_MAP.get(testDataType); + when(mockTimeProvider.call()).thenReturn(80 * 1000L); + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(gsiReady); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()).thenReturn(data.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + runnableArgumentCaptor.getValue().run(); + + verify(mockRunnableCallback, never()).run(); + } + + @Test + public void testExpiredLeaseOwner() throws Exception { + final TestData data1 = TEST_DATA_MAP.get( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_INACTIVE_WORKER_STATS); + final TestData data2 = + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET_AFTER_EXPIRED_LEASES_ARE_REASSIGNED); + + when(mockTimeProvider.call()).thenReturn(80 * 1000L); + 
when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()) + .thenReturn(data1.workerMetrics) + .thenReturn(data2.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data1.leaseList).thenReturn(data2.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + runnableArgumentCaptor.getValue().run(); + + verify(mockRunnableCallback, times(0)).run(); + + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(1)).run(); + } + + @Test + public void testInactiveToActiveWorkerMetricsCausesMonitorToSucceed() throws Exception { + final TestData data1 = + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_ALL_INACTIVE_WORKER_STATS); + final TestData data2 = TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET); + + when(mockTimeProvider.call()).thenReturn(80 * 1000L); + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()) + .thenReturn(data1.workerMetrics) + .thenReturn(data2.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data1.leaseList).thenReturn(data2.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + runnableArgumentCaptor.getValue().run(); + + verify(mockRunnableCallback, times(0)).run(); + + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(1)).run(); + } 
+ + @ParameterizedTest + @ValueSource(longs = {12, 30, 60, 180}) + public void testTriggerStability(final long stabilityDurationInSeconds) throws Exception { + monitorUnderTest = new MigrationReadyMonitor( + nullMetricsFactory, + mockTimeProvider, + mockLeaderDecider, + WORKER_ID, + mockWorkerMetricsDao, + WORKER_METRICS_EXPIRY_SECONDS, + mockLeaseRefresher, + mockScheduler, + mockRunnableCallback, + stabilityDurationInSeconds); + + final TestData data = TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET); + + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()).thenReturn(data.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + // Test 1: the callback is only invoked after the trigger has been true consecutively for the configured + // time + long testTime = + Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME - 200).toMillis(); + for (int i = 0; i <= stabilityDurationInSeconds; i++) { + verify(mockRunnableCallback, times(0)).run(); + when(mockTimeProvider.call()).thenReturn(testTime + i * 1000L); + runnableArgumentCaptor.getValue().run(); + } + verify(mockRunnableCallback, times(1)).run(); + reset(mockRunnableCallback); + + // Test 2: If leader changes the timer starts over + testTime = + Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME - 600).toMillis(); + for (int i = 0; i < stabilityDurationInSeconds / 2; i++) { + verify(mockRunnableCallback, times(0)).run(); + testTime = testTime + 1000L; + when(mockTimeProvider.call()).thenReturn(testTime); + runnableArgumentCaptor.getValue().run(); + + if (i == 
stabilityDurationInSeconds / 3) { + reset(mockLeaderDecider); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(false); + } + } + verify(mockRunnableCallback, times(0)).run(); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + for (int j = (int) stabilityDurationInSeconds / 2; j <= 3 * stabilityDurationInSeconds / 2; j++) { + verify(mockRunnableCallback, times(0)).run(); + testTime = testTime + 1000L; + when(mockTimeProvider.call()).thenReturn(testTime); + runnableArgumentCaptor.getValue().run(); + } + + verify(mockRunnableCallback, times(1)).run(); + reset(mockRunnableCallback); + + // reset flag by making worker stats expire + when(mockWorkerMetricsDao.getAllWorkerMetricStats()) + .thenReturn(TEST_DATA_MAP.get( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_INACTIVE_WORKER_STATS) + .workerMetrics); + when(mockLeaseRefresher.listLeases()) + .thenReturn(TEST_DATA_MAP.get( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_INACTIVE_WORKER_STATS) + .leaseList); + testTime = testTime + 1000L; + when(mockTimeProvider.call()).thenReturn(testTime); + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(0)).run(); + + // Use active worker stats for rest of the test + when(mockWorkerMetricsDao.getAllWorkerMetricStats()).thenReturn(data.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data.leaseList); + + // Test 3: If the trigger toggles back and forth + // Trigger is true 5 times in a row + for (int i = 0; i < 5; i++) { + verify(mockRunnableCallback, times(0)).run(); + testTime = testTime + 1000L; + when(mockTimeProvider.call()).thenReturn(testTime); + 
runnableArgumentCaptor.getValue().run(); + } + + // and then true until stabilityDurationInSeconds, but callback should not be invoked + // until after 8 more invocations. + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + for (int i = 8; i <= (stabilityDurationInSeconds + 8); i++) { + verify(mockRunnableCallback, times(0)).run(); + testTime = testTime + 1000L; + when(mockTimeProvider.call()).thenReturn(testTime); + runnableArgumentCaptor.getValue().run(); + } + verify(mockRunnableCallback, times(1)).run(); + } + + /** + * Test that when workers stats are just expired, its valid for 60 seconds, at 61 seconds + * it should be considered expired. + */ + @Test + public void testWorkerMetricsExpiryBoundaryConditions() throws Exception { + final TestData data = TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET); + // Each run of monitor calls timeProvider twice + when(mockTimeProvider.call()) + .thenReturn( + Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME).toMillis()) + .thenReturn( + Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME).toMillis()) + .thenReturn(Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME + 59) + .toMillis()) + .thenReturn(Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME + 59) + .toMillis()) + .thenReturn(Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME + 60) + .toMillis()) + .thenReturn(Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME + 60) + .toMillis()) + .thenReturn(Duration.ofSeconds(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME + 61) + .toMillis()); + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()).thenReturn(data.workerMetrics); + when(mockLeaseRefresher.listLeases()).thenReturn(data.leaseList); + + monitorUnderTest.startMonitor(); + final ArgumentCaptor runnableArgumentCaptor = ArgumentCaptor.forClass(Runnable.class); 
+ verify(mockScheduler) + .scheduleWithFixedDelay(runnableArgumentCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class)); + + // At 0 seconds, WorkerMetricStats are valid + reset(mockRunnableCallback); + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(1)).run(); + + // At 59 seconds, WorkerMetricStats are valid + reset(mockRunnableCallback); + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(1)).run(); + + // At 60 seconds, WorkerMetricStats have expired + reset(mockRunnableCallback); + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(0)).run(); + + // At 61 seconds, WorkerMetricStats have expired + reset(mockRunnableCallback); + runnableArgumentCaptor.getValue().run(); + verify(mockRunnableCallback, times(0)).run(); + } + + @RequiredArgsConstructor + @Getter + public static class TestData { + private final List leaseList; + private final List workerMetrics; + } + + private static final long EXPIRED_WORKER_STATS_LAST_UPDATE_TIME = 10L; + private static final long ACTIVE_WORKER_STATS_LAST_UPDATE_TIME = 10000L; + + public enum TestDataType { + WORKER_READY_CONDITION_MET, + WORKER_READY_CONDITION_NOT_MET_WITH_ZERO_WORKER_STATS, + WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_WORKER_STATS, + WORKER_READY_CONDITION_NOT_MET_WITH_ALL_INACTIVE_WORKER_STATS, + WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_INACTIVE_WORKER_STATS, + WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_NO_WORKER_STATS, + WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_INACTIVE_WORKER_STATS, + WORKER_READY_CONDITION_MET_AFTER_EXPIRED_LEASES_ARE_REASSIGNED, + WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY, + WORKER_READY_CONDITION_NOT_MET_MULTISTREAM_MODE_SANITY, + } + + public static final HashMap TEST_DATA_MAP = new HashMap<>(); + + @BeforeAll + public static void populateTestDataMap() { + final int numWorkers = 10; + final Random random = new Random(); + + TEST_DATA_MAP.put( + 
TestDataType.WORKER_READY_CONDITION_MET, + new TestData( + IntStream.range(0, 100) + .mapToObj(i -> new Lease( + "shardId-000000000" + i, + "MigrationReadyMonitorTestWorker" + random.nextInt(numWorkers), + random.nextLong(), + UUID.randomUUID(), + System.nanoTime(), + ExtendedSequenceNumber.TRIM_HORIZON, + null, + random.nextLong(), + null, + null, + null, + null)) + .collect(Collectors.toList()), + IntStream.range(0, 10) + .mapToObj(i -> WorkerMetricStats.builder() + .workerId("MigrationReadyMonitorTestWorker" + i) + .lastUpdateTime(ACTIVE_WORKER_STATS_LAST_UPDATE_TIME) + .build()) + .collect(Collectors.toList()))); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_ZERO_WORKER_STATS, + new TestData(TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).leaseList, new ArrayList<>())); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_WORKER_STATS, + new TestData( + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).leaseList, + IntStream.range(0, 5) + .mapToObj(i -> TEST_DATA_MAP + .get(TestDataType.WORKER_READY_CONDITION_MET) + .workerMetrics + .get(random.nextInt(10))) + .collect(Collectors.toList()))); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_ALL_INACTIVE_WORKER_STATS, + new TestData( + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).leaseList, + IntStream.range(0, 10) + .mapToObj(i -> WorkerMetricStats.builder() + .workerId("MigrationReadyMonitorTestWorker" + i) + .lastUpdateTime(EXPIRED_WORKER_STATS_LAST_UPDATE_TIME) + .build()) + .collect(Collectors.toList()))); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_PARTIAL_INACTIVE_WORKER_STATS, + new TestData( + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).leaseList, + IntStream.range(0, 10) + .mapToObj(i -> WorkerMetricStats.builder() + .workerId("MigrationReadyMonitorTestWorker" + i) + .lastUpdateTime( + random.nextDouble() > 0.5 + ? 
EXPIRED_WORKER_STATS_LAST_UPDATE_TIME + : ACTIVE_WORKER_STATS_LAST_UPDATE_TIME) // Some are active, some + // inactive + .build()) + .collect(Collectors.toList()))); + + final ArrayList newLeaseList = + new ArrayList<>(TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).leaseList); + // add some leases for another worker + IntStream.range(0, 5) + .mapToObj(i -> new Lease( + "shardId-100000000" + i, + "ExpiredLeaseWorker", + 100L, + UUID.randomUUID(), + 100L, + ExtendedSequenceNumber.TRIM_HORIZON, + null, + 5L, + null, + null, + null, + null)) + .forEach(newLeaseList::add); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_NO_WORKER_STATS, + new TestData(newLeaseList, TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).workerMetrics)); + + final ArrayList newWorkerMetrics = + new ArrayList<>(TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).workerMetrics); + newWorkerMetrics.add(WorkerMetricStats.builder() + .workerId("ExpiredLeaseWorker") + .lastUpdateTime(EXPIRED_WORKER_STATS_LAST_UPDATE_TIME) + .build()); + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_INACTIVE_WORKER_STATS, + new TestData( + TEST_DATA_MAP.get( + TestDataType + .WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_NO_WORKER_STATS) + .leaseList, + newWorkerMetrics)); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_MET_AFTER_EXPIRED_LEASES_ARE_REASSIGNED, + new TestData( + TEST_DATA_MAP + .get( + TestDataType + .WORKER_READY_CONDITION_NOT_MET_WITH_EXPIRED_LEASES_AND_NO_WORKER_STATS) + .leaseList + .stream() + .map(lease -> { + final Lease newLease; + if (lease.leaseOwner().equals("ExpiredLeaseWorker")) { + newLease = new Lease( + lease.leaseKey(), + "MigrationReadyMonitorTestWorker" + random.nextInt(numWorkers), + lease.leaseCounter(), + lease.concurrencyToken(), + lease.lastCounterIncrementNanos(), + lease.checkpoint(), + lease.pendingCheckpoint(), + 
lease.ownerSwitchesSinceCheckpoint(), + lease.parentShardIds(), + lease.childShardIds(), + lease.pendingCheckpointState(), + lease.hashKeyRangeForLease()); + } else { + newLease = new Lease( + lease.leaseKey(), + lease.leaseOwner(), + lease.leaseCounter(), + lease.concurrencyToken(), + lease.lastCounterIncrementNanos(), + lease.checkpoint(), + lease.pendingCheckpoint(), + lease.ownerSwitchesSinceCheckpoint(), + lease.parentShardIds(), + lease.childShardIds(), + lease.pendingCheckpointState(), + lease.hashKeyRangeForLease()); + } + return newLease; + }) + .collect(Collectors.toList()), + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).workerMetrics)); + + final int numStreams = 3; + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY, + new TestData( + IntStream.range(0, 100) + .mapToObj(i -> { + final int streamId = random.nextInt(numStreams); + final int workerId = random.nextInt(numWorkers); + final MultiStreamLease m = new MultiStreamLease(); + m.leaseKey(DUMMY_STREAM_NAME + streamId + ":shardId-00000000" + i); + m.streamIdentifier(DUMMY_STREAM_NAME + streamId); + m.shardId("shardId-00000000" + i); + m.leaseOwner("MigrationReadyMonitorTestWorker" + workerId); + m.leaseCounter(random.nextLong()); + m.concurrencyToken(UUID.randomUUID()); + m.lastCounterIncrementNanos(System.nanoTime()); + m.checkpoint(ExtendedSequenceNumber.TRIM_HORIZON); + m.ownerSwitchesSinceCheckpoint(random.nextLong()); + return m; + }) + .collect(Collectors.toList()), + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET).workerMetrics)); + + TEST_DATA_MAP.put( + TestDataType.WORKER_READY_CONDITION_NOT_MET_MULTISTREAM_MODE_SANITY, + new TestData( + TEST_DATA_MAP.get(TestDataType.WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY).leaseList, + IntStream.range(0, 5) + .mapToObj(i -> TEST_DATA_MAP + .get(TestDataType.WORKER_READY_CONDITION_MET_MULTISTREAM_MODE_SANITY) + .workerMetrics + .get(random.nextInt(10))) + 
.collect(Collectors.toList()))); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineTest.java new file mode 100644 index 000000000..88e163806 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/coordinator/migration/MigrationStateMachineTest.java @@ -0,0 +1,316 @@ +/* + * Copyright 2024 Amazon.com, Inc. or its affiliates. + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package software.amazon.kinesis.coordinator.migration; + +import java.time.Duration; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.ScheduledExecutorService; + +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer; +import software.amazon.kinesis.coordinator.LeaderDecider; +import software.amazon.kinesis.coordinator.migration.MigrationReadyMonitorTest.TestDataType; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.NullMetricsFactory; +import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO; + +import static org.mockito.ArgumentMatchers.anyObject; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyMap; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.timeout; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY; + +@Slf4j +public class MigrationStateMachineTest { + private static final String WORKER_ID = "MigrationStateMachineTestWorker"; + private static 
final long WORKER_STATS_EXPIRY_SECONDS = 1L; + + private MigrationStateMachine stateMachineUnderTest; + private final MetricsFactory nullMetricsFactory = new NullMetricsFactory(); + private final Callable mockTimeProvider = mock(Callable.class, Mockito.RETURNS_MOCKS); + private final LeaderDecider mockLeaderDecider = mock(LeaderDecider.class, Mockito.RETURNS_MOCKS); + private final CoordinatorStateDAO mockCoordinatorStateDAO = + Mockito.mock(CoordinatorStateDAO.class, Mockito.RETURNS_MOCKS); + private final WorkerMetricStatsDAO mockWorkerMetricsDao = mock(WorkerMetricStatsDAO.class, Mockito.RETURNS_MOCKS); + private final LeaseRefresher mockLeaseRefresher = mock(LeaseRefresher.class, Mockito.RETURNS_MOCKS); + private final DynamicMigrationComponentsInitializer mockInitializer = + mock(DynamicMigrationComponentsInitializer.class, Mockito.RETURNS_MOCKS); + private final ScheduledExecutorService mockMigrationStateMachineThreadPool = + mock(ScheduledExecutorService.class, Mockito.RETURNS_MOCKS); + private final Random mockRandom = Mockito.mock(Random.class, Mockito.RETURNS_MOCKS); + + @BeforeEach + public void setup() throws Exception { + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(null); + when(mockCoordinatorStateDAO.createCoordinatorStateIfNotExists(any())).thenReturn(true); + when(mockLeaderDecider.isLeader(eq(WORKER_ID))).thenReturn(true); + when(mockTimeProvider.call()).thenReturn(10000L); + when(mockInitializer.leaderDecider()).thenReturn(mockLeaderDecider); + when(mockInitializer.workerIdentifier()).thenReturn(WORKER_ID); + when(mockInitializer.workerMetricsDAO()).thenReturn(mockWorkerMetricsDao); + when(mockInitializer.workerMetricsExpirySeconds()).thenReturn(WORKER_STATS_EXPIRY_SECONDS); + when(mockInitializer.leaseRefresher()).thenReturn(mockLeaseRefresher); + } + + @BeforeAll + public static void beforeAll() { + MigrationReadyMonitorTest.populateTestDataMap(); + } + + private MigrationStateMachine 
getStateMachineUnderTest(final ClientVersionConfig config) + throws DependencyException { + final MigrationStateMachine migrationStateMachine = new MigrationStateMachineImpl( + nullMetricsFactory, + mockTimeProvider, + mockCoordinatorStateDAO, + mockMigrationStateMachineThreadPool, + config, + mockRandom, + mockInitializer, + WORKER_ID, + Duration.ofMinutes(0).getSeconds()); + migrationStateMachine.initialize(); + return migrationStateMachine; + } + + @ParameterizedTest + @CsvSource({ + "CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X, CLIENT_VERSION_UPGRADE_FROM_2X", + "CLIENT_VERSION_CONFIG_3X, CLIENT_VERSION_3X" + }) + public void testStateMachineInitialization( + final ClientVersionConfig config, final ClientVersion expectedStateMachineState) throws Exception { + stateMachineUnderTest = getStateMachineUnderTest(config); + Assertions.assertEquals(expectedStateMachineState, stateMachineUnderTest.getCurrentClientVersion()); + } + + @Test + public void testMigrationReadyFlip() throws Exception { + stateMachineUnderTest = getStateMachineUnderTest(ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X); + + // After initialization, state machine should start to monitor for upgrade readiness + final ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(2)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + initiateAndTestFlip(runnableCaptor); + } + + @Test + public void testRollbackAfterFlip() throws Exception { + stateMachineUnderTest = getStateMachineUnderTest(ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X); + + // After initialization, state machine should start to monitor for upgrade readiness + ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(2)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + 
initiateAndTestFlip(runnableCaptor); + + // A new version monitor must be created after flip + runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, timeout(100).times(1)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final Runnable rollbackMonitorRunnable = runnableCaptor.getValue(); + initiateAndTestRollBack(rollbackMonitorRunnable); + } + + @Test + public void testRollForward() throws Exception { + stateMachineUnderTest = getStateMachineUnderTest(ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X); + + // After initialization, state machine should start to monitor for upgrade readiness + ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(2)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + initiateAndTestFlip(runnableCaptor); + + // A new version monitor must be created after flip + runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(1)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final Runnable rollbackMonitorRunnable = runnableCaptor.getValue(); + initiateAndTestRollBack(rollbackMonitorRunnable); + + // A new version monitor must be created after rollback + runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(1)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final Runnable rollforwardMonitorRunnable = runnableCaptor.getValue(); + initiateAndTestRollForward(rollforwardMonitorRunnable); + } + + @Test + public void testRollbackBeforeFlip() throws Exception { + stateMachineUnderTest = getStateMachineUnderTest(ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X); + + // After initialization, state machine should start to monitor for 
upgrade readiness + ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(2)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + initiateAndTestRollbackBeforeFlip(runnableCaptor); + } + + @Test + public void successfulUpgradeAfterFlip() throws Exception { + stateMachineUnderTest = getStateMachineUnderTest(ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X); + + // After initialization, state machine should start to monitor for upgrade readiness + ArgumentCaptor runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, times(2)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + initiateAndTestFlip(runnableCaptor); + + // A new version monitor must be created after flip + runnableCaptor = ArgumentCaptor.forClass(Runnable.class); + verify(mockMigrationStateMachineThreadPool, timeout(100).times(1)) + .scheduleWithFixedDelay(runnableCaptor.capture(), anyLong(), anyLong(), anyObject()); + + final Runnable successfulUpgradeMonitor = runnableCaptor.getValue(); + + initiateAndTestSuccessfulUpgrade(successfulUpgradeMonitor); + } + + private void initiateAndTestFlip(final ArgumentCaptor runnableCaptor) throws Exception { + final Runnable migrationReadyMonitorRunnable = + runnableCaptor.getAllValues().get(0) instanceof MigrationReadyMonitor + ? runnableCaptor.getAllValues().get(0) + : runnableCaptor.getAllValues().get(1); + + final Runnable versionChangeMonitorRunnable = + runnableCaptor.getAllValues().get(0) instanceof ClientVersionChangeMonitor + ? 
runnableCaptor.getAllValues().get(0) + : runnableCaptor.getAllValues().get(1); + + when(mockLeaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()).thenReturn(true); + when(mockLeaseRefresher.listLeases()) + .thenReturn(MigrationReadyMonitorTest.TEST_DATA_MAP + .get(TestDataType.WORKER_READY_CONDITION_MET) + .getLeaseList()); + when(mockWorkerMetricsDao.getAllWorkerMetricStats()) + .thenReturn(MigrationReadyMonitorTest.TEST_DATA_MAP + .get(TestDataType.WORKER_READY_CONDITION_MET) + .getWorkerMetrics()); + + // during flip, the migrationReady callback handling will update MigrationState. + // mock a successful update of MigrationState and return the captured state back + // when clientVersion change monitor tried to read the value from DDB. + + final ArgumentCaptor stateCaptor = ArgumentCaptor.forClass(MigrationState.class); + when(mockCoordinatorStateDAO.updateCoordinatorStateWithExpectation(stateCaptor.capture(), anyMap())) + .thenReturn(true); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)) + .thenAnswer(invocation -> stateCaptor.getValue()); + + reset(mockMigrationStateMachineThreadPool); + log.info("TestLog ----------- Initiate a flip -------------"); + // Invoke the monitor callbacks so the version flips to 3.x with rollback + migrationReadyMonitorRunnable.run(); + Assertions.assertEquals( + ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK, + stateCaptor.getValue().getClientVersion()); + + versionChangeMonitorRunnable.run(); + Assertions.assertEquals( + ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK, stateMachineUnderTest.getCurrentClientVersion()); + + verify(mockInitializer) + .initializeClientVersionFor3xWithRollback(eq(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X)); + log.info("TestLog ----------- flip done -------------"); + } + + private void initiateAndTestRollbackBeforeFlip(final ArgumentCaptor runnableCaptor) throws Exception { + final Runnable versionChangeMonitorRunnable = + runnableCaptor.getAllValues().get(0) instanceof 
ClientVersionChangeMonitor + ? runnableCaptor.getAllValues().get(0) + : runnableCaptor.getAllValues().get(1); + + final MigrationState state = + new MigrationState(MIGRATION_HASH_KEY, WORKER_ID).update(ClientVersion.CLIENT_VERSION_2X, WORKER_ID); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(state); + reset(mockMigrationStateMachineThreadPool); + reset(mockInitializer); + log.info("TestLog ----------- Initiate rollback before flip -------------"); + versionChangeMonitorRunnable.run(); + log.info("TestLog ----------- rollback before flip done -------------"); + Assertions.assertEquals(ClientVersion.CLIENT_VERSION_2X, stateMachineUnderTest.getCurrentClientVersion()); + verify(mockInitializer).initializeClientVersionFor2x(eq(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X)); + } + + private void initiateAndTestRollBack(final Runnable rollbackMonitorRunnable) throws Exception { + final MigrationState state = + new MigrationState(MIGRATION_HASH_KEY, WORKER_ID).update(ClientVersion.CLIENT_VERSION_2X, WORKER_ID); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(state); + reset(mockMigrationStateMachineThreadPool); + reset(mockInitializer); + log.info("TestLog ----------- Initiate rollback -------------"); + rollbackMonitorRunnable.run(); + log.info("TestLog ----------- rollback done -------------"); + Assertions.assertEquals(ClientVersion.CLIENT_VERSION_2X, stateMachineUnderTest.getCurrentClientVersion()); + verify(mockInitializer).initializeClientVersionFor2x(eq(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK)); + } + + private void initiateAndTestRollForward(final Runnable rollforwardMonitorRunnable) throws Exception { + final MigrationState state = new MigrationState(MIGRATION_HASH_KEY, WORKER_ID) + .update(ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X, WORKER_ID); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(state); + reset(mockMigrationStateMachineThreadPool); + 
reset(mockInitializer); + log.info("TestLog ----------- Initiate roll-forward -------------"); + rollforwardMonitorRunnable.run(); + log.info("TestLog ----------- roll-forward done -------------"); + Assertions.assertEquals( + ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X, stateMachineUnderTest.getCurrentClientVersion()); + verify(mockInitializer).initializeClientVersionForUpgradeFrom2x(eq(ClientVersion.CLIENT_VERSION_2X)); + } + + private void initiateAndTestSuccessfulUpgrade(final Runnable successfulUpgradeMonitor) throws Exception { + final MigrationState state = + new MigrationState(MIGRATION_HASH_KEY, WORKER_ID).update(ClientVersion.CLIENT_VERSION_3X, WORKER_ID); + when(mockCoordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY)).thenReturn(state); + reset(mockMigrationStateMachineThreadPool); + reset(mockInitializer); + log.info("TestLog ----------- Initiate successful upgrade -------------"); + successfulUpgradeMonitor.run(); + log.info("TestLog ----------- successful upgrade done -------------"); + Assertions.assertEquals(ClientVersion.CLIENT_VERSION_3X, stateMachineUnderTest.getCurrentClientVersion()); + verify(mockInitializer).initializeClientVersionFor3x(ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDeciderTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDeciderTest.java new file mode 100644 index 000000000..cc000217d --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leader/DynamoDBLockBasedLeaderDeciderTest.java @@ -0,0 +1,175 @@ +package software.amazon.kinesis.leader; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; + +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import 
com.amazonaws.services.dynamodbv2.local.shared.access.AmazonDynamoDBLocal; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.DynamoDbClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.kinesis.coordinator.CoordinatorConfig; +import software.amazon.kinesis.coordinator.CoordinatorState; +import software.amazon.kinesis.coordinator.CoordinatorStateDAO; +import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.metrics.NullMetricsFactory; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class DynamoDBLockBasedLeaderDeciderTest { + + private static final String TEST_LOCK_TABLE_NAME = "IAmTestLockTable"; + + private final AmazonDynamoDBLocal dynamoDBEmbedded = DynamoDBEmbedded.create(); + private final DynamoDbAsyncClient dynamoDBAsyncClient = dynamoDBEmbedded.dynamoDbAsyncClient(); + private final DynamoDbClient dynamoDBSyncClient = dynamoDBEmbedded.dynamoDbClient(); + private final Map workerIdToLeaderDeciderMap = new HashMap<>(); + + @BeforeEach + void setup() throws DependencyException { + final CoordinatorConfig c = new CoordinatorConfig("TestApplication"); + c.coordinatorStateTableConfig().tableName(TEST_LOCK_TABLE_NAME); + final CoordinatorStateDAO dao = new CoordinatorStateDAO(dynamoDBAsyncClient, c.coordinatorStateTableConfig()); + dao.initialize(); + 
IntStream.range(0, 10).sequential().forEach(index -> { + final String workerId = getWorkerId(index); + workerIdToLeaderDeciderMap.put( + workerId, + DynamoDBLockBasedLeaderDecider.create(dao, workerId, 100L, 10L, new NullMetricsFactory())); + }); + + workerIdToLeaderDeciderMap.values().forEach(DynamoDBLockBasedLeaderDecider::initialize); + } + + private static String getWorkerId(final int index) { + return "worker" + index; + } + + @Test + void isLeader_multipleWorkerTryingLock_assertOnlySingleOneAcquiringLock() { + final AtomicInteger atomicInteger = new AtomicInteger(0); + workerIdToLeaderDeciderMap.entrySet().stream().parallel().forEach(entry -> { + if (entry.getValue().isLeader(entry.getKey())) { + atomicInteger.incrementAndGet(); + } + }); + + assertEquals(1, atomicInteger.get(), "Multiple workers were able to get lock"); + } + + @Test + void isLeader_sameWorkerChecksLeadershipSeveralTime_assertTrueInAllCases() { + final String workerId = getWorkerId(1); + final DynamoDBLockBasedLeaderDecider decider = workerIdToLeaderDeciderMap.get(workerId); + for (int i = 0; i < 5; ++i) { + assertTrue(decider.isLeader(workerId)); + } + decider.shutdown(); + + final GetItemRequest getItemRequest = GetItemRequest.builder() + .tableName(TEST_LOCK_TABLE_NAME) + .key(Collections.singletonMap( + CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, + AttributeValue.builder() + .s(CoordinatorState.LEADER_HASH_KEY) + .build())) + .build(); + final GetItemResponse getItemResult = dynamoDBSyncClient.getItem(getItemRequest); + // assert that after shutdown the lockItem is no longer present. + assertFalse(getItemResult.hasItem()); + + // After shutdown, assert that leaderDecider returns false. 
+ assertFalse(decider.isLeader(workerId), "LeaderDecider did not return false after shutdown."); + } + + @Test + void isLeader_staleLeaderLock_assertLockTakenByAnotherWorker() throws InterruptedException { + final String workerId = getWorkerId(1); + final DynamoDBLockBasedLeaderDecider decider = workerIdToLeaderDeciderMap.get(workerId); + + createRandomStaleLockEntry(); + + // The First time we check the library starts in-memory counter for expiry tracking and does not get lock + assertFalse(decider.isLeader(workerId), workerId + " got lock which is not expected as this is the first call"); + // lock lease Duration is 100ms sleep for 200ms to let the lock expire + Thread.sleep(200); + // another worker is able to take the lock now. + assertTrue(decider.isLeader(workerId), workerId + " did not get the expired lock"); + } + + @Test + void isAnyLeaderElected_sanity() throws InterruptedException { + final String workerId = getWorkerId(1); + final DynamoDBLockBasedLeaderDecider decider = workerIdToLeaderDeciderMap.get(workerId); + + assertFalse(decider.isAnyLeaderElected(), "isAnyLeaderElected returns true when no leader lock is present"); + + // perform leaderElection + decider.isLeader(workerId); + Thread.sleep(120); + + assertTrue(decider.isAnyLeaderElected(), "isAnyLeaderElected returns false when leader lock is present"); + + Thread.sleep(120); + // heartbeat is happening on the leader validate that on different identifying different RevisionNumber, + // lock is considered ACTIVE + assertTrue(decider.isAnyLeaderElected(), "isAnyLeaderElected returns false when leader lock is present"); + } + + @Test + void isAnyLeaderElected_staleLock_validateExpectedBehavior() throws InterruptedException { + final String workerId = getWorkerId(1); + final DynamoDBLockBasedLeaderDecider decider = workerIdToLeaderDeciderMap.get(workerId); + + createRandomStaleLockEntry(); + + assertTrue(decider.isAnyLeaderElected(), "isAnyLeaderElected returns false when leader lock is present"); + 
+ // sleep for more than lease duration + Thread.sleep(205); + + // lock has become stale as it passed lease duration without any heartbeat + assertFalse(decider.isAnyLeaderElected(), "isAnyLeaderElected returns true when leader lock is stale"); + } + + @Test + void isAnyLeaderElected_withoutTable_assertFalse() { + final String workerId = getWorkerId(1); + final DynamoDBLockBasedLeaderDecider decider = workerIdToLeaderDeciderMap.get(workerId); + + dynamoDBSyncClient.deleteTable( + DeleteTableRequest.builder().tableName(TEST_LOCK_TABLE_NAME).build()); + assertFalse(decider.isAnyLeaderElected(), "isAnyLeaderElected returns true when table don't exists"); + } + + private void createRandomStaleLockEntry() { + final PutItemRequest putItemRequest = PutItemRequest.builder() + .tableName(TEST_LOCK_TABLE_NAME) + .item(ImmutableMap.of( + CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, + AttributeValue.builder() + .s(CoordinatorState.LEADER_HASH_KEY) + .build(), + "leaseDuration", + AttributeValue.builder().s("200").build(), + "ownerName", + AttributeValue.builder().s(UUID.randomUUID().toString()).build(), + "recordVersionNumber", + AttributeValue.builder().s(UUID.randomUUID().toString()).build())) + .build(); + dynamoDBSyncClient.putItem(putItemRequest); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDeciderTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDeciderTest.java new file mode 100644 index 000000000..e95eba5d8 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leader/MigrationAdaptiveLeaderDeciderTest.java @@ -0,0 +1,106 @@ +package software.amazon.kinesis.leader; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.kinesis.coordinator.LeaderDecider; +import 
software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider; +import software.amazon.kinesis.metrics.NullMetricsFactory; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +class MigrationAdaptiveLeaderDeciderTest { + + private static final String TEST_RANDOM_WORKER_ID = "IAmRandomWorkerId"; + + @Mock + private LeaderDecider kcl3xLeaderDecider; + + @Mock + private LeaderDecider kcl2xLeaderDecider; + + private MigrationAdaptiveLeaderDecider migrationAdaptiveLeaderDecider; + + @BeforeEach + void setup() { + MockitoAnnotations.initMocks(this); + } + + private void createLeaderDecider(final LeaderDecider currentLeaderDecider) { + this.migrationAdaptiveLeaderDecider = new MigrationAdaptiveLeaderDecider(new NullMetricsFactory()); + this.migrationAdaptiveLeaderDecider.updateLeaderDecider(currentLeaderDecider); + } + + @Test + void isLeader_modeProviderReturnsKCL3_X_assertNewLeaderDecideInvoked() { + when(kcl3xLeaderDecider.isLeader(anyString())).thenReturn(true); + when(kcl2xLeaderDecider.isLeader(anyString())).thenReturn(false); + + createLeaderDecider(kcl3xLeaderDecider); + final boolean response = migrationAdaptiveLeaderDecider.isLeader(TEST_RANDOM_WORKER_ID); + + assertTrue(response, "kcl3_xLeaderDecider didn't elected leader"); + verify(kcl3xLeaderDecider, times(1)).isLeader(anyString()); + verify(kcl2xLeaderDecider, times(0)).isLeader(anyString()); + } + + @Test + void isLeader_modeProviderReturnsKCL2_X_assertNewLeaderDecideInvoked() { + when(kcl3xLeaderDecider.isLeader(anyString())).thenReturn(false); + when(kcl2xLeaderDecider.isLeader(anyString())).thenReturn(true); + + createLeaderDecider(kcl2xLeaderDecider); + final boolean response = 
migrationAdaptiveLeaderDecider.isLeader(TEST_RANDOM_WORKER_ID); + + assertTrue(response, "kcl2_xLeaderDecider didn't elected leader"); + verify(kcl3xLeaderDecider, times(0)).isLeader(anyString()); + verify(kcl2xLeaderDecider, times(1)).isLeader(anyString()); + } + + @Test + void isLeader_transitionFromKCL2_XTo3_X_assertSwitchInTransition() { + // kcl3_xLeaderDecider returns true always in this mock + when(kcl3xLeaderDecider.isLeader(anyString())).thenReturn(true); + // kcl2_xLeaderDecider returns false always in this mock + when(kcl2xLeaderDecider.isLeader(anyString())).thenReturn(false); + final MigrationAdaptiveLeaseAssignmentModeProvider mockModeProvider = + mock(MigrationAdaptiveLeaseAssignmentModeProvider.class); + createLeaderDecider(kcl2xLeaderDecider); + + final boolean responseFirst = migrationAdaptiveLeaderDecider.isLeader(TEST_RANDOM_WORKER_ID); + final boolean responseSecond = migrationAdaptiveLeaderDecider.isLeader(TEST_RANDOM_WORKER_ID); + assertFalse(responseFirst); + assertFalse(responseSecond); + + // validate 2 calls to kcl2_xLeaderDecider and 0 calls gone to kcl3_xLeaderDecider + verify(kcl3xLeaderDecider, times(0)).isLeader(anyString()); + verify(kcl2xLeaderDecider, times(2)).isLeader(anyString()); + + migrationAdaptiveLeaderDecider.updateLeaderDecider(kcl3xLeaderDecider); + + final boolean responseThird = migrationAdaptiveLeaderDecider.isLeader(TEST_RANDOM_WORKER_ID); + final boolean responseForth = migrationAdaptiveLeaderDecider.isLeader(TEST_RANDOM_WORKER_ID); + assertTrue(responseThird); + assertTrue(responseForth); + + // validate no more call to kcl2_xLeaderDecider and 2 calls gone to kcl3_xLeaderDecider + verify(kcl3xLeaderDecider, times(2)).isLeader(anyString()); + verify(kcl2xLeaderDecider, times(2)).isLeader(anyString()); + + // Both LD as initialized once, kcl2_xLeaderDecider as initial mode and kcl3_xLeaderDecider after switch and + // only once. 
+ verify(kcl2xLeaderDecider, times(1)).initialize(); + verify(kcl3xLeaderDecider, times(1)).initialize(); + + // As the mode has changed, validate shutdown is called for kcl2_xLeaderDecider + verify(kcl2xLeaderDecider, times(1)).shutdown(); + verify(kcl3xLeaderDecider, times(0)).shutdown(); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ExceptionThrowingLeaseRefresher.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ExceptionThrowingLeaseRefresher.java index 62272bbeb..3ed07ca3a 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ExceptionThrowingLeaseRefresher.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ExceptionThrowingLeaseRefresher.java @@ -56,6 +56,7 @@ public enum ExceptionThrowingLeaseRefresherMethods { DELETEALL(10), UPDATELEASE(11), LISTLEASESFORSTREAM(12), + IS_LEASE_OWNER_TO_LEASE_KEY_INDEX_ACTIVE(13), NONE(Integer.MIN_VALUE); private Integer index; @@ -229,4 +230,12 @@ public ExtendedSequenceNumber getCheckpoint(final String leaseKey) throws ProvisionedThroughputException, InvalidStateException, DependencyException { return null; } + + @Override + public boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException { + throwExceptions( + "isLeaseOwnerToLeaseKeyIndexActive", + ExceptionThrowingLeaseRefresherMethods.IS_LEASE_OWNER_TO_LEASE_KEY_INDEX_ACTIVE); + return false; + } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/HierarchicalShardSyncerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/HierarchicalShardSyncerTest.java index e22a9126a..424d50df7 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/HierarchicalShardSyncerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/HierarchicalShardSyncerTest.java @@ -70,7 +70,6 @@ import static org.junit.Assert.assertTrue; import static 
org.mockito.Matchers.any; import static org.mockito.Mockito.atLeast; -import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -479,10 +478,6 @@ public void testCheckAndCreateLeasesForShardsWithShardListMultiStream() throws E @Test public void testCheckAndCreateLeasesForShardsWithEmptyShardList() throws Exception { final ArgumentCaptor leaseCaptor = ArgumentCaptor.forClass(Lease.class); - when(shardDetector.listShards()).thenReturn(SHARD_GRAPH_A); - when(dynamoDBLeaseRefresher.listLeases()).thenReturn(Collections.emptyList()); - when(dynamoDBLeaseRefresher.createLeaseIfNotExists(leaseCaptor.capture())) - .thenReturn(true); hierarchicalShardSyncer.checkAndCreateLeaseForNewShards( shardDetector, @@ -841,7 +836,6 @@ private void testCheckAndCreateLeasesForNewShardsAndClosedShard( .thenReturn(leases); when(dynamoDBLeaseRefresher.createLeaseIfNotExists(leaseCreateCaptor.capture())) .thenReturn(true); - doNothing().when(dynamoDBLeaseRefresher).deleteLease(leaseDeleteCaptor.capture()); // Initial call: No leases present, create leases. 
hierarchicalShardSyncer.checkAndCreateLeaseForNewShards( diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/KinesisShardDetectorTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/KinesisShardDetectorTest.java index 9c855c4bc..99852d3a2 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/KinesisShardDetectorTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/KinesisShardDetectorTest.java @@ -15,6 +15,7 @@ package software.amazon.kinesis.leases; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -39,6 +40,7 @@ import software.amazon.awssdk.services.kinesis.model.ResourceInUseException; import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException; import software.amazon.awssdk.services.kinesis.model.Shard; +import software.amazon.kinesis.common.StreamIdentifier; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.isA; @@ -63,6 +65,7 @@ public class KinesisShardDetectorTest { private static final long LIST_SHARDS_CACHE_ALLOWED_AGE_IN_SECONDS = 10; private static final int MAX_CACHE_MISSES_BEFORE_RELOAD = 10; private static final int CACHE_MISS_WARNING_MODULUS = 2; + private static final Duration KINESIS_REQUEST_TIMEOUT = Duration.ofSeconds(5); private static final String SHARD_ID = "shardId-%012d"; private KinesisShardDetector shardDetector; @@ -80,12 +83,13 @@ public class KinesisShardDetectorTest { public void setup() { shardDetector = new KinesisShardDetector( client, - STREAM_NAME, + StreamIdentifier.singleStreamInstance(STREAM_NAME), LIST_SHARDS_BACKOFF_TIME_IN_MILLIS, MAX_LIST_SHARDS_RETRY_ATTEMPTS, LIST_SHARDS_CACHE_ALLOWED_AGE_IN_SECONDS, MAX_CACHE_MISSES_BEFORE_RELOAD, - CACHE_MISS_WARNING_MODULUS); + CACHE_MISS_WARNING_MODULUS, + KINESIS_REQUEST_TIMEOUT); } @Test diff --git 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCleanupManagerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCleanupManagerTest.java index 9d51351c3..93d34badf 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCleanupManagerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCleanupManagerTest.java @@ -19,7 +19,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.UUID; import java.util.concurrent.ScheduledExecutorService; import java.util.stream.Collectors; @@ -82,8 +81,6 @@ public void setUp() throws Exception { garbageLeaseCleanupIntervalMillis); when(leaseCoordinator.leaseRefresher()).thenReturn(leaseRefresher); - when(leaseCoordinator.updateLease(any(Lease.class), any(UUID.class), any(String.class), any(String.class))) - .thenReturn(true); } /** @@ -238,7 +235,6 @@ public final void testLeaseDeletedWhenShardDoesNotExistAndCleanupCompletedLeaseD private void testLeaseDeletedWhenShardDoesNotExist(Lease heldLease) throws Exception { when(leaseCoordinator.leaseRefresher()).thenReturn(leaseRefresher); - when(leaseCoordinator.getCurrentlyHeldLease(SHARD_INFO.shardId())).thenReturn(heldLease); when(shardDetector.getChildShards(any(String.class))).thenThrow(ResourceNotFoundException.class); when(leaseRefresher.getLease(heldLease.leaseKey())).thenReturn(heldLease); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCoordinatorExerciser.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCoordinatorExerciser.java index 0bc285a6b..26eb0d3fb 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCoordinatorExerciser.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseCoordinatorExerciser.java @@ -25,15 +25,21 @@ import java.util.HashMap; import java.util.List; import 
java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import javax.swing.BoxLayout; import javax.swing.JFrame; import javax.swing.JLabel; import javax.swing.JPanel; import lombok.extern.slf4j.Slf4j; +import org.mockito.Mockito; import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.kinesis.common.DdbTableConfig; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer; @@ -47,6 +53,9 @@ import software.amazon.kinesis.metrics.MetricsLevel; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + @Slf4j public class LeaseCoordinatorExerciser { private static final int MAX_LEASES_FOR_WORKER = Integer.MAX_VALUE; @@ -73,7 +82,18 @@ public static void main(String[] args) dynamoDBClient, new DynamoDBLeaseSerializer(), true, - TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK); + TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); + + MigrationAdaptiveLeaseAssignmentModeProvider mockModeProvider = + mock(MigrationAdaptiveLeaseAssignmentModeProvider.class, Mockito.RETURNS_MOCKS); + 
when(mockModeProvider.getLeaseAssignmentMode()) + .thenReturn(LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT); + when(mockModeProvider.dynamicModeChangeSupportNeeded()).thenReturn(false); if (leaseRefresher.createLeaseTableIfNotExists()) { log.info("Waiting for newly created lease table"); @@ -102,13 +122,17 @@ public static void main(String[] args) leaseRefresher, workerIdentifier, leaseDurationMillis, + LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT, epsilonMillis, MAX_LEASES_FOR_WORKER, MAX_LEASES_TO_STEAL_AT_ONE_TIME, MAX_LEASE_RENEWER_THREAD_COUNT, INITIAL_LEASE_TABLE_READ_CAPACITY, INITIAL_LEASE_TABLE_WRITE_CAPACITY, - metricsFactory); + metricsFactory, + new LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig(), + LeaseManagementConfig.GracefulLeaseHandoffConfig.builder().build(), + new ConcurrentHashMap<>()); coordinators.add(coord); } @@ -144,7 +168,7 @@ public void actionPerformed(ActionEvent arg0) { button.setLabel("Start " + coord.workerIdentifier()); } else { try { - coord.start(); + coord.start(mockModeProvider); } catch (LeasingException e) { log.error("{}", e); } @@ -238,7 +262,7 @@ public int compare(final Lease arg0, final Lease arg1) { frame.setVisible(true); for (LeaseCoordinator coord : coordinators) { - coord.start(); + coord.start(mockModeProvider); } } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationBillingModePayPerRequestTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationBillingModePayPerRequestTest.java index f35b4ed8b..1c404baff 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationBillingModePayPerRequestTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationBillingModePayPerRequestTest.java @@ -15,7 +15,9 @@ package software.amazon.kinesis.leases; import lombok.extern.slf4j.Slf4j; +import 
software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher; @Slf4j @@ -29,7 +31,9 @@ protected DynamoDBLeaseRefresher getLeaseRefresher() { true, tableCreatorCallback, LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PAY_PER_REQUEST, - false); + new DdbTableConfig().billingMode(BillingMode.PAY_PER_REQUEST), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationTest.java index 6f3122712..d385a98bc 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseIntegrationTest.java @@ -20,8 +20,10 @@ import org.junit.runner.Description; import org.mockito.Mock; import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer; import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback; @@ -80,7 +82,9 @@ protected DynamoDBLeaseRefresher getLeaseRefresher() { true, tableCreatorCallback, LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PAY_PER_REQUEST, - false); + new 
DdbTableConfig().billingMode(BillingMode.PAY_PER_REQUEST), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseStatsRecorderTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseStatsRecorderTest.java new file mode 100644 index 000000000..77209a308 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseStatsRecorderTest.java @@ -0,0 +1,133 @@ +package software.amazon.kinesis.leases; + +import java.time.Duration; +import java.util.UUID; +import java.util.concurrent.Callable; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.mockito.Mockito.when; + +class LeaseStatsRecorderTest { + + private LeaseStatsRecorder leaseStatsRecorder; + private Callable mockedTimeProviderInMillis; + private static final long TEST_RENEWER_FREQ = Duration.ofMinutes(1).toMillis(); + + @BeforeEach + void setup() { + mockedTimeProviderInMillis = Mockito.mock(Callable.class); + leaseStatsRecorder = new LeaseStatsRecorder(TEST_RENEWER_FREQ, mockedTimeProviderInMillis); + } + + @Test + void leaseStatsRecorder_sanity() throws Exception { + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1")); + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1")); + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1")); + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1")); + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1")); + when(mockedTimeProviderInMillis.call()).thenReturn(System.currentTimeMillis() + 1); + + assertEquals( + 
Math.floor(this.leaseStatsRecorder.getThroughputKBps("lease-key1")), + 85.0, + "Incorrect throughputKbps calculated"); + // Test idempotent behavior + assertEquals( + Math.floor(this.leaseStatsRecorder.getThroughputKBps("lease-key1")), + 85.0, + "Incorrect throughputKbps calculated"); + } + + @Test + void leaseStatsRecorder_validateDecayToZero() throws Exception { + final long currentTime = System.currentTimeMillis(); + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1", currentTime, 1)); + when(mockedTimeProviderInMillis.call()) + .thenReturn(currentTime + 1) + .thenReturn(currentTime + 1) + .thenReturn(currentTime - Duration.ofMillis(TEST_RENEWER_FREQ).toMillis() - 5); + for (int i = 0; i < 2000; ++i) { + this.leaseStatsRecorder.getThroughputKBps("lease-key1"); + } + + // after decaying for long time, it eventually goes to zero after going below minimum range of double + // the test also validates that decaying does not fail with exception if we keep decaying a value + assertEquals(0.0D, this.leaseStatsRecorder.getThroughputKBps("lease-key1")); + } + + @Test + void leaseStatsRecorder_validateVeryHighThroughout() throws Exception { + final long currentTime = System.currentTimeMillis(); + // 1000 stats recorded + for (int i = 0; i < 1000; ++i) { + // 1 GB of bytes per stats + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1", currentTime, 1024 * 1024 * 1024)); + } + when(mockedTimeProviderInMillis.call()).thenReturn(System.currentTimeMillis() + 1); + assertEquals(17476266D, Math.floor(this.leaseStatsRecorder.getThroughputKBps("lease-key1"))); + } + + @Test + void leaseStatsRecorder_expiredItems_assertZeroOutput() throws Exception { + // Insert an expired item + this.leaseStatsRecorder.recordStats( + generateRandomLeaseStat("lease-key1", System.currentTimeMillis() - TEST_RENEWER_FREQ - 10)); + when(mockedTimeProviderInMillis.call()).thenReturn(System.currentTimeMillis() + 1); + + assertEquals( + 
this.leaseStatsRecorder.getThroughputKBps("lease-key1"), + 0.0, + "throughputKbps is not 0 when in case where all items are expired."); + } + + @Test + void getThroughputKbps_noEntryPresent_assertNull() throws Exception { + when(mockedTimeProviderInMillis.call()).thenReturn(System.currentTimeMillis()); + assertNull( + this.leaseStatsRecorder.getThroughputKBps(UUID.randomUUID().toString()), + "Did not return null for non existing leaseKey stats."); + } + + @Test + void dropLeaseStats_sanity() throws Exception { + this.leaseStatsRecorder.recordStats(generateRandomLeaseStat("lease-key1")); + when(mockedTimeProviderInMillis.call()).thenReturn(System.currentTimeMillis() + 1); + + assertEquals( + Math.floor(this.leaseStatsRecorder.getThroughputKBps("lease-key1")), + 17.0, + "Incorrect throughputKbps calculated"); + + this.leaseStatsRecorder.dropLeaseStats("lease-key1"); + // after drop, no entry is present and thus validate method returns null. + assertNull( + this.leaseStatsRecorder.getThroughputKBps("lease-key1"), + "LeaseStats exists even after dropping lease stats"); + } + + private static LeaseStatsRecorder.LeaseStats generateRandomLeaseStat(final String leaseKey) { + return generateRandomLeaseStat(leaseKey, System.currentTimeMillis()); + } + + private static LeaseStatsRecorder.LeaseStats generateRandomLeaseStat( + final String leaseKey, final long creationTimeMillis) { + // 1 MB data + return generateRandomLeaseStat(leaseKey, creationTimeMillis, 1024 * 1024); + } + + private static LeaseStatsRecorder.LeaseStats generateRandomLeaseStat( + final String leaseKey, final long creationTimeMillis, final long bytes) { + LeaseStatsRecorder.LeaseStats leaseStats = LeaseStatsRecorder.LeaseStats.builder() + .leaseKey(leaseKey) + .bytes(bytes) + .creationTimeMillis(creationTimeMillis) + .build(); + return leaseStats; + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseTest.java 
b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseTest.java index e0e338ba6..c4d076801 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/LeaseTest.java @@ -10,6 +10,9 @@ import org.mockito.runners.MockitoJUnitRunner; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + @RunWith(MockitoJUnitRunner.class) public class LeaseTest { @@ -18,12 +21,16 @@ public class LeaseTest { private static final long LEASE_DURATION_NANOS = TimeUnit.MILLISECONDS.toNanos(LEASE_DURATION_MILLIS); + private static final long LEASE_CHECKPOINT_TIMEOUT = 1000; + private final Lease shutdownRequestedLease = createShutdownRequestedLease(); + private final Lease eligibleForGracefulShutdownLease = createisEligibleForGracefulShutdownLease(); + // Write a unit test for software.amazon.kinesis.leases.Lease to test leaseOwner as null and epired @Test public void testLeaseOwnerNullAndExpired() { long expiredTime = MOCK_CURRENT_TIME - LEASE_DURATION_NANOS - 1; Lease lease = createLease(null, "leaseKey", expiredTime); - Assert.assertTrue(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); + assertTrue(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); Assert.assertNull(lease.leaseOwner()); } @@ -31,7 +38,7 @@ public void testLeaseOwnerNullAndExpired() { public void testLeaseOwnerNotNullAndExpired() { long expiredTime = MOCK_CURRENT_TIME - LEASE_DURATION_NANOS - 1; Lease lease = createLease("leaseOwner", "leaseKey", expiredTime); - Assert.assertTrue(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); + assertTrue(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); Assert.assertEquals("leaseOwner", lease.leaseOwner()); } @@ -39,7 +46,7 @@ public void testLeaseOwnerNotNullAndExpired() { public void 
testLeaseOwnerNotNullAndNotExpired() { long notExpiredTime = MOCK_CURRENT_TIME - LEASE_DURATION_NANOS + 1; Lease lease = createLease("leaseOwner", "leaseKey", notExpiredTime); - Assert.assertFalse(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); + assertFalse(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); Assert.assertEquals("leaseOwner", lease.leaseOwner()); } @@ -47,11 +54,62 @@ public void testLeaseOwnerNotNullAndNotExpired() { public void testLeaseOwnerNullAndNotExpired() { long notExpiredTime = MOCK_CURRENT_TIME - LEASE_DURATION_NANOS + 1; Lease lease = createLease(null, "leaseKey", notExpiredTime); - Assert.assertTrue(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); + assertTrue(lease.isAvailable(LEASE_DURATION_NANOS, MOCK_CURRENT_TIME)); Assert.assertNull(lease.leaseOwner()); } - private Lease createLease(String leaseOwner, String leaseKey, long lastCounterIncrementNanos) { + @Test + public void testBlockedOnPendingCheckpoint_LeaseAssignedAndCheckpointNotExpired_assertTrue() { + assertTrue(shutdownRequestedLease.blockedOnPendingCheckpoint(LEASE_CHECKPOINT_TIMEOUT - 1)); + } + + @Test + public void testBlockedOnPendingCheckpoint_LeaseUnassigned_assertFalse() { + shutdownRequestedLease.isExpiredOrUnassigned(true); + assertFalse(shutdownRequestedLease.blockedOnPendingCheckpoint(LEASE_CHECKPOINT_TIMEOUT)); + } + + @Test + public void testBlockedOnPendingCheckpoint_ShardEnd_assertFalse() { + shutdownRequestedLease.checkpoint(ExtendedSequenceNumber.SHARD_END); + assertFalse(shutdownRequestedLease.blockedOnPendingCheckpoint(LEASE_CHECKPOINT_TIMEOUT)); + } + + @Test + public void testBlockedOnPendingCheckpoint_ShutdownNotRequested_assertFalse() { + shutdownRequestedLease.checkpointOwner(null); + assertFalse(shutdownRequestedLease.blockedOnPendingCheckpoint(LEASE_CHECKPOINT_TIMEOUT)); + } + + @Test + public void testBlockedOnPendingCheckpoint_CheckpointTimeoutExpired_assertFalse() { + 
assertFalse(shutdownRequestedLease.blockedOnPendingCheckpoint(LEASE_CHECKPOINT_TIMEOUT + 1000)); + } + + @Test + public void testIsEligibleForGracefulShutdown_leaseNotExpiredNotShuttingDownAndNotShardEnd_assertTrue() { + assertTrue(eligibleForGracefulShutdownLease.isEligibleForGracefulShutdown()); + } + + @Test + public void testIsEligibleForGracefulShutdownFalse_shardEnd_assertFalse() { + eligibleForGracefulShutdownLease.checkpoint(ExtendedSequenceNumber.SHARD_END); + assertFalse(shutdownRequestedLease.isEligibleForGracefulShutdown()); + } + + @Test + public void testIsEligibleForGracefulShutdownFalse_leaseUnassigned_assertFalse() { + eligibleForGracefulShutdownLease.isExpiredOrUnassigned(true); + assertFalse(shutdownRequestedLease.isEligibleForGracefulShutdown()); + } + + @Test + public void testIsEligibleForGracefulShutdownFalse_shutdownRequested_assertFalse() { + eligibleForGracefulShutdownLease.checkpointOwner("owner"); + assertFalse(shutdownRequestedLease.isEligibleForGracefulShutdown()); + } + + private static Lease createLease(String leaseOwner, String leaseKey, long lastCounterIncrementNanos) { final Lease lease = new Lease(); lease.checkpoint(new ExtendedSequenceNumber("checkpoint")); lease.ownerSwitchesSinceCheckpoint(0L); @@ -63,4 +121,19 @@ private Lease createLease(String leaseOwner, String leaseKey, long lastCounterIn lease.lastCounterIncrementNanos(lastCounterIncrementNanos); return lease; } + + private static Lease createShutdownRequestedLease() { + final Lease lease = createLease("leaseOwner", "leaseKey", 0); + lease.checkpointOwner("checkpointOwner"); + lease.checkpointOwnerTimeoutTimestampMillis(LEASE_CHECKPOINT_TIMEOUT); + lease.isExpiredOrUnassigned(false); + return lease; + } + + private static Lease createisEligibleForGracefulShutdownLease() { + final Lease lease = createLease("leaseOwner", "leaseKey", 0); + lease.isExpiredOrUnassigned(false); + lease.checkpoint(ExtendedSequenceNumber.TRIM_HORIZON); + return lease; + } } diff --git 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ShardSyncTaskIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ShardSyncTaskIntegrationTest.java index 28915b16a..acb72c799 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ShardSyncTaskIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/ShardSyncTaskIntegrationTest.java @@ -14,6 +14,7 @@ */ package software.amazon.kinesis.leases; +import java.time.Duration; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -24,6 +25,7 @@ import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; +import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; import software.amazon.awssdk.services.kinesis.KinesisAsyncClient; @@ -31,8 +33,10 @@ import software.amazon.awssdk.services.kinesis.model.KinesisException; import software.amazon.awssdk.services.kinesis.model.Shard; import software.amazon.awssdk.services.kinesis.model.StreamStatus; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.common.InitialPositionInStream; import software.amazon.kinesis.common.InitialPositionInStreamExtended; +import software.amazon.kinesis.common.StreamIdentifier; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher; import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer; import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback; @@ -53,6 +57,7 @@ public class ShardSyncTaskIntegrationTest { private static final int MAX_CACHE_MISSES_BEFORE_RELOAD = 1000; private static final long LIST_SHARDS_CACHE_ALLOWED_AGE_IN_SECONDS = 30; private static final int CACHE_MISS_WARNING_MODULUS = 250; + private static final Duration KINESIS_REQUEST_TIMEOUT = Duration.ofSeconds(5); private static final 
MetricsFactory NULL_METRICS_FACTORY = new NullMetricsFactory(); private static KinesisAsyncClient kinesisClient; @@ -98,16 +103,22 @@ public void setup() { client, new DynamoDBLeaseSerializer(), USE_CONSISTENT_READS, - TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK); + TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); shardDetector = new KinesisShardDetector( kinesisClient, - STREAM_NAME, + StreamIdentifier.singleStreamInstance(STREAM_NAME), 500L, 50, LIST_SHARDS_CACHE_ALLOWED_AGE_IN_SECONDS, MAX_CACHE_MISSES_BEFORE_RELOAD, - CACHE_MISS_WARNING_MODULUS); + CACHE_MISS_WARNING_MODULUS, + KINESIS_REQUEST_TIMEOUT); hierarchicalShardSyncer = new HierarchicalShardSyncer(); } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorIntegrationTest.java index f52b91e12..5694b03a5 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorIntegrationTest.java @@ -19,16 +19,23 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; +import org.mockito.Mockito; import org.mockito.runners.MockitoJUnitRunner; import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; import 
software.amazon.kinesis.checkpoint.dynamodb.DynamoDBCheckpointer; +import software.amazon.kinesis.common.DdbTableConfig; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider; +import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode; import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseCoordinator; +import software.amazon.kinesis.leases.LeaseManagementConfig; import software.amazon.kinesis.leases.exceptions.DependencyException; import software.amazon.kinesis.leases.exceptions.InvalidStateException; import software.amazon.kinesis.leases.exceptions.LeasingException; @@ -44,6 +51,8 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; @RunWith(MockitoJUnitRunner.class) public class DynamoDBLeaseCoordinatorIntegrationTest { @@ -79,7 +88,12 @@ public void setup() throws ProvisionedThroughputException, DependencyException, dynamoDBClient, new DynamoDBLeaseSerializer(), useConsistentReads, - TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK); + TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); } leaseRefresher.createLeaseTableIfNotExists(10L, 10L); @@ -104,17 +118,27 @@ public void setup() throws ProvisionedThroughputException, DependencyException, leaseRefresher, WORKER_ID, LEASE_DURATION_MILLIS, + LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT, EPSILON_MILLIS, MAX_LEASES_FOR_WORKER, MAX_LEASES_TO_STEAL_AT_ONE_TIME, MAX_LEASE_RENEWER_THREAD_COUNT, INITIAL_LEASE_TABLE_READ_CAPACITY, INITIAL_LEASE_TABLE_WRITE_CAPACITY, - metricsFactory); + 
metricsFactory, + new LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig(), + LeaseManagementConfig.GracefulLeaseHandoffConfig.builder().build(), + new ConcurrentHashMap<>()); dynamoDBCheckpointer = new DynamoDBCheckpointer(coordinator, leaseRefresher); dynamoDBCheckpointer.operation(OPERATION); - coordinator.start(); + MigrationAdaptiveLeaseAssignmentModeProvider mockModeProvider = + mock(MigrationAdaptiveLeaseAssignmentModeProvider.class, Mockito.RETURNS_MOCKS); + when(mockModeProvider.getLeaseAssignmentMode()) + .thenReturn(LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT); + when(mockModeProvider.dynamicModeChangeSupportNeeded()).thenReturn(false); + + coordinator.start(mockModeProvider); } /** diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorTest.java index 2b9ffbcd6..469531ef9 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseCoordinatorTest.java @@ -1,95 +1,103 @@ package software.amazon.kinesis.leases.dynamodb; -import java.util.UUID; +import java.time.Duration; +import java.util.ArrayList; +import java.util.concurrent.ConcurrentHashMap; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; -import software.amazon.kinesis.leases.LeaseRefresher; +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import 
software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.IndexStatus; +import software.amazon.kinesis.common.DdbTableConfig; +import software.amazon.kinesis.leases.LeaseManagementConfig; import software.amazon.kinesis.leases.exceptions.DependencyException; -import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.metrics.NullMetricsFactory; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static software.amazon.kinesis.leases.dynamodb.TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK; -@RunWith(MockitoJUnitRunner.class) -public class DynamoDBLeaseCoordinatorTest { +class DynamoDBLeaseCoordinatorTest { - private static final String WORKER_ID = UUID.randomUUID().toString(); - private static final boolean ENABLE_PRIORITY_LEASE_ASSIGNMENT = true; - private static final long LEASE_DURATION_MILLIS = 5000L; - private static final long EPSILON_MILLIS = 25L; - private static final int MAX_LEASES_FOR_WORKER = Integer.MAX_VALUE; - private static final int MAX_LEASES_TO_STEAL_AT_ONE_TIME = 1; - private static final int MAX_LEASE_RENEWER_THREAD_COUNT = 20; - private static final long INITIAL_LEASE_TABLE_READ_CAPACITY = 10L; - private static final long INITIAL_LEASE_TABLE_WRITE_CAPACITY = 10L; - private static final long SECONDS_BETWEEN_POLLS = 10L; - private static final long TIMEOUT_SECONDS = 600L; + private static final String TEST_LEASE_TABLE = "SomeTable"; + private DynamoDBLeaseRefresher leaseRefresher; + private DynamoDBLeaseCoordinator dynamoDBLeaseCoordinator; + private final DynamoDbAsyncClient dynamoDbAsyncClient = + DynamoDBEmbedded.create().dynamoDbAsyncClient(); - @Mock - private LeaseRefresher leaseRefresher; - - @Mock - private 
MetricsFactory metricsFactory; - - private DynamoDBLeaseCoordinator leaseCoordinator; - - @Before - public void setup() { - this.leaseCoordinator = new DynamoDBLeaseCoordinator( - leaseRefresher, - WORKER_ID, - LEASE_DURATION_MILLIS, - ENABLE_PRIORITY_LEASE_ASSIGNMENT, - EPSILON_MILLIS, - MAX_LEASES_FOR_WORKER, - MAX_LEASES_TO_STEAL_AT_ONE_TIME, - MAX_LEASE_RENEWER_THREAD_COUNT, - INITIAL_LEASE_TABLE_READ_CAPACITY, - INITIAL_LEASE_TABLE_WRITE_CAPACITY, - metricsFactory); + @BeforeEach + void setUp() { + this.leaseRefresher = new DynamoDBLeaseRefresher( + TEST_LEASE_TABLE, + dynamoDbAsyncClient, + new DynamoDBLeaseSerializer(), + true, + NOOP_TABLE_CREATOR_CALLBACK, + Duration.ofSeconds(10), + new DdbTableConfig(), + true, + true, + new ArrayList<>()); } + // TODO - move this test to migration state machine which creates the GSI + @Disabled @Test - public void testInitialize_tableCreationSucceeds() throws Exception { - when(leaseRefresher.createLeaseTableIfNotExists()).thenReturn(true); - when(leaseRefresher.waitUntilLeaseTableExists(SECONDS_BETWEEN_POLLS, TIMEOUT_SECONDS)) - .thenReturn(true); + void initialize_withLeaseAssignmentManagerMode_assertIndexOnTable() + throws ProvisionedThroughputException, DependencyException { + + constructCoordinatorAndInitialize(); - leaseCoordinator.initialize(); + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); - verify(leaseRefresher).createLeaseTableIfNotExists(); - verify(leaseRefresher).waitUntilLeaseTableExists(SECONDS_BETWEEN_POLLS, TIMEOUT_SECONDS); + assertEquals(1, response.table().globalSecondaryIndexes().size()); + assertEquals( + IndexStatus.ACTIVE, + response.table().globalSecondaryIndexes().get(0).indexStatus()); } - @Test(expected = DependencyException.class) - public void testInitialize_tableCreationFails() throws Exception { - when(leaseRefresher.createLeaseTableIfNotExists()).thenReturn(false); - 
when(leaseRefresher.waitUntilLeaseTableExists(SECONDS_BETWEEN_POLLS, TIMEOUT_SECONDS)) - .thenReturn(false); + // TODO - move this to migration state machine test + @Disabled + @Test + void initialize_withDefaultMode_assertIndexInCreating() throws ProvisionedThroughputException, DependencyException { + constructCoordinatorAndInitialize(); + + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); - try { - leaseCoordinator.initialize(); - } finally { - verify(leaseRefresher).createLeaseTableIfNotExists(); - verify(leaseRefresher).waitUntilLeaseTableExists(SECONDS_BETWEEN_POLLS, TIMEOUT_SECONDS); - } + assertEquals(1, response.table().globalSecondaryIndexes().size()); + assertEquals( + IndexStatus.CREATING, + response.table().globalSecondaryIndexes().get(0).indexStatus()); } - /** - * Validates a {@link NullPointerException} is not thrown when the lease taker - * is stopped before it starts/exists. 
- * - * @see issue #745 - * @see issue #900 - */ - @Test - public void testStopLeaseTakerBeforeStart() { - leaseCoordinator.stopLeaseTaker(); - assertTrue(leaseCoordinator.getAssignments().isEmpty()); + private void constructCoordinatorAndInitialize() throws ProvisionedThroughputException, DependencyException { + this.dynamoDBLeaseCoordinator = new DynamoDBLeaseCoordinator( + leaseRefresher, + "Identifier", + 100L, + true, + 100L, + 10, + 10, + 10, + 100L, + 100L, + new NullMetricsFactory(), + new LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig(), + LeaseManagementConfig.GracefulLeaseHandoffConfig.builder().build(), + new ConcurrentHashMap<>()); + this.dynamoDBLeaseCoordinator.initialize(); } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscovererTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscovererTest.java new file mode 100644 index 000000000..364067bff --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseDiscovererTest.java @@ -0,0 +1,189 @@ +package software.amazon.kinesis.leases.dynamodb; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; + +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.google.common.collect.ImmutableList; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.kinesis.common.DdbTableConfig; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.LeaseRenewer; +import software.amazon.kinesis.leases.LeaseStatsRecorder; +import 
software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; +import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.NullMetricsFactory; +import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.when; + +class DynamoDBLeaseDiscovererTest { + + private static final MetricsFactory TEST_METRICS_FACTORY = new NullMetricsFactory(); + private static final String TEST_WORKER_IDENTIFIER = "TestWorkerIdentifier"; + private static final String TEST_LEASE_TABLE_NAME = "TestTableName"; + + private final DynamoDbAsyncClient dynamoDbAsyncClient = + DynamoDBEmbedded.create().dynamoDbAsyncClient(); + private final LeaseRefresher leaseRefresher = new DynamoDBLeaseRefresher( + TEST_LEASE_TABLE_NAME, + dynamoDbAsyncClient, + new DynamoDBLeaseSerializer(), + true, + TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK, + Duration.ofSeconds(10), + new DdbTableConfig(), + true, + false, + new ArrayList<>()); + private final LeaseRenewer leaseRenewer = new DynamoDBLeaseRenewer( + leaseRefresher, + TEST_WORKER_IDENTIFIER, + Duration.ofSeconds(10).toMillis(), + Executors.newFixedThreadPool(1), + TEST_METRICS_FACTORY, + new LeaseStatsRecorder(30000L, System::currentTimeMillis), + lease -> {}); + private final DynamoDBLeaseDiscoverer dynamoDBLeaseDiscoverer = new DynamoDBLeaseDiscoverer( + leaseRefresher, + leaseRenewer, + TEST_METRICS_FACTORY, + TEST_WORKER_IDENTIFIER, + Executors.newFixedThreadPool(2)); + + @BeforeEach + void setUp() throws ProvisionedThroughputException, DependencyException { + this.leaseRefresher.createLeaseTableIfNotExists(); + 
this.leaseRefresher.waitUntilLeaseTableExists(1, 30); + this.leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists(); + this.leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(1, 30); + } + + @Test + void discoverNewLeases_happyCase_assertCorrectNewLeases() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + + leaseRenewer.addLeasesToRenew( + Arrays.asList(createAssignAndAddLease("lease-1"), createAssignAndAddLease("lease-2"))); + createAssignAndAddLease("lease-3"); + createAssignAndAddLease("lease-4"); + + assertEquals(2, leaseRenewer.getCurrentlyHeldLeases().size()); + + final List response = dynamoDBLeaseDiscoverer.discoverNewLeases(); + final List responseLeaseKeys = + response.stream().map(Lease::leaseKey).collect(Collectors.toList()); + + assertEquals(2, response.size()); + assertTrue(responseLeaseKeys.contains("lease-3")); + assertTrue(responseLeaseKeys.contains("lease-4")); + } + + @Test + void discoverNewLeases_noLeasesInRenewer_assertCorrectNewLeases() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + createAssignAndAddLease("lease-3"); + createAssignAndAddLease("lease-4"); + + assertEquals(0, leaseRenewer.getCurrentlyHeldLeases().size()); + + final List response = dynamoDBLeaseDiscoverer.discoverNewLeases(); + assertEquals(2, response.size()); + } + + @Test + void discoverNewLeases_leaseRefresherThrowsException_assertEmptyResponse() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final LeaseRefresher leaseRefresher1 = Mockito.mock(LeaseRefresher.class); + when(leaseRefresher1.getLease(any())).thenThrow(new DependencyException(new RuntimeException())); + when(leaseRefresher1.listLeaseKeysForWorker(any())).thenReturn(ImmutableList.of("lease-3")); + + final DynamoDBLeaseDiscoverer dynamoDBLeaseDiscoverer = new DynamoDBLeaseDiscoverer( + leaseRefresher1, + leaseRenewer, + TEST_METRICS_FACTORY, + TEST_WORKER_IDENTIFIER, + 
Executors.newFixedThreadPool(2)); + + final List response = dynamoDBLeaseDiscoverer.discoverNewLeases(); + assertEquals(0, response.size()); + } + + @Test + void discoverNewLeases_inconsistentGSI_assertEmptyResponse() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final LeaseRefresher leaseRefresher1 = Mockito.mock(LeaseRefresher.class); + + final Lease ownerNotMatchingLease = new Lease(); + ownerNotMatchingLease.leaseKey("ownerNotMatchingKey"); + // Any random owner + ownerNotMatchingLease.leaseOwner("RandomOwner"); + + final Lease ownerMatchingLease = new Lease(); + ownerMatchingLease.leaseKey("ownerMatchingKey"); + // Any random owner + ownerMatchingLease.leaseOwner(TEST_WORKER_IDENTIFIER); + + when(leaseRefresher1.getLease(ownerNotMatchingLease.leaseKey())).thenReturn(ownerNotMatchingLease); + when(leaseRefresher1.getLease(ownerMatchingLease.leaseKey())).thenReturn(ownerMatchingLease); + when(leaseRefresher1.listLeaseKeysForWorker(TEST_WORKER_IDENTIFIER)) + .thenReturn(ImmutableList.of("ownerMatchingKey", "ownerNotMatchingKey")); + + final DynamoDBLeaseDiscoverer dynamoDBLeaseDiscoverer = new DynamoDBLeaseDiscoverer( + leaseRefresher1, + leaseRenewer, + TEST_METRICS_FACTORY, + TEST_WORKER_IDENTIFIER, + Executors.newFixedThreadPool(2)); + + final List response = dynamoDBLeaseDiscoverer.discoverNewLeases(); + // Validate that only 1 lease is returned. 
+ assertEquals(1, response.size()); + } + + @Test + void discoverNewLeases_ignorePendingCheckpointLeases_assertReadyLeases() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + createAssignAndAddLease("lease-3"); + createAssignAndAddLease("lease-4"); + // create one lease with the same leaseOwner as the first two leases except this has + // value in checkpointOwner + final Lease lease = createLease("pendingCheckpointLease"); + lease.checkpointOwner("other_worker"); + this.leaseRefresher.createLeaseIfNotExists(lease); + + assertEquals(0, leaseRenewer.getCurrentlyHeldLeases().size()); + + assertEquals(2, dynamoDBLeaseDiscoverer.discoverNewLeases().size()); + } + + private Lease createAssignAndAddLease(final String leaseKey) + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + final Lease lease = createLease(leaseKey); + this.leaseRefresher.createLeaseIfNotExists(lease); + return lease; + } + + private Lease createLease(final String leaseKey) { + final Lease lease = new Lease(); + lease.leaseKey(leaseKey); + lease.leaseOwner(TEST_WORKER_IDENTIFIER); + lease.leaseCounter(13L); + lease.checkpoint(new ExtendedSequenceNumber("123")); + lease.lastCounterIncrementNanos(System.nanoTime()); + return lease; + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherIntegrationTest.java index 21a7a44f3..70ff769ef 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherIntegrationTest.java @@ -25,10 +25,13 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.runners.MockitoJUnitRunner; +import 
software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; import software.amazon.awssdk.services.kinesis.model.HashKeyRange; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.common.HashKeyRangeForLease; import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseIntegrationTest; +import software.amazon.kinesis.leases.LeaseManagementConfig; import software.amazon.kinesis.leases.UpdateField; import software.amazon.kinesis.leases.exceptions.LeasingException; @@ -310,7 +313,12 @@ public void testWaitUntilLeaseTableExists() throws LeasingException { ddbClient, new DynamoDBLeaseSerializer(), true, - tableCreatorCallback); + tableCreatorCallback, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); refresher.createLeaseTableIfNotExists(); assertTrue(refresher.waitUntilLeaseTableExists(1, 20)); @@ -324,7 +332,16 @@ public void testWaitUntilLeaseTableExistsTimeout() throws LeasingException { final AtomicInteger sleepCounter = new AtomicInteger(0); DynamoDBLeaseRefresher refresher = new DynamoDBLeaseRefresher( - "nonexistentTable", ddbClient, new DynamoDBLeaseSerializer(), true, tableCreatorCallback) { + "nonexistentTable", + ddbClient, + new DynamoDBLeaseSerializer(), + true, + tableCreatorCallback, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()) { @Override long sleep(long timeToSleepMillis) { assertEquals(1000L, timeToSleepMillis); @@ -340,7 +357,16 @@ long sleep(long timeToSleepMillis) { @Test public void testTableCreatorCallback() throws Exception { DynamoDBLeaseRefresher refresher = new DynamoDBLeaseRefresher( 
- tableName, ddbClient, new DynamoDBLeaseSerializer(), true, tableCreatorCallback); + tableName, + ddbClient, + new DynamoDBLeaseSerializer(), + true, + tableCreatorCallback, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig(), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); refresher.performPostTableCreationAction(); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherTest.java index 2668918c9..4af44b141 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRefresherTest.java @@ -1,711 +1,827 @@ -/* - * Copyright 2019 Amazon.com, Inc. or its affiliates. - * Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ package software.amazon.kinesis.leases.dynamodb; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; -import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; +import java.util.concurrent.Executors; + +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; import software.amazon.awssdk.services.dynamodb.model.AttributeValue; import software.amazon.awssdk.services.dynamodb.model.BillingMode; -import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; -import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse; -import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest; -import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest; import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; -import software.amazon.awssdk.services.dynamodb.model.DynamoDbException; -import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; -import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; -import 
software.amazon.awssdk.services.dynamodb.model.LimitExceededException; -import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput; +import software.amazon.awssdk.services.dynamodb.model.GlobalSecondaryIndexDescription; +import software.amazon.awssdk.services.dynamodb.model.IndexStatus; import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; -import software.amazon.awssdk.services.dynamodb.model.PutItemResponse; -import software.amazon.awssdk.services.dynamodb.model.ResourceInUseException; import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; -import software.amazon.awssdk.services.dynamodb.model.ScanRequest; -import software.amazon.awssdk.services.dynamodb.model.ScanResponse; import software.amazon.awssdk.services.dynamodb.model.TableDescription; import software.amazon.awssdk.services.dynamodb.model.TableStatus; -import software.amazon.awssdk.services.dynamodb.model.Tag; import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest; import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsResponse; -import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest; -import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.leases.Lease; -import software.amazon.kinesis.leases.LeaseManagementConfig; -import software.amazon.kinesis.leases.LeaseSerializer; +import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.exceptions.DependencyException; +import software.amazon.kinesis.leases.exceptions.InvalidStateException; import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; +import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyBoolean; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static software.amazon.awssdk.services.dynamodb.model.IndexStatus.ACTIVE; +import static software.amazon.awssdk.services.dynamodb.model.IndexStatus.CREATING; +import static software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher.LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME; +import static software.amazon.kinesis.leases.dynamodb.TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK; -@RunWith(MockitoJUnitRunner.class) -public class DynamoDBLeaseRefresherTest { +class DynamoDBLeaseRefresherTest { - private static final String TABLE_NAME = "test"; - private static final boolean CONSISTENT_READS = true; - private static final boolean DELETION_PROTECTION_ENABLED = false; - private static final boolean PITR_ENABLED = true; - private static final Collection EMPTY_TAGS = DefaultSdkAutoConstructList.getInstance(); - private static final Collection TAGS = - Collections.singletonList(Tag.builder().key("foo").value("bar").build()); + private static final String TEST_LEASE_TABLE = "SomeTable"; + private final DynamoDbAsyncClient dynamoDbAsyncClient = + DynamoDBEmbedded.create().dynamoDbAsyncClient(); - @Mock - private DynamoDbAsyncClient dynamoDbClient; + @Test + void createLeaseTableWithPitr() throws DependencyException, 
ProvisionedThroughputException { + // DynamoDBLocal does not support PITR operations on table so using mocks + final DynamoDbAsyncClient mockDdbClient = mock(DynamoDbAsyncClient.class, Mockito.RETURNS_MOCKS); + DynamoDBLeaseRefresher dynamoDBLeaseRefresherWithPitr = + createLeaseRefresher(new DdbTableConfig(), mockDdbClient, false, true); - @Mock - private LeaseSerializer leaseSerializer; + when(mockDdbClient.describeTable(any(DescribeTableRequest.class))) + .thenThrow(ResourceNotFoundException.builder() + .message("Mock table does not exist scenario") + .build()); - @Mock - private TableCreatorCallback tableCreatorCallback; + final CompletableFuture future = new CompletableFuture<>(); + future.complete(UpdateContinuousBackupsResponse.builder().build()); - @Mock - private CompletableFuture mockScanFuture; + when(mockDdbClient.updateContinuousBackups(any(UpdateContinuousBackupsRequest.class))) + .thenReturn(future); - @Mock - private CompletableFuture mockPutItemFuture; + setupTable(dynamoDBLeaseRefresherWithPitr); - @Mock - private CompletableFuture mockGetItemFuture; + UpdateContinuousBackupsRequest updateContinuousBackupsRequest = UpdateContinuousBackupsRequest.builder() + .tableName(TEST_LEASE_TABLE) + .pointInTimeRecoverySpecification(builder -> builder.pointInTimeRecoveryEnabled(true)) + .build(); - @Mock - private CompletableFuture mockUpdateFuture; + verify(mockDdbClient, times(1)).updateContinuousBackups(updateContinuousBackupsRequest); + } - @Mock - private CompletableFuture mockDeleteFuture; + @Test + void createLeaseTableWithDeletionProtection() throws DependencyException, ProvisionedThroughputException { + DynamoDBLeaseRefresher dynamoDBLeaseRefresherWithDeletionProtection = + createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient, true, false); - @Mock - private CompletableFuture mockDescribeTableFuture; + dynamoDBLeaseRefresherWithDeletionProtection.createLeaseTableIfNotExists(); + 
dynamoDBLeaseRefresherWithDeletionProtection.waitUntilLeaseTableExists(1, 30); - @Mock - private CompletableFuture mockCreateTableFuture; + final DescribeTableResponse describeTableResponse = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); + + assertTrue(describeTableResponse.table().deletionProtectionEnabled()); + } - @Mock - private CompletableFuture mockUpdateContinuousBackupsFuture; + @Test + void createWorkerIdToLeaseKeyIndexIfNotExists_sanity() throws DependencyException, ProvisionedThroughputException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); - @Mock - private Lease lease; + assertFalse(leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()); - @Rule - public ExpectedException expectedException = ExpectedException.none(); + final String creationResponse = leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists(); - private DynamoDBLeaseRefresher leaseRefresher; - private DescribeTableRequest describeTableRequest; - private CreateTableRequest createTableRequest; - private UpdateContinuousBackupsRequest updateContinuousBackupsRequest; - private Map serializedLease; + final boolean waitResponse = leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(1, 30); + assertTrue(leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()); - @Before - public void setup() throws Exception { - leaseRefresher = new DynamoDBLeaseRefresher( - TABLE_NAME, dynamoDbClient, leaseSerializer, CONSISTENT_READS, tableCreatorCallback); - serializedLease = new HashMap<>(); + assertEquals(creationResponse, CREATING.toString(), "Index status mismatch"); + assertTrue(waitResponse); - describeTableRequest = - DescribeTableRequest.builder().tableName(TABLE_NAME).build(); - createTableRequest = CreateTableRequest.builder() - .tableName(TABLE_NAME) - .keySchema(leaseSerializer.getKeySchema()) - 
.attributeDefinitions(leaseSerializer.getAttributeDefinitions()) - .billingMode(BillingMode.PAY_PER_REQUEST) - .deletionProtectionEnabled(DELETION_PROTECTION_ENABLED) - .build(); - updateContinuousBackupsRequest = UpdateContinuousBackupsRequest.builder() - .tableName(TABLE_NAME) - .pointInTimeRecoverySpecification(builder -> builder.pointInTimeRecoveryEnabled(PITR_ENABLED)) - .build(); + final DescribeTableResponse describeTableResponse = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); + assertEquals( + 1, + describeTableResponse.table().globalSecondaryIndexes().size(), + "No. of index on lease table is not 1"); + assertEquals( + LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME, + describeTableResponse.table().globalSecondaryIndexes().get(0).indexName(), + "Index name mismatch"); + assertEquals( + IndexStatus.ACTIVE, + describeTableResponse.table().globalSecondaryIndexes().get(0).indexStatus()); } @Test - public void testListLeasesHandlesTimeout() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); + void waitUntilLeaseOwnerToLeaseKeyIndexExists_noTransitionToActive_assertFalse() + throws DependencyException, ProvisionedThroughputException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); - when(mockScanFuture.get(anyLong(), any(TimeUnit.class))).thenThrow(te); - when(dynamoDbClient.scan(any(ScanRequest.class))).thenReturn(mockScanFuture); + dynamoDbAsyncClient.deleteTable( + DeleteTableRequest.builder().tableName(TEST_LEASE_TABLE).build()); - verifyCancel(mockScanFuture, () -> leaseRefresher.listLeases()); + final boolean response = leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(1, 3); + assertFalse(response); + assertFalse(leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive()); } @Test - public void testListLeasesSucceedsThenFails() throws Exception { - TimeoutException te = 
setRuleForDependencyTimeout(); - - when(dynamoDbClient.scan(any(ScanRequest.class))).thenReturn(mockScanFuture); + void isLeaseOwnerGsiIndexActive() throws DependencyException, ProvisionedThroughputException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + + final DynamoDbAsyncClient mockDdbClient = mock(DynamoDbAsyncClient.class, Mockito.RETURNS_MOCKS); + final LeaseRefresher leaseRefresherForTest = new DynamoDBLeaseRefresher( + TEST_LEASE_TABLE, + mockDdbClient, + new DynamoDBLeaseSerializer(), + true, + NOOP_TABLE_CREATOR_CALLBACK, + Duration.ofSeconds(10), + new DdbTableConfig(), + true, + true, + new ArrayList<>()); + + when(mockDdbClient.describeTable(any(DescribeTableRequest.class))) + .thenThrow(ResourceNotFoundException.builder() + .message("Mock table does not exist scenario") + .build()); - Map lastEvaluatedKey = new HashMap<>(); - lastEvaluatedKey.put("Test", AttributeValue.builder().s("test").build()); + // before creating the GSI it is not active + assertFalse(leaseRefresherForTest.isLeaseOwnerToLeaseKeyIndexActive()); - when(mockScanFuture.get(anyLong(), any(TimeUnit.class))) - .thenReturn(ScanResponse.builder() - .lastEvaluatedKey(lastEvaluatedKey) + reset(mockDdbClient); + final CompletableFuture creatingTableFuture = new CompletableFuture<>(); + creatingTableFuture.complete(DescribeTableResponse.builder() + .table(TableDescription.builder() + .tableStatus(TableStatus.CREATING) .build()) - .thenThrow(te); - - verifyCancel(mockScanFuture, () -> leaseRefresher.listLeases()); + .build()); + when(mockDdbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(creatingTableFuture); + + // If describe table does not have gsi status, it will be false + assertFalse(leaseRefresherForTest.isLeaseOwnerToLeaseKeyIndexActive()); + + reset(mockDdbClient); + final CompletableFuture noGsiFuture = new CompletableFuture<>(); + 
noGsiFuture.complete(DescribeTableResponse.builder() + .table(TableDescription.builder() + .creationDateTime(Instant.now()) + .itemCount(100L) + .tableStatus(TableStatus.ACTIVE) + .globalSecondaryIndexes(GlobalSecondaryIndexDescription.builder() + .indexName("A_DIFFERENT_INDEX") + .indexStatus(ACTIVE) + .build()) + .build()) + .build()); + when(mockDdbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(noGsiFuture); + + // before creating the GSI it is not active + assertFalse(leaseRefresherForTest.isLeaseOwnerToLeaseKeyIndexActive()); + + reset(mockDdbClient); + final CompletableFuture gsiInactiveFuture = new CompletableFuture<>(); + gsiInactiveFuture.complete(DescribeTableResponse.builder() + .table(TableDescription.builder() + .creationDateTime(Instant.now()) + .itemCount(100L) + .tableStatus(TableStatus.ACTIVE) + .globalSecondaryIndexes( + GlobalSecondaryIndexDescription.builder() + .indexName("A_DIFFERENT_INDEX") + .indexStatus(ACTIVE) + .build(), + GlobalSecondaryIndexDescription.builder() + .indexName(LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME) + .indexStatus(CREATING) + .build()) + .build()) + .build()); + when(mockDdbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(gsiInactiveFuture); + + // returns false if GSI is not active + assertFalse(leaseRefresherForTest.isLeaseOwnerToLeaseKeyIndexActive()); + + reset(mockDdbClient); + final CompletableFuture gsiActiveFuture = new CompletableFuture<>(); + gsiActiveFuture.complete(DescribeTableResponse.builder() + .table(TableDescription.builder() + .creationDateTime(Instant.now()) + .itemCount(100L) + .tableStatus(TableStatus.ACTIVE) + .globalSecondaryIndexes( + GlobalSecondaryIndexDescription.builder() + .indexName("A_DIFFERENT_INDEX") + .indexStatus(ACTIVE) + .build(), + GlobalSecondaryIndexDescription.builder() + .indexName(LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME) + .indexStatus(ACTIVE) + .build()) + .build()) + .build()); + 
when(mockDdbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(gsiActiveFuture); - verify(mockScanFuture, times(2)).get(anyLong(), any(TimeUnit.class)); - verify(dynamoDbClient, times(2)).scan(any(ScanRequest.class)); + // returns true if GSI is not active + assertTrue(leaseRefresherForTest.isLeaseOwnerToLeaseKeyIndexActive()); } @Test - public void testCreateLeaseIfNotExistsTimesOut() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); - - when(dynamoDbClient.putItem(any(PutItemRequest.class))).thenReturn(mockPutItemFuture); - when(mockPutItemFuture.get(anyLong(), any())).thenThrow(te); + void assignLease_leaseWithPrevOwner_assertAssignmentToNewOwner() + throws ProvisionedThroughputException, DependencyException, InvalidStateException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease1", "leaseOwner1")); - when(leaseSerializer.toDynamoRecord(any())).thenReturn(serializedLease); - when(leaseSerializer.getDynamoNonexistantExpectation()).thenReturn(Collections.emptyMap()); - - verifyCancel(mockPutItemFuture, () -> leaseRefresher.createLeaseIfNotExists(lease)); + // Fetch a lease from assign it to owner2 + boolean response = leaseRefresher.assignLease(leaseRefresher.getLease("lease1"), "leaseOwner2"); + assertTrue(response); + assertEquals(leaseRefresher.getLease("lease1").leaseOwner(), "leaseOwner2"); } @Test - public void testWaitUntilLeaseTableExistsUpdatingStatus() throws Exception { - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())) - .thenReturn(DescribeTableResponse.builder() - .table(TableDescription.builder() - .tableStatus(TableStatus.UPDATING) - .build()) - .build()); - assertTrue(leaseRefresher.waitUntilLeaseTableExists(0, 0)); - } + void 
assignLease_unassignedLease_assertAssignmentToNewOwner() + throws ProvisionedThroughputException, DependencyException, InvalidStateException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease1", null)); - @Test - public void testWaitUntilLeaseTableExistsActiveStatus() throws Exception { - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())) - .thenReturn(DescribeTableResponse.builder() - .table(TableDescription.builder() - .tableStatus(TableStatus.ACTIVE) - .build()) - .build()); - assertTrue(leaseRefresher.waitUntilLeaseTableExists(0, 0)); + // Fetch a lease from assign it to owner2 + boolean response = leaseRefresher.assignLease(leaseRefresher.getLease("lease1"), "leaseOwner2"); + assertTrue(response); + assertEquals(leaseRefresher.getLease("lease1").leaseOwner(), "leaseOwner2"); } + // validates that the lease assignment fails if unassigned lease after fetch is deleted @Test - public void testWaitUntilLeaseTableExistsCreatingStatus() throws Exception { - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())) - .thenReturn(DescribeTableResponse.builder() - .table(TableDescription.builder() - .tableStatus(TableStatus.CREATING) - .build()) - .build()); - assertFalse(leaseRefresher.waitUntilLeaseTableExists(0, 0)); - } + void assignLease_unAssignedLeaseGetsDeleted_assertAssignemntFailure() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease1", null)); - @Test - public void 
testWaitUntilLeaseTableExistsDeletingStatus() throws Exception { - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())) - .thenReturn(DescribeTableResponse.builder() - .table(TableDescription.builder() - .tableStatus(TableStatus.DELETING) - .build()) - .build()); - assertFalse(leaseRefresher.waitUntilLeaseTableExists(0, 0)); + // Lease fetched before delete + final Lease leaseFetchedBeforeDelete = leaseRefresher.getLease("lease1"); + + // Deleted lease + leaseRefresher.deleteLease(leaseRefresher.getLease("lease1")); + assertNull(leaseRefresher.getLease("lease1")); + + // Assert that in this case the lease assignment fails + boolean response = leaseRefresher.assignLease(leaseFetchedBeforeDelete, "leaseOwner2"); + assertFalse(response); + assertNull(leaseRefresher.getLease("lease1")); } + // validates that the lease assignment fails if assigned lease after fetch is deleted + @Test + void assignLease_AssignedLeaseGetsDeleted_assertAssignemntFailure() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease1", "leaseOwner1")); + + // Lease fetched before delete + final Lease leaseFetchedBeforeDelete = leaseRefresher.getLease("lease1"); + + // Deleted lease + leaseRefresher.deleteLease(leaseRefresher.getLease("lease1")); + assertNull(leaseRefresher.getLease("lease1")); + + // Assert that in this case the lease assignment fails + boolean response = leaseRefresher.assignLease(leaseFetchedBeforeDelete, "leaseOwner2"); + assertFalse(response); + assertNull(leaseRefresher.getLease("lease1")); + } + + /** + * This test validates the behavior that a lease is assigned as long a leaseOwner has not changed but other + * field like leaseCounter or 
checkpoint updates are done after fetch and before assign call. And also + * validates that after assignment the updates on the lease with old references fails. + */ @Test - public void testGetLeaseTimesOut() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); + void assignLease_updatesOnTheLeaseFailsAfterAssignment() + throws ProvisionedThroughputException, DependencyException, InvalidStateException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final Lease originalLeaseOnWorker = createDummyLease("lease1", "leaseOwner1"); + leaseRefresher.createLeaseIfNotExists(originalLeaseOnWorker); - when(dynamoDbClient.getItem(any(GetItemRequest.class))).thenReturn(mockGetItemFuture); - when(mockGetItemFuture.get(anyLong(), any())).thenThrow(te); + // Normal lease renewal happens + leaseRefresher.renewLease(originalLeaseOnWorker); + leaseRefresher.renewLease(originalLeaseOnWorker); - when(leaseSerializer.getDynamoHashKey(anyString())).thenReturn(Collections.emptyMap()); + // Checkpoint + originalLeaseOnWorker.checkpoint(new ExtendedSequenceNumber("100")); + leaseRefresher.updateLease(originalLeaseOnWorker); - verifyCancel(mockGetItemFuture, () -> leaseRefresher.getLease("test")); - } + // Asserting that the updates have gone correctly + assertEquals(3, leaseRefresher.getLease("lease1").leaseCounter(), "LeaseCounter mismatch"); - @Test - public void testRenewLeaseTimesOut() throws Exception { - setupUpdateItemTest(); - verifyCancel(mockUpdateFuture, () -> leaseRefresher.renewLease(lease)); - } + // Lease is read for assignment (e.g. for LAM) + final Lease freshFetchedLease = leaseRefresher.getLease("lease1"); - @Test - public void testTakeLeaseTimesOut() throws Exception { - setupUpdateItemTest(); - verifyCancel(mockUpdateFuture, () -> leaseRefresher.takeLease(lease, "owner")); + // Normal lease renewal and checkpoint happens again. 
+ leaseRefresher.renewLease(originalLeaseOnWorker); + originalLeaseOnWorker.checkpoint(new ExtendedSequenceNumber("105")); + leaseRefresher.updateLease(originalLeaseOnWorker); + assertEquals(5, leaseRefresher.getLease("lease1").leaseCounter(), "LeaseCounter mismatch"); + + // assert assignment happens on lease object as the owner has not changed only heartbeat and checkpoint has + // updated. + final boolean assignmentResponse = leaseRefresher.assignLease(freshFetchedLease, "owner2"); + assertTrue(assignmentResponse, "Assignment on lease failed"); + assertEquals(6, leaseRefresher.getLease("lease1").leaseCounter(), "LeaseCounter mismatch"); + + // Assert that update or renwer fails after assignment using originalLeaseOnWorker instance. + assertFalse(leaseRefresher.updateLease(originalLeaseOnWorker), "Update on lease happened after reassignment"); + assertFalse(leaseRefresher.renewLease(originalLeaseOnWorker), "Update on lease happened after reassignment"); + assertEquals(6, leaseRefresher.getLease("lease1").leaseCounter(), "LeaseCounter mismatch"); } @Test - public void testEvictLeaseTimesOut() throws Exception { - setupUpdateItemTest(); - verifyCancel(mockUpdateFuture, () -> leaseRefresher.evictLease(lease)); + void listLeasesParallely_sanity() + throws ProvisionedThroughputException, DependencyException, InvalidStateException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease1", "leaseOwner1")); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease2", "leaseOwner2")); + final Map.Entry, List> response = + leaseRefresher.listLeasesParallely(Executors.newFixedThreadPool(2), 2); + assertEquals(2, response.getKey().size()); + assertEquals(0, response.getValue().size()); } @Test - public void testUpdateLeaseTimesOut() throws Exception { - setupUpdateItemTest(); - verifyCancel(mockUpdateFuture, () -> 
leaseRefresher.updateLease(lease)); + void listLeasesParallely_leaseWithFailingDeserialization_assertCorrectResponse() + throws ProvisionedThroughputException, DependencyException, InvalidStateException { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + leaseRefresher.createLeaseIfNotExists(createDummyLease("lease1", "leaseOwner1")); + createAndPutBadLeaseEntryInTable(); + final Map.Entry, List> response = + leaseRefresher.listLeasesParallely(Executors.newFixedThreadPool(2), 2); + assertEquals(1, response.getKey().size()); + assertEquals("lease1", response.getKey().get(0).leaseKey()); + assertEquals(1, response.getValue().size()); + assertEquals("badLeaseKey", response.getValue().get(0)); } @Test - public void testDeleteAllLeasesTimesOut() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); - when(dynamoDbClient.scan(any(ScanRequest.class))).thenReturn(mockScanFuture); - when(mockScanFuture.get(anyLong(), any())) - .thenReturn(ScanResponse.builder().items(Collections.emptyMap()).build()); - when(leaseSerializer.fromDynamoRecord(any())).thenReturn(lease); - when(leaseSerializer.getDynamoHashKey(any(Lease.class))).thenReturn(Collections.emptyMap()); - - when(dynamoDbClient.deleteItem(any(DeleteItemRequest.class))).thenReturn(mockDeleteFuture); - when(mockDeleteFuture.get(anyLong(), any())).thenThrow(te); + void initiateGracefulLeaseHandoff_sanity() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final String nextOwner = "nextOwner"; + final String currentOwner = "currentOwner"; + final Lease lease = createDummyLease("lease1", currentOwner); + leaseRefresher.createLeaseIfNotExists(lease); + leaseRefresher.initiateGracefulLeaseHandoff(lease, nextOwner); + final Lease updatedLease = leaseRefresher.getLease(lease.leaseKey()); - 
verifyCancel(mockDeleteFuture, () -> leaseRefresher.deleteAll()); + assertEquals(nextOwner, updatedLease.leaseOwner()); + assertEquals(currentOwner, updatedLease.checkpointOwner()); } @Test - public void testDeleteLeaseTimesOut() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); - when(leaseSerializer.getDynamoHashKey(any(Lease.class))).thenReturn(Collections.emptyMap()); - - when(dynamoDbClient.deleteItem(any(DeleteItemRequest.class))).thenReturn(mockDeleteFuture); - when(mockDeleteFuture.get(anyLong(), any())).thenThrow(te); - - verifyCancel(mockDeleteFuture, () -> leaseRefresher.deleteLease(lease)); + void initiateGracefulLeaseHandoff_conditionalFailure() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final String nextOwner = "nextOwner"; + final String currentOwner = "currentOwner"; + final Lease lease = createDummyLease("lease1", currentOwner); + // should not assign if there is a checkpointOwner is not null. 
+ lease.checkpointOwner(currentOwner); + leaseRefresher.createLeaseIfNotExists(lease); + assertFalse(leaseRefresher.initiateGracefulLeaseHandoff(lease, nextOwner)); } @Test - public void testLeaseTableExistsTimesOut() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); - - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())).thenThrow(te); - - verifyCancel(mockDescribeTableFuture, () -> leaseRefresher.leaseTableExists()); + void renewLease_testGracefulShutdown_updateLeaseWhenDetectedShutdown() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + DynamoDBLeaseRefresher leaseRefresherSpy = spy(leaseRefresher); + setupTable(leaseRefresher); + final String nextOwner = "nextOwner"; + final String currentOwner = "currentOwner"; + final Lease lease = createDummyLease("lease1", currentOwner); + leaseRefresher.createLeaseIfNotExists(lease); + leaseRefresher.initiateGracefulLeaseHandoff(lease, nextOwner); + // remove local checkpointOwner and reset leaseOwner to pretend we don't know that shutdown is requested + lease.checkpointOwner(null); + lease.leaseOwner(currentOwner); + // renew should see that the lease has the shutdown attributes and so mark them on the passed-in lease. 
+ assertTrue(leaseRefresherSpy.renewLease(lease)); + assertEquals(currentOwner, lease.checkpointOwner()); + assertEquals(nextOwner, lease.leaseOwner()); + assertEquals(lease, leaseRefresher.getLease(lease.leaseKey())); + verify(leaseRefresherSpy, times(2)).renewLease(lease); } @Test - public void testCreateLeaseTableProvisionedBillingModeIfNotExists() throws Exception { - leaseRefresher = new DynamoDBLeaseRefresher( - TABLE_NAME, - dynamoDbClient, - leaseSerializer, - CONSISTENT_READS, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PROVISIONED, - DELETION_PROTECTION_ENABLED); - - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - - final ProvisionedThroughput throughput = ProvisionedThroughput.builder() - .readCapacityUnits(10L) - .writeCapacityUnits(10L) - .build(); - final CreateTableRequest createTableRequest = CreateTableRequest.builder() - .tableName(TABLE_NAME) - .keySchema(leaseSerializer.getKeySchema()) - .attributeDefinitions(leaseSerializer.getAttributeDefinitions()) - .provisionedThroughput(throughput) - .deletionProtectionEnabled(DELETION_PROTECTION_ENABLED) - .build(); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenReturn(null); - - final boolean result = leaseRefresher.createLeaseTableIfNotExists(10L, 10L); - - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), 
eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - Assert.assertTrue(result); + void renewLease_testGracefulShutdown_conditionalFailureDueToNoLeaseInDdb_NotTryingToRenew() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + DynamoDBLeaseRefresher leaseRefresherSpy = spy(leaseRefresher); + setupTable(leaseRefresher); + final Lease lease = createDummyLease("lease1", "currentOwner"); + assertFalse(leaseRefresherSpy.renewLease(lease)); + verify(leaseRefresherSpy, times(1)).renewLease(lease); } @Test - public void testCreateLeaseTableWithTagsIfNotExists() throws Exception { - leaseRefresher = new DynamoDBLeaseRefresher( - TABLE_NAME, - dynamoDbClient, - leaseSerializer, - CONSISTENT_READS, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PROVISIONED, - DELETION_PROTECTION_ENABLED, - TAGS); - - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS)) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); + void renewLease_testGracefulShutdown_remoteLeaseHasDifferentOwner_NotTryingToRenew() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + DynamoDBLeaseRefresher leaseRefresherSpy = spy(leaseRefresher); + final Lease lease = createDummyLease("lease1", "currentOwner"); + final Lease originalLease = lease.copy(); + leaseRefresher.createLeaseIfNotExists(lease); - final ProvisionedThroughput throughput = ProvisionedThroughput.builder() - .readCapacityUnits(10L) - .writeCapacityUnits(10L) - .build(); - final CreateTableRequest createTableRequest = 
CreateTableRequest.builder() - .tableName(TABLE_NAME) - .keySchema(leaseSerializer.getKeySchema()) - .attributeDefinitions(leaseSerializer.getAttributeDefinitions()) - .provisionedThroughput(throughput) - .deletionProtectionEnabled(DELETION_PROTECTION_ENABLED) - .tags(TAGS) - .build(); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS)) - .thenReturn(null); - - final boolean result = leaseRefresher.createLeaseTableIfNotExists(10L, 10L); + // call assignLease to change owner and call initiateGracefulLeaseHandoff to add shutdown attribute + leaseRefresher.assignLease(lease, "nextOwner"); + leaseRefresher.initiateGracefulLeaseHandoff(lease, "nextOwner2"); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS); - verify(mockCreateTableFuture, times(1)) - .get(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS); - Assert.assertTrue(result); + assertFalse(leaseRefresherSpy.renewLease(originalLease)); + verify(leaseRefresherSpy, times(1)).renewLease(originalLease); } @Test - public void testCreateLeaseTableIfNotExists() throws Exception { - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - 
.thenReturn(null); - - final boolean result = leaseRefresher.createLeaseTableIfNotExists(); - - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - Assert.assertTrue(result); + void renewLease_testGracefulShutdown_continueUpdateLeaseUntilLeaseIsTransferred() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final String nextOwner = "nextOwner"; + final String currentOwner = "currentOwner"; + final Lease lease = createDummyLease("lease1", currentOwner); + leaseRefresher.createLeaseIfNotExists(lease); + + assertTrue(leaseRefresher.initiateGracefulLeaseHandoff(lease, nextOwner)); + // try consecutive renews and see if they pass + assertTrue(leaseRefresher.renewLease(lease)); + assertTrue(leaseRefresher.renewLease(lease)); + + // now we call assignLease, this will remove the checkpointOwner attribute and increment leaseCounter + final Long currentCounter = lease.leaseCounter(); + assertTrue(leaseRefresher.assignLease(lease, lease.leaseOwner())); + assertEquals(currentCounter + 1, lease.leaseCounter()); + // On the lease renewal side, we want to pretend to simulate that the current owner doesn't know about the + // lease re-assignment yet. So we reset leaseCounter and the owner fields. 
+ lease.leaseCounter(currentCounter); + lease.leaseOwner(nextOwner); + lease.checkpointOwner(currentOwner); + assertFalse(leaseRefresher.renewLease(lease)); } @Test - public void testCreateLeaseTableIfNotExistsWithPitrEnabled() throws Exception { - DynamoDBLeaseRefresher leaseRefresherWithEnabledPitr = new DynamoDBLeaseRefresher( - TABLE_NAME, - dynamoDbClient, - leaseSerializer, - CONSISTENT_READS, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PAY_PER_REQUEST, - DELETION_PROTECTION_ENABLED, - PITR_ENABLED, - EMPTY_TAGS); - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenReturn(null); - when(dynamoDbClient.updateContinuousBackups(updateContinuousBackupsRequest)) - .thenReturn(mockUpdateContinuousBackupsFuture); - when(mockUpdateContinuousBackupsFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenReturn(null); - final boolean result = leaseRefresherWithEnabledPitr.createLeaseTableIfNotExists(); - - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(dynamoDbClient, times(1)).updateContinuousBackups(updateContinuousBackupsRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), 
eq(TimeUnit.MILLISECONDS)); - Assert.assertTrue(result); + void assignLease_alwaysRemoveCheckpointOwner() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final String nextOwner = "nextOwner"; + final String currentOwner = "currentOwner"; + final Lease lease = createDummyLease("lease1", currentOwner); + leaseRefresher.createLeaseIfNotExists(lease); + leaseRefresher.initiateGracefulLeaseHandoff(lease, nextOwner); + + assertEquals(currentOwner, leaseRefresher.getLease(lease.leaseKey()).checkpointOwner()); + assertTrue(leaseRefresher.assignLease(lease, nextOwner)); + final Lease updatedLease = leaseRefresher.getLease(lease.leaseKey()); + assertNull(updatedLease.checkpointOwner()); + assertEquals(nextOwner, updatedLease.leaseOwner()); } @Test - public void testCreateLeaseTableProvisionedWithDeletionProtectionIfNotExists() throws Exception { - DynamoDBLeaseRefresher leaseRefresherWithEnabledDeletionProtection = new DynamoDBLeaseRefresher( - TABLE_NAME, - dynamoDbClient, - leaseSerializer, - CONSISTENT_READS, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PROVISIONED, - true); - - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); + void assignLease_conditionalFailureBecauseCheckpointOwnerIsNotExpected() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final String nextOwner = "nextOwner"; + final String currentOwner = "currentOwner"; - final ProvisionedThroughput throughput = ProvisionedThroughput.builder() - .readCapacityUnits(10L) - 
.writeCapacityUnits(10L) - .build(); - final CreateTableRequest createTableRequest = CreateTableRequest.builder() - .tableName(TABLE_NAME) - .keySchema(leaseSerializer.getKeySchema()) - .attributeDefinitions(leaseSerializer.getAttributeDefinitions()) - .provisionedThroughput(throughput) - .deletionProtectionEnabled(true) - .build(); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenReturn(null); - - final boolean result = leaseRefresherWithEnabledDeletionProtection.createLeaseTableIfNotExists(10L, 10L); + final Lease lease = createDummyLease("lease1", nextOwner); + lease.checkpointOwner(currentOwner); + leaseRefresher.createLeaseIfNotExists(lease); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - Assert.assertTrue(result); + lease.checkpointOwner("someone else now"); + assertFalse(leaseRefresher.assignLease(lease, lease.leaseOwner())); } @Test - public void testCreateLeaseTableIfNotExists_throwsDependencyException() throws Exception { - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(new InterruptedException()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - 
.thenThrow(ResourceInUseException.builder() - .message("Table already exists") - .build()); + void createLeaseTableIfNotExists_billingModeProvisioned_assertCorrectModeAndCapacity() throws Exception { + final DynamoDbAsyncClient dbAsyncClient = DynamoDBEmbedded.create().dynamoDbAsyncClient(); + final LeaseRefresher leaseRefresher = createLeaseRefresher(createProvisionedTableConfig(), dbAsyncClient); + setupTable(leaseRefresher); + + final DescribeTableResponse describeTableResponse = dbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); - Assert.assertFalse(leaseRefresher.createLeaseTableIfNotExists()); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); + assertProvisionTableMode(describeTableResponse, 100L, 200L); } @Test - public void testCreateLeaseTableIfNotExists_tableAlreadyExists_throwsResourceInUseException_expectFalse() - throws Exception { - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceInUseException.builder() - .message("Table already exists") - .build()); + void createLeaseTableIfNotExists_billingModeOnDemand_assertCorrectMode() 
throws Exception { + final DynamoDbAsyncClient dbAsyncClient = DynamoDBEmbedded.create().dynamoDbAsyncClient(); + final LeaseRefresher leaseRefresher = createLeaseRefresher(createOnDemandTableConfig(), dbAsyncClient); + setupTable(leaseRefresher); + + final DescribeTableResponse describeTableResponse = dbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); - Assert.assertFalse(leaseRefresher.createLeaseTableIfNotExists()); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); + assertOnDemandTableMode(describeTableResponse); } @Test - public void testCreateLeaseTableIfNotExists_throwsLimitExceededException_expectProvisionedThroughputException() - throws Exception { - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(LimitExceededException.builder().build()); + void createLeaseTableIfNotExistsOverloadedMethod_billingModeOnDemand_assertProvisionModeWithOveridenCapacity() + throws DependencyException, ProvisionedThroughputException { + final DynamoDbAsyncClient dbAsyncClient = DynamoDBEmbedded.create().dynamoDbAsyncClient(); + final LeaseRefresher leaseRefresher 
= createLeaseRefresher(createOnDemandTableConfig(), dbAsyncClient); + leaseRefresher.createLeaseTableIfNotExists(50L, 100L); + leaseRefresher.waitUntilLeaseTableExists(1, 1000); + + final DescribeTableResponse describeTableResponse = dbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); - Assert.assertThrows(ProvisionedThroughputException.class, () -> leaseRefresher.createLeaseTableIfNotExists()); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); + assertProvisionTableMode(describeTableResponse, 50L, 100L); } @Test - public void testCreateLeaseTableIfNotExists_throwsDynamoDbException_expectDependencyException() throws Exception { - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(DynamoDbException.builder().build()); + void createLeaseTableIfNotExistsOverloadedMethod_billingModeProvisioned_assertProvisionModeWithOveridenCapacity() + throws ProvisionedThroughputException, DependencyException { + final DynamoDbAsyncClient dbAsyncClient = DynamoDBEmbedded.create().dynamoDbAsyncClient(); + final LeaseRefresher leaseRefresher = 
createLeaseRefresher(createProvisionedTableConfig(), dbAsyncClient); + leaseRefresher.createLeaseTableIfNotExists(50L, 100L); + leaseRefresher.waitUntilLeaseTableExists(1, 1000); + + final DescribeTableResponse describeTableResponse = dbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); - Assert.assertThrows(DependencyException.class, () -> leaseRefresher.createLeaseTableIfNotExists()); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); + assertProvisionTableMode(describeTableResponse, 50L, 100L); } @Test - public void testCreateLeaseTableIfNotExists_throwsTimeoutException_expectDependencyException() throws Exception { - when(dynamoDbClient.describeTable(describeTableRequest)).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); - when(dynamoDbClient.createTable(createTableRequest)).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get( - eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS))) - .thenThrow(new TimeoutException()); + void createLeaseOwnerToLeaseKeyIndexIfNotExists_baseTableInProvisionedMode_assertGSIInProvisionedMode() + throws ProvisionedThroughputException, DependencyException { + final DynamoDbAsyncClient dbAsyncClient = DynamoDBEmbedded.create().dynamoDbAsyncClient(); + final LeaseRefresher leaseRefresher = createLeaseRefresher(createProvisionedTableConfig(), 
dbAsyncClient); - Assert.assertThrows(DependencyException.class, () -> leaseRefresher.createLeaseTableIfNotExists()); - verify(dynamoDbClient, times(1)).describeTable(describeTableRequest); - verify(dynamoDbClient, times(1)).createTable(createTableRequest); - verify(mockDescribeTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); - verify(mockCreateTableFuture, times(1)) - .get(eq(LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT.toMillis()), eq(TimeUnit.MILLISECONDS)); + // Creates base table and GSI + setupTableWithLeaseKeyIndex(leaseRefresher); + + final DescribeTableResponse describeTableResponse = dbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); + + assertProvisionTableMode(describeTableResponse, 100L, 200L); + assertEquals( + 100L, + describeTableResponse + .table() + .globalSecondaryIndexes() + .get(0) + .provisionedThroughput() + .readCapacityUnits(), + "GSI RCU is not 100L"); + assertEquals( + 200L, + describeTableResponse + .table() + .globalSecondaryIndexes() + .get(0) + .provisionedThroughput() + .writeCapacityUnits(), + "GSI RCU is not 100L"); } @Test - public void testCreateLeaseTableProvisionedBillingModeTimesOut() throws Exception { - leaseRefresher = new DynamoDBLeaseRefresher( - TABLE_NAME, - dynamoDbClient, - leaseSerializer, - CONSISTENT_READS, - tableCreatorCallback, - LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, - BillingMode.PROVISIONED, - false); - TimeoutException te = setRuleForDependencyTimeout(); - - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); + void createLeaseOwnerToLeaseKeyIndexIfNotExists_baseTableInOnDemandMode_assertGSIInOnDemandMode() + throws ProvisionedThroughputException, DependencyException 
{ + final DynamoDbAsyncClient dbAsyncClient = DynamoDBEmbedded.create().dynamoDbAsyncClient(); + final LeaseRefresher leaseRefresher = createLeaseRefresher(createOnDemandTableConfig(), dbAsyncClient); - when(dynamoDbClient.createTable(any(CreateTableRequest.class))).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get(anyLong(), any())).thenThrow(te); + // Creates base table and GSI + setupTableWithLeaseKeyIndex(leaseRefresher); - verifyCancel(mockCreateTableFuture, () -> leaseRefresher.createLeaseTableIfNotExists(10L, 10L)); + final DescribeTableResponse describeTableResponse = dbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_LEASE_TABLE) + .build()) + .join(); + + assertOnDemandTableMode(describeTableResponse); + assertEquals( + 0L, + describeTableResponse + .table() + .globalSecondaryIndexes() + .get(0) + .provisionedThroughput() + .readCapacityUnits(), + "GSI RCU is not 100L"); + assertEquals( + 0L, + describeTableResponse + .table() + .globalSecondaryIndexes() + .get(0) + .provisionedThroughput() + .writeCapacityUnits(), + "GSI RCU is not 100L"); } @Test - public void testCreateLeaseTableTimesOut() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); + public void takeLease_removesCheckpointOwner() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final Lease lease = createPendingCheckpointOwnerLease(leaseRefresher); + assertTrue(leaseRefresher.takeLease(lease, "newOwner")); - when(dynamoDbClient.describeTable(any(DescribeTableRequest.class))).thenReturn(mockDescribeTableFuture); - when(mockDescribeTableFuture.get(anyLong(), any())) - .thenThrow(ResourceNotFoundException.builder() - .message("Table doesn't exist") - .build()); + final Lease updatedLease = leaseRefresher.getLease(lease.leaseKey()); + assertEquals(lease, updatedLease); + assertNull(updatedLease.checkpointOwner()); + } - 
when(dynamoDbClient.createTable(any(CreateTableRequest.class))).thenReturn(mockCreateTableFuture); - when(mockCreateTableFuture.get(anyLong(), any())).thenThrow(te); + @Test + public void evictLease_removesCheckpointOwner() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final Lease lease = createPendingCheckpointOwnerLease(leaseRefresher); + final long originalCounter = lease.leaseCounter(); + assertTrue(leaseRefresher.evictLease(lease)); - verifyCancel(mockCreateTableFuture, () -> leaseRefresher.createLeaseTableIfNotExists()); + final Lease updatedLease = leaseRefresher.getLease(lease.leaseKey()); + assertEquals(lease, updatedLease); + assertNull(updatedLease.checkpointOwner()); + assertNotNull(updatedLease.leaseOwner()); + assertEquals(originalCounter + 1, lease.leaseCounter()); } - @FunctionalInterface - private interface TestCaller { - void call() throws Exception; - } + @Test + public void evictLease_removesOwnerIfCheckpointOwnerIsNull() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final Lease lease = createDummyLease("1", "ownerA"); + final long originalCounter = lease.leaseCounter(); + leaseRefresher.createLeaseIfNotExists(lease); + assertTrue(leaseRefresher.evictLease(lease)); - private void verifyCancel(Future future, TestCaller toExecute) throws Exception { - try { - toExecute.call(); - } finally { - verify(future).cancel(anyBoolean()); - } + final Lease updatedLease = leaseRefresher.getLease(lease.leaseKey()); + assertEquals(lease, updatedLease); + assertNull(updatedLease.checkpointOwner()); + assertNull(updatedLease.leaseOwner()); + assertEquals(originalCounter + 1, lease.leaseCounter()); } - private void setupUpdateItemTest() throws Exception { - TimeoutException te = setRuleForDependencyTimeout(); + @Test + public void 
evictLease_noOpIfLeaseNotExists() throws Exception { + DynamoDBLeaseRefresher leaseRefresher = createLeaseRefresher(new DdbTableConfig(), dynamoDbAsyncClient); + setupTable(leaseRefresher); + final Lease lease = createDummyLease("1", "ownerA"); + assertFalse(leaseRefresher.evictLease(lease)); - when(leaseSerializer.getDynamoHashKey(any(Lease.class))).thenReturn(Collections.emptyMap()); - when(leaseSerializer.getDynamoLeaseCounterExpectation(any(Lease.class))).thenReturn(Collections.emptyMap()); - when(leaseSerializer.getDynamoLeaseCounterUpdate(any(Lease.class))).thenReturn(Collections.emptyMap()); - when(leaseSerializer.getDynamoTakeLeaseUpdate(any(), anyString())).thenReturn(Collections.emptyMap()); + // now evictLease should use the notExist condition to try updating the lease. + // we want to see it fails + lease.leaseOwner(null); + assertFalse(leaseRefresher.evictLease(lease)); + } - when(dynamoDbClient.updateItem(any(UpdateItemRequest.class))).thenReturn(mockUpdateFuture); - when(mockUpdateFuture.get(anyLong(), any())).thenThrow(te); + private Lease createPendingCheckpointOwnerLease(final LeaseRefresher leaseRefresher) throws Exception { + final Lease lease = createDummyLease("1", "ownerA"); + lease.checkpointOwner("checkpointOwner"); + leaseRefresher.createLeaseIfNotExists(lease); + return lease; } - private TimeoutException setRuleForDependencyTimeout() { - TimeoutException te = new TimeoutException("Timeout"); - expectedException.expect(DependencyException.class); - expectedException.expectCause(equalTo(te)); + private static void assertOnDemandTableMode(final DescribeTableResponse describeTableResponse) { + assertEquals( + BillingMode.PAY_PER_REQUEST, + describeTableResponse.table().billingModeSummary().billingMode(), + "Table mode is not PAY_PER_REQUEST"); + assertEquals( + 0L, + describeTableResponse.table().provisionedThroughput().readCapacityUnits(), + "PAY_PER_REQUEST mode on table does not have 0 RCU"); + assertEquals( + 0L, + 
describeTableResponse.table().provisionedThroughput().writeCapacityUnits(), + "PAY_PER_REQUEST mode on table does not have 0 WCU"); + } + + private static void assertProvisionTableMode( + final DescribeTableResponse describeTableResponse, final long rcu, final long wcu) { + // BillingModeSummary is null in case of PROVISIONED + assertNull( + describeTableResponse.table().billingModeSummary(), "BillingModeSummary is not null for provisionMode"); + assertEquals( + rcu, + describeTableResponse.table().provisionedThroughput().readCapacityUnits(), + "RCU set on the Table is incorrect"); + assertEquals( + wcu, + describeTableResponse.table().provisionedThroughput().writeCapacityUnits(), + "WCU set on the Table is incorrect"); + } + + private static DdbTableConfig createProvisionedTableConfig() { + final DdbTableConfig ddbTableConfig = new DdbTableConfig(); + ddbTableConfig.billingMode(BillingMode.PROVISIONED); + ddbTableConfig.readCapacity(100); + ddbTableConfig.writeCapacity(200); + return ddbTableConfig; + } + + private static DdbTableConfig createOnDemandTableConfig() { + final DdbTableConfig ddbTableConfig = new DdbTableConfig(); + ddbTableConfig.billingMode(BillingMode.PAY_PER_REQUEST); + return ddbTableConfig; + } + + private DynamoDBLeaseRefresher createLeaseRefresher( + final DdbTableConfig ddbTableConfig, final DynamoDbAsyncClient dynamoDbAsyncClient) { + return createLeaseRefresher(ddbTableConfig, dynamoDbAsyncClient, false, false); + } + + private DynamoDBLeaseRefresher createLeaseRefresher( + final DdbTableConfig ddbTableConfig, + final DynamoDbAsyncClient dynamoDbAsyncClient, + boolean deletionProtectionEnabled, + boolean pitrEnabled) { + return new DynamoDBLeaseRefresher( + TEST_LEASE_TABLE, + dynamoDbAsyncClient, + new DynamoDBLeaseSerializer(), + true, + NOOP_TABLE_CREATOR_CALLBACK, + Duration.ofSeconds(10), + ddbTableConfig, + deletionProtectionEnabled, + pitrEnabled, + new ArrayList<>()); + } + + private Lease createDummyLease(final String leaseKey, 
final String leaseOwner) { + final Lease lease = new Lease(); + lease.leaseKey(leaseKey); + lease.leaseOwner(leaseOwner); + lease.checkpoint(ExtendedSequenceNumber.TRIM_HORIZON); + return lease; + } + + private void setupTable(final LeaseRefresher refresher) throws ProvisionedThroughputException, DependencyException { + refresher.createLeaseTableIfNotExists(); + refresher.waitUntilLeaseTableExists(1, 100); + } + + private void setupTableWithLeaseKeyIndex(final LeaseRefresher refresher) + throws ProvisionedThroughputException, DependencyException { + refresher.createLeaseTableIfNotExists(); + refresher.waitUntilLeaseTableExists(1, 100); + refresher.createLeaseOwnerToLeaseKeyIndexIfNotExists(); + refresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(1, 100); + } + + // This entry is bad as it does not have required field and thus deserialization fails. + private void createAndPutBadLeaseEntryInTable() { + final PutItemRequest putItemRequest = PutItemRequest.builder() + .tableName(TEST_LEASE_TABLE) + .item(ImmutableMap.of( + "leaseKey", AttributeValue.builder().s("badLeaseKey").build())) + .build(); - return te; + dynamoDbAsyncClient.putItem(putItemRequest); } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerBillingModePayPerRequestIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerBillingModePayPerRequestIntegrationTest.java index dd6a17a27..f6cc08ae3 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerBillingModePayPerRequestIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerBillingModePayPerRequestIntegrationTest.java @@ -21,11 +21,14 @@ import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; +import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; import 
software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseIntegrationBillingModePayPerRequestTest; import software.amazon.kinesis.leases.LeaseRenewer; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.exceptions.LeasingException; +import software.amazon.kinesis.metrics.MetricsFactory; import software.amazon.kinesis.metrics.NullMetricsFactory; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; @@ -42,6 +45,11 @@ public class DynamoDBLeaseRenewerBillingModePayPerRequestIntegrationTest // This test case's leases last 2 seconds private static final long LEASE_DURATION_MILLIS = 2000L; + @Mock + private LeaseStatsRecorder leaseStatsRecorder; + + private static final MetricsFactory NULL_METRICS_FACTORY = new NullMetricsFactory(); + private LeaseRenewer renewer; @Before @@ -51,7 +59,9 @@ public void setup() { "foo", LEASE_DURATION_MILLIS, Executors.newCachedThreadPool(), - new NullMetricsFactory()); + NULL_METRICS_FACTORY, + leaseStatsRecorder, + lease -> {}); } @Test @@ -162,7 +172,7 @@ public void testUpdateLease() throws LeasingException { } @Test - public void testUpdateLostLease() throws LeasingException { + public void testUpdateLostLease() throws Exception { TestHarnessBuilder builder = new TestHarnessBuilder(leaseRefresher); builder.withLease("1", "foo").build(); @@ -262,7 +272,13 @@ public void testInitialize() throws LeasingException { builder.withLease(shardId, owner); Map leases = builder.build(); DynamoDBLeaseRenewer renewer = new DynamoDBLeaseRenewer( - leaseRefresher, owner, 30000L, Executors.newCachedThreadPool(), new NullMetricsFactory()); + leaseRefresher, + owner, + 30000L, + Executors.newCachedThreadPool(), + NULL_METRICS_FACTORY, + leaseStatsRecorder, + lease -> {}); renewer.initialize(); Map heldLeases = renewer.getCurrentlyHeldLeases(); assertThat(heldLeases.size(), equalTo(leases.size())); @@ -277,7 +293,13 @@ public void testInitializeBillingMode() throws 
LeasingException { builder.withLease(shardId, owner); Map leases = builder.build(); DynamoDBLeaseRenewer renewer = new DynamoDBLeaseRenewer( - leaseRefresher, owner, 30000L, Executors.newCachedThreadPool(), new NullMetricsFactory()); + leaseRefresher, + owner, + 30000L, + Executors.newCachedThreadPool(), + NULL_METRICS_FACTORY, + leaseStatsRecorder, + lease -> {}); renewer.initialize(); Map heldLeases = renewer.getCurrentlyHeldLeases(); assertThat(heldLeases.size(), equalTo(leases.size())); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerIntegrationTest.java index 5abd3a4bf..617f9d608 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerIntegrationTest.java @@ -21,11 +21,14 @@ import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; +import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; import software.amazon.kinesis.leases.Lease; import software.amazon.kinesis.leases.LeaseIntegrationTest; import software.amazon.kinesis.leases.LeaseRenewer; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.exceptions.LeasingException; +import software.amazon.kinesis.metrics.MetricsFactory; import software.amazon.kinesis.metrics.NullMetricsFactory; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; @@ -41,6 +44,11 @@ public class DynamoDBLeaseRenewerIntegrationTest extends LeaseIntegrationTest { // This test case's leases last 2 seconds private static final long LEASE_DURATION_MILLIS = 2000L; + @Mock + private LeaseStatsRecorder leaseStatsRecorder; + + private static final MetricsFactory NULL_METRICS_FACTORY = new 
NullMetricsFactory(); + private LeaseRenewer renewer; @Before @@ -50,7 +58,9 @@ public void setup() { "foo", LEASE_DURATION_MILLIS, Executors.newCachedThreadPool(), - new NullMetricsFactory()); + NULL_METRICS_FACTORY, + leaseStatsRecorder, + lease -> {}); } @Test @@ -161,7 +171,7 @@ public void testUpdateLease() throws LeasingException { } @Test - public void testUpdateLostLease() throws LeasingException { + public void testUpdateLostLease() throws Exception { TestHarnessBuilder builder = new TestHarnessBuilder(leaseRefresher); builder.withLease("1", "foo").build(); @@ -261,7 +271,13 @@ public void testInitialize() throws LeasingException { builder.withLease(shardId, owner); Map leases = builder.build(); DynamoDBLeaseRenewer renewer = new DynamoDBLeaseRenewer( - leaseRefresher, owner, 30000L, Executors.newCachedThreadPool(), new NullMetricsFactory()); + leaseRefresher, + owner, + 30000L, + Executors.newCachedThreadPool(), + NULL_METRICS_FACTORY, + leaseStatsRecorder, + lease -> {}); renewer.initialize(); Map heldLeases = renewer.getCurrentlyHeldLeases(); assertThat(heldLeases.size(), equalTo(leases.size())); @@ -276,7 +292,13 @@ public void testInitializeBillingMode() throws LeasingException { builder.withLease(shardId, owner); Map leases = builder.build(); DynamoDBLeaseRenewer renewer = new DynamoDBLeaseRenewer( - leaseRefresher, owner, 30000L, Executors.newCachedThreadPool(), new NullMetricsFactory()); + leaseRefresher, + owner, + 30000L, + Executors.newCachedThreadPool(), + NULL_METRICS_FACTORY, + leaseStatsRecorder, + lease -> {}); renewer.initialize(); Map heldLeases = renewer.getCurrentlyHeldLeases(); assertThat(heldLeases.size(), equalTo(leases.size())); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerTest.java index 16a443c17..d6d0a5f6d 100644 --- 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseRenewerTest.java @@ -1,171 +1,245 @@ -/* - * Copyright 2019 Amazon.com, Inc. or its affiliates. - * Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package software.amazon.kinesis.leases.dynamodb; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; +import java.time.Duration; +import java.util.Map; import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; +import java.util.concurrent.Future; +import java.util.function.Consumer; + +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; -import software.amazon.kinesis.common.HashKeyRangeForLease; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import 
software.amazon.awssdk.core.util.DefaultSdkAutoConstructList; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.kinesis.common.DdbTableConfig; import software.amazon.kinesis.leases.Lease; -import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.LeaseManagementConfig; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.exceptions.DependencyException; import software.amazon.kinesis.leases.exceptions.InvalidStateException; import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException; import software.amazon.kinesis.metrics.NullMetricsFactory; -import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Matchers.eq; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.doThrow; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.leases.dynamodb.TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK; +import static software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber.LATEST; +import static 
software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber.TRIM_HORIZON; + +class DynamoDBLeaseRenewerTest { + + private static final String TEST_NUMBER_VALUE_BELOW_DDB_RANGE = + "0.00000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000001"; + private static final String TEST_NUMBER_HIGHER_PRECISION = "1.00000000000000000000000000000000000001"; + private static final String TEST_NUMBER_WITH_HIGH_DECIMAL_VALUES = + "0.00000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000016843473634062791"; + private static final String TEST_NUMBER_WITH_ALL_ZERO_DECIMAL_VALUES = + "0.00000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000"; + + private static final String WORKER_ID = "WorkerId"; + private static final String TEST_LEASE_TABLE = "SomeTable"; + private DynamoDBLeaseRefresher leaseRefresher; + private DynamoDBLeaseRenewer leaseRenewer; + private LeaseStatsRecorder leaseStatsRecorder; -@RunWith(MockitoJUnitRunner.class) -public class DynamoDBLeaseRenewerTest { - private final String workerIdentifier = "WorkerId"; - private final long leaseDurationMillis = 10000; - private DynamoDBLeaseRenewer renewer; - private List leasesToRenew; + @Mock + private Consumer mockLeaseGracefulShutdownCallBack; @Mock - private LeaseRefresher leaseRefresher; - - private static Lease newLease(String leaseKey) { - return new Lease( - leaseKey, - "LeaseOwner", - 0L, - UUID.randomUUID(), - System.nanoTime(), - null, - null, - 1L, - new HashSet<>(), - new HashSet<>(), - null, - HashKeyRangeForLease.deserialize("1", "2")); - } + private ExecutorService mockExecutorService; - @Before - public void before() { - leasesToRenew = null; - renewer = new DynamoDBLeaseRenewer( + @Mock + private Future mockFuture; + + private Callable 
leaseRenewalCallable; + + private final DynamoDbAsyncClient dynamoDbAsyncClient = + DynamoDBEmbedded.create().dynamoDbAsyncClient(); + + @BeforeEach + void setup() throws Exception { + MockitoAnnotations.initMocks(this); + this.leaseStatsRecorder = Mockito.mock(LeaseStatsRecorder.class); + this.leaseRefresher = new DynamoDBLeaseRefresher( + TEST_LEASE_TABLE, + dynamoDbAsyncClient, + new DynamoDBLeaseSerializer(), + true, + NOOP_TABLE_CREATOR_CALLBACK, + LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT, + new DdbTableConfig().billingMode(BillingMode.PAY_PER_REQUEST), + LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED, + LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED, + DefaultSdkAutoConstructList.getInstance()); + this.leaseRenewer = new DynamoDBLeaseRenewer( leaseRefresher, - workerIdentifier, - leaseDurationMillis, - Executors.newCachedThreadPool(), - new NullMetricsFactory()); + WORKER_ID, + Duration.ofHours(1).toMillis(), + Executors.newFixedThreadPool(1), + new NullMetricsFactory(), + leaseStatsRecorder, + mockLeaseGracefulShutdownCallBack); + this.leaseRefresher.createLeaseTableIfNotExists(); + this.leaseRefresher.waitUntilLeaseTableExists(1, 30); } - @After - public void after() throws DependencyException, InvalidStateException, ProvisionedThroughputException { - if (leasesToRenew == null) { - return; - } - for (Lease lease : leasesToRenew) { - verify(leaseRefresher, times(1)).renewLease(eq(lease)); - } + @ParameterizedTest + @CsvSource({ + TEST_NUMBER_VALUE_BELOW_DDB_RANGE + ",0.0", + TEST_NUMBER_HIGHER_PRECISION + ",1.0", + "1.024,1.024", + "1024.1024, 1024.1024", + "1024.102412324, 1024.102412", + "1999999.123213213123123213213, 1999999.123213", + TEST_NUMBER_WITH_HIGH_DECIMAL_VALUES + ",0.0", + TEST_NUMBER_WITH_ALL_ZERO_DECIMAL_VALUES + ",0.0" + }) + void renewLeases_withDifferentInputFromLeaseRecorder_assertNoFailureAndExpectedValue( + final String inputNumber, final double expected) throws Exception { + 
when(leaseStatsRecorder.getThroughputKBps("key-1")).thenReturn(Double.parseDouble(inputNumber)); + final Lease lease = createDummyLease("key-1", WORKER_ID); + leaseRefresher.createLeaseIfNotExists(lease); + leaseRenewer.addLeasesToRenew(ImmutableList.of(lease)); + leaseRenewer.renewLeases(); + assertEquals(expected, leaseRefresher.getLease("key-1").throughputKBps(), "Throughput value is not matching"); } @Test - public void testLeaseRenewerHoldsGoodLeases() - throws DependencyException, InvalidStateException, ProvisionedThroughputException { - /* - * Prepare leases to be renewed - * 2 Good - */ - Lease lease1 = newLease("1"); - Lease lease2 = newLease("2"); - leasesToRenew = Arrays.asList(lease1, lease2); - renewer.addLeasesToRenew(leasesToRenew); - - doReturn(true).when(leaseRefresher).renewLease(lease1); - doReturn(true).when(leaseRefresher).renewLease(lease2); - - renewer.renewLeases(); - - assertEquals(2, renewer.getCurrentlyHeldLeases().size()); + void renewLeases_enqueueShutdownRequestedLease_sanity() throws Exception { + createRenewer(leaseRefresher); + final Lease lease = createDummyLease("key-1", WORKER_ID); + leaseRefresher.createLeaseIfNotExists(lease); + leaseRenewer.addLeasesToRenew(ImmutableList.of(lease)); + leaseRenewer.renewLeases(); + leaseRenewalCallable.call(); + verify(mockLeaseGracefulShutdownCallBack, never()).accept(lease); + + leaseRefresher.initiateGracefulLeaseHandoff(lease, "newOwner"); + leaseRenewalCallable.call(); + verify(mockLeaseGracefulShutdownCallBack, times(1)).accept(any()); + + leaseRenewalCallable.call(); + verify(mockLeaseGracefulShutdownCallBack, times(2)).accept(any()); } @Test - public void testLeaseRenewerDoesNotRenewExpiredLease() - throws DependencyException, InvalidStateException, ProvisionedThroughputException { - String leaseKey = "expiredLease"; - long initialCounterIncrementNanos = 5L; // "expired" time. 
- Lease lease1 = newLease(leaseKey); - lease1.lastCounterIncrementNanos(initialCounterIncrementNanos); - - leasesToRenew = new ArrayList<>(); - leasesToRenew.add(lease1); - doReturn(true).when(leaseRefresher).renewLease(lease1); - renewer.addLeasesToRenew(leasesToRenew); - - assertTrue(lease1.isExpired(1, System.nanoTime())); - assertNull(renewer.getCurrentlyHeldLease(leaseKey)); - renewer.renewLeases(); - // Don't renew lease(s) with same key if getCurrentlyHeldLease returned null previously - assertNull(renewer.getCurrentlyHeldLease(leaseKey)); - assertFalse(renewer.getCurrentlyHeldLeases().containsKey(leaseKey)); - - // Clear the list to avoid triggering expectation mismatch in after(). - leasesToRenew.clear(); + void renewLeases_withHighInitialDecimalDigit_assertUpdateWithoutFailureAndNewStats() throws Exception { + when(leaseStatsRecorder.getThroughputKBps("key-1")).thenReturn(0.10000000000000000001); + final Lease lease = createDummyLease("key-1", WORKER_ID); + lease.throughputKBps( + 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001); + leaseRefresher.createLeaseIfNotExists(lease); + leaseRenewer.addLeasesToRenew(ImmutableList.of(lease)); + leaseRenewer.renewLeases(); + assertEquals(0.1D, leaseRefresher.getLease("key-1").throughputKBps(), "Throughput value is not matching"); } - @Test - public void testLeaseRenewerDoesNotUpdateInMemoryLeaseIfDDBFailsUpdate() - throws DependencyException, InvalidStateException, ProvisionedThroughputException { - String leaseKey = "leaseToUpdate"; - Lease lease = newLease(leaseKey); - lease.checkpoint(ExtendedSequenceNumber.LATEST); - leasesToRenew = new ArrayList<>(); - leasesToRenew.add(lease); - renewer.addLeasesToRenew(leasesToRenew); + private Lease createDummyLease(final String leaseKey, final String leaseOwner) { + final Lease lease = new Lease(); + lease.leaseKey(leaseKey); + lease.checkpoint(TRIM_HORIZON); + lease.leaseOwner(leaseOwner); + 
lease.leaseCounter(123L); + lease.throughputKBps(1); + lease.lastCounterIncrementNanos(System.nanoTime()); + return lease; + } - doReturn(true).when(leaseRefresher).renewLease(lease); - renewer.renewLeases(); + @Test + void initialize_badLeaseInTableExists_assertInitializationWithOtherLeases() + throws ProvisionedThroughputException, InvalidStateException, DependencyException { + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey1", WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey2", WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey3", WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey4", WORKER_ID)); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey5", "leaseOwner2")); + leaseRefresher.createLeaseIfNotExists(createDummyLease("leaseKey6", "leaseOwner2")); + createAndPutBadLeaseEntryInTable(); + + leaseRenewer.initialize(); + + final Map leaseKeyToLeaseMap = leaseRenewer.getCurrentlyHeldLeases(); + + assertEquals(4, leaseKeyToLeaseMap.size()); + assertTrue(leaseKeyToLeaseMap.containsKey("leaseKey1")); + assertTrue(leaseKeyToLeaseMap.containsKey("leaseKey2")); + assertTrue(leaseKeyToLeaseMap.containsKey("leaseKey3")); + assertTrue(leaseKeyToLeaseMap.containsKey("leaseKey4")); + } - Lease updatedLease = newLease(leaseKey); - updatedLease.checkpoint(ExtendedSequenceNumber.TRIM_HORIZON); + @Test + void testLeaseRenewerDoesNotUpdateInMemoryLeaseIfDDBFailsUpdate() throws Exception { + DynamoDBLeaseRefresher leaseRefresherMock = mock(DynamoDBLeaseRefresher.class, Mockito.RETURNS_MOCKS); + createRenewer(leaseRefresherMock); - doThrow(new DependencyException(new RuntimeException())) - .when(leaseRefresher) - .updateLease(updatedLease); + final String leaseKey = "leaseToUpdate"; + final Lease lease = createDummyLease(leaseKey, WORKER_ID); + leaseRenewer.addLeasesToRenew(ImmutableList.of(lease)); + final Lease updatedLease = createDummyLease(leaseKey, 
WORKER_ID); + updatedLease.checkpoint(LATEST); + when(leaseRefresherMock.updateLease(updatedLease)).thenThrow(new DependencyException(new RuntimeException())); try { - UUID concurrencyToken = renewer.getCurrentlyHeldLease(leaseKey).concurrencyToken(); - renewer.updateLease(updatedLease, concurrencyToken, "test", "dummyShardId"); + final UUID concurrencyToken = + leaseRenewer.getCurrentlyHeldLease(leaseKey).concurrencyToken(); + leaseRenewer.updateLease(updatedLease, concurrencyToken, "test", "dummyShardId"); fail(); } catch (DependencyException e) { // expected } - assertEquals(0L, (long) lease.leaseCounter()); // leaseCounter should not be incremented due to DDB failure - assertEquals(ExtendedSequenceNumber.LATEST, lease.checkpoint()); + final Lease currentLease = leaseRenewer.getCurrentlyHeldLeases().get(leaseKey); + assertEquals(123L, currentLease.leaseCounter()); // leaseCounter should not be incremented due to DDB failure + assertEquals(TRIM_HORIZON, currentLease.checkpoint()); + } + + private void createAndPutBadLeaseEntryInTable() { + final PutItemRequest putItemRequest = PutItemRequest.builder() + .tableName(TEST_LEASE_TABLE) + .item(ImmutableMap.of( + "leaseKey", AttributeValue.builder().s("badLeaseKey").build())) + .build(); + + dynamoDbAsyncClient.putItem(putItemRequest); + } + + private void createRenewer(final DynamoDBLeaseRefresher leaseRefresher) throws Exception { + when(mockExecutorService.submit(any(Callable.class))).thenAnswer(invocation -> { + this.leaseRenewalCallable = (Callable) invocation.getArguments()[0]; + return mockFuture; + }); + when(mockFuture.get()).thenReturn(false); + this.leaseRenewer = new DynamoDBLeaseRenewer( + leaseRefresher, + WORKER_ID, + Duration.ofHours(1).toMillis(), + mockExecutorService, + new NullMetricsFactory(), + leaseStatsRecorder, + mockLeaseGracefulShutdownCallBack); + leaseRefresher.createLeaseTableIfNotExists(); + leaseRefresher.waitUntilLeaseTableExists(1, 30); } } diff --git 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerIntegrationTest.java index 6b86a5e79..4b954d76c 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerIntegrationTest.java @@ -52,7 +52,7 @@ public void testSimpleLeaseTake() throws LeasingException { } @Test - public void testNotTakeUpdatedLease() throws LeasingException { + public void testNotTakeUpdatedLease() throws Exception { TestHarnessBuilder builder = new TestHarnessBuilder(leaseRefresher); builder.withLease("1", "bar").build(); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerTest.java index 4e927f31b..a8ede99f0 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/DynamoDBLeaseTakerTest.java @@ -69,7 +69,7 @@ public void setup() { } /** - * Test method for {@link DynamoDBLeaseTaker#stringJoin(java.util.Collection, java.lang.String)}. + * Test method for {@link DynamoDBLeaseTaker#stringJoin(java.util.Collection, String)}. 
*/ @Test public final void testStringJoin() { @@ -93,10 +93,6 @@ public void test_computeLeaseCounts_noExpiredLease() throws Exception { dynamoDBLeaseTaker.allLeases.putAll( leases.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity()))); - when(leaseRefresher.listLeases()).thenReturn(leases); - when(metricsFactory.createMetrics()).thenReturn(new NullMetricsScope()); - when(timeProvider.call()).thenReturn(MOCK_CURRENT_TIME); - final Map actualOutput = dynamoDBLeaseTaker.computeLeaseCounts(ImmutableList.of()); final Map expectedOutput = new HashMap<>(); @@ -117,10 +113,6 @@ public void test_computeLeaseCounts_withExpiredLease() throws Exception { dynamoDBLeaseTaker.allLeases.putAll( leases.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity()))); - when(leaseRefresher.listLeases()).thenReturn(leases); - when(metricsFactory.createMetrics()).thenReturn(new NullMetricsScope()); - when(timeProvider.call()).thenReturn(MOCK_CURRENT_TIME); - final Map actualOutput = dynamoDBLeaseTaker.computeLeaseCounts(leases); final Map expectedOutput = new HashMap<>(); @@ -144,7 +136,6 @@ public void test_veryOldLeaseDurationNanosMultiplierGetsCorrectLeases() throws E dynamoDBLeaseTakerWithCustomMultiplier.allLeases.putAll( allLeases.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity()))); - when(leaseRefresher.listLeases()).thenReturn(allLeases); when(metricsFactory.createMetrics()).thenReturn(new NullMetricsScope()); when(timeProvider.call()).thenReturn(MOCK_CURRENT_TIME); @@ -172,9 +163,7 @@ public void test_disableEnablePriorityLeaseAssignmentGetsCorrectLeases() throws dynamoDBLeaseTakerWithDisabledPriorityLeaseAssignment.allLeases.putAll( allLeases.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity()))); - when(leaseRefresher.listLeases()).thenReturn(allLeases); when(metricsFactory.createMetrics()).thenReturn(new NullMetricsScope()); - when(timeProvider.call()).thenReturn(MOCK_CURRENT_TIME); Set output = 
dynamoDBLeaseTakerWithDisabledPriorityLeaseAssignment.computeLeasesToTake(expiredLeases, timeProvider); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/TestHarnessBuilder.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/TestHarnessBuilder.java index 38e4f50cd..2c87f1b8a 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/TestHarnessBuilder.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/leases/dynamodb/TestHarnessBuilder.java @@ -186,7 +186,7 @@ public Map renewMutateAssert(LeaseRenewer renewer, String... rene return heldLeases; } - public void renewAllLeases() throws LeasingException { + public void renewAllLeases() throws Exception { for (Lease lease : leases.values()) { leaseRefresher.renewLease(lease); } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ConsumerStatesTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ConsumerStatesTest.java index 9c9f19306..9491a97fc 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ConsumerStatesTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ConsumerStatesTest.java @@ -38,6 +38,7 @@ import software.amazon.kinesis.leases.LeaseCleanupManager; import software.amazon.kinesis.leases.LeaseCoordinator; import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.leases.ShardObjectHelper; @@ -84,6 +85,9 @@ public class ConsumerStatesTest { @Mock private LeaseRefresher leaseRefresher; + @Mock + private LeaseStatsRecorder leaseStatsRecorder; + @Mock private Checkpointer checkpointer; @@ -236,7 +240,7 @@ public void initializingStateTest() { @Test public void 
processingStateTestSynchronous() { - + when(leaseCoordinator.leaseStatsRecorder()).thenReturn(leaseStatsRecorder); ConsumerState state = ShardConsumerState.PROCESSING.consumerState(); ConsumerTask task = state.createTask(argument, consumer, null); @@ -268,7 +272,7 @@ public void processingStateTestSynchronous() { @Test public void processingStateTestAsynchronous() { - + when(leaseCoordinator.leaseStatsRecorder()).thenReturn(leaseStatsRecorder); ConsumerState state = ShardConsumerState.PROCESSING.consumerState(); ConsumerTask task = state.createTask(argument, consumer, null); @@ -300,7 +304,7 @@ public void processingStateTestAsynchronous() { @Test public void processingStateRecordsFetcher() { - + when(leaseCoordinator.leaseStatsRecorder()).thenReturn(leaseStatsRecorder); ConsumerState state = ShardConsumerState.PROCESSING.consumerState(); ConsumerTask task = state.createTask(argument, consumer, null); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/LeaseGracefulShutdownHandlerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/LeaseGracefulShutdownHandlerTest.java new file mode 100644 index 000000000..2f077ed41 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/LeaseGracefulShutdownHandlerTest.java @@ -0,0 +1,227 @@ +package software.amazon.kinesis.lifecycle; + +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseCoordinator; +import 
software.amazon.kinesis.leases.LeaseHelper; +import software.amazon.kinesis.leases.LeaseRefresher; +import software.amazon.kinesis.leases.ShardInfo; +import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator; + +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +class LeaseGracefulShutdownHandlerTest { + + private static final String WORKER_ID = "workerId"; + private static final long SHUTDOWN_TIMEOUT = 5000L; + + private final Lease lease = LeaseHelper.createLease("shardId-0", "leaseOwner", Collections.emptyList()); + private final ConcurrentMap shardConsumerMap = new ConcurrentHashMap<>(); + + private LeaseGracefulShutdownHandler handler; + private Runnable gracefulShutdownRunnable; + + @Mock + private LeaseCoordinator mockLeaseCoordinator; + + @Mock + private Supplier mockTimeSupplier; + + @Mock + private ShardConsumer mockShardConsumer; + + @Mock + private ScheduledExecutorService mockScheduledExecutorService; + + @Mock + private LeaseRefresher mockLeaseRefresher; + + @BeforeEach + void setUp() throws Exception { + MockitoAnnotations.initMocks(this); + when(mockScheduledExecutorService.scheduleAtFixedRate( + any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class))) + .thenAnswer(invocation -> { + Object[] args = invocation.getArguments(); + this.gracefulShutdownRunnable = (Runnable) args[0]; + return mock(ScheduledFuture.class); + }); + when(mockLeaseCoordinator.leaseRefresher()).thenReturn(mockLeaseRefresher); + when(mockLeaseRefresher.assignLease(any(Lease.class), any(String.class))) + .thenReturn(true); + + when(mockLeaseCoordinator.workerIdentifier()).thenReturn(WORKER_ID); + when(mockTimeSupplier.get()).thenReturn(0L); + + handler = new LeaseGracefulShutdownHandler( + SHUTDOWN_TIMEOUT, + 
shardConsumerMap, + mockLeaseCoordinator, + mockTimeSupplier, + mockScheduledExecutorService); + + lease.checkpointOwner(WORKER_ID); + lease.concurrencyToken(UUID.randomUUID()); + when(mockLeaseCoordinator.getCurrentlyHeldLease(lease.leaseKey())).thenReturn(lease); + handler.start(); + } + + @Test + void testSubsequentStarts() { + handler.start(); + handler.start(); + verify(mockScheduledExecutorService) + .scheduleAtFixedRate(any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class)); + } + + @Test + void testSubsequentShutdowns() { + handler.start(); + handler.stop(); + handler.stop(); + verify(mockScheduledExecutorService).shutdown(); + } + + @Test + void testIgnoreDuplicatEnqueues() { + final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + shardConsumerMap.put(shardInfo, mockShardConsumer); + handler.enqueueShutdown(lease); + // check gracefulShutdown is called + verify(mockShardConsumer, times(1)).gracefulShutdown(null); + + // enqueue the same lease again to make sure it doesn't cause another shutdown call + handler.enqueueShutdown(lease); + verify(mockShardConsumer, times(1)).gracefulShutdown(null); + + // adding another lease to check it's enqueued + final Lease lease2 = createShardConsumerForLease("shardId-2"); + handler.enqueueShutdown(lease2); + verify(shardConsumerMap.get(DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease2)), times(1)) + .gracefulShutdown(null); + } + + @Test + void testIgnoreNonPendingShutdownLease() throws Exception { + // enqueue a none shutdown lease + lease.checkpointOwner(null); + handler.enqueueShutdown(lease); + verify(mockShardConsumer, never()).gracefulShutdown(null); + verify(mockLeaseRefresher, never()).assignLease(any(Lease.class), any((String.class))); + } + + @Test + void testMonitorGracefulShutdownLeases() throws Exception { + final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + shardConsumerMap.put(shardInfo, mockShardConsumer); + 
handler.enqueueShutdown(lease); + + handler.start(); + gracefulShutdownRunnable.run(); + + // check gracefulShutdown is called + verify(mockShardConsumer).gracefulShutdown(null); + + // run again. this is no op because shutdown is already called and checkpoint is not expired + gracefulShutdownRunnable.run(); + verify(mockShardConsumer).gracefulShutdown(null); + + // make it return true which should cause + when(mockShardConsumer.isShutdown()).thenReturn(true); + gracefulShutdownRunnable.run(); + verify(mockLeaseRefresher, never()).assignLease(any(Lease.class), any((String.class))); + } + + @Test + void testNotEnqueueBecauseNoShardConsumerFound() throws Exception { + when(mockShardConsumer.isShutdown()).thenReturn(true); + handler.enqueueShutdown(lease); + verify(mockLeaseRefresher, never()).assignLease(any(Lease.class), any((String.class))); + } + + @Test + void testAssignLeaseIsCalledBecauseTimeoutReached() throws Exception { + final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + shardConsumerMap.put(shardInfo, mockShardConsumer); + when(mockShardConsumer.isShutdown()).thenReturn(false); + when(mockTimeSupplier.get()).thenReturn(0L); + handler.enqueueShutdown(lease); + + handler.start(); + gracefulShutdownRunnable.run(); + + verify(mockShardConsumer).gracefulShutdown(null); + + // Timeout << SHUTDOWN_TIMEOUT + verify(mockLeaseRefresher, never()).assignLease(lease, lease.leaseOwner()); + + // Timeout < SHUTDOWN_TIMEOUT + when(mockTimeSupplier.get()).thenReturn(SHUTDOWN_TIMEOUT - 1000); + gracefulShutdownRunnable.run(); + verify(mockLeaseRefresher, never()).assignLease(lease, lease.leaseOwner()); + + // Timeout > SHUTDOWN_TIMEOUT + when(mockTimeSupplier.get()).thenReturn(SHUTDOWN_TIMEOUT + 1000); + gracefulShutdownRunnable.run(); + verify(mockLeaseRefresher).assignLease(lease, lease.leaseOwner()); + } + + @Test + void testRemoveLeaseFromPendingShutdownMapBecauseLeaseCoordinatorDontOwnItAnymore() throws Exception { + final ShardInfo 
shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + shardConsumerMap.put(shardInfo, mockShardConsumer); + when(mockShardConsumer.isShutdown()).thenReturn(false); + // fast-forward and time out the shutdown lease. This should ideally trigger an assignLease call. + when(mockTimeSupplier.get()).thenReturn(SHUTDOWN_TIMEOUT + 1000); + // but now we pretend we don't own the lease anymore. This should avoid the assignLease call after all. + when(mockLeaseCoordinator.getCurrentlyHeldLease(lease.leaseKey())).thenReturn(null); + handler.enqueueShutdown(lease); + + gracefulShutdownRunnable.run(); + verify(mockLeaseRefresher, never()).assignLease(lease, lease.leaseOwner()); + } + + @Test + void testAssignLeaseIsNotCalledIfCheckpointOwnerIsNotTheSameWorker() throws Exception { + final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + shardConsumerMap.put(shardInfo, mockShardConsumer); + when(mockShardConsumer.isShutdown()).thenReturn(false); + handler.enqueueShutdown(lease); + // make it expire during timeout check + when(mockTimeSupplier.get()).thenReturn(0L).thenReturn(SHUTDOWN_TIMEOUT + 1000); + // set checkpoint owner to some random worker + lease.checkpointOwner("random_owner"); + + handler.start(); + gracefulShutdownRunnable.run(); + + verify(mockLeaseRefresher, never()).assignLease(any(Lease.class), any((String.class))); + } + + private Lease createShardConsumerForLease(String shardId) { + final Lease lease = LeaseHelper.createLease(shardId, "leaseOwner", Collections.emptyList()); + lease.checkpointOwner(WORKER_ID); + lease.concurrencyToken(UUID.randomUUID()); + shardConsumerMap.put(DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease), mock(ShardConsumer.class)); + when(mockLeaseCoordinator.getCurrentlyHeldLease(lease.leaseKey())).thenReturn(lease); + return lease; + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ProcessTaskTest.java 
b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ProcessTaskTest.java index 300ad8327..d64fc6b72 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ProcessTaskTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ProcessTaskTest.java @@ -39,6 +39,7 @@ import lombok.Getter; import org.hamcrest.Description; import org.hamcrest.Matcher; +import org.hamcrest.Matchers; import org.hamcrest.TypeSafeDiagnosingMatcher; import org.junit.Before; import org.junit.Test; @@ -49,6 +50,7 @@ import software.amazon.awssdk.services.kinesis.model.HashKeyRange; import software.amazon.awssdk.services.kinesis.model.Shard; import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; +import software.amazon.kinesis.leases.LeaseStatsRecorder; import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput; @@ -63,7 +65,6 @@ import software.amazon.kinesis.retrieval.kpl.Messages.AggregatedRecord; import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder; -import static org.hamcrest.CoreMatchers.allOf; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.not; @@ -114,6 +115,9 @@ public class ProcessTaskTest { @Mock private ThrottlingReporter throttlingReporter; + @Mock + private LeaseStatsRecorder leaseStatsRecorder; + private ProcessTask processTask; @Before @@ -160,7 +164,8 @@ private ProcessTask makeProcessTask( IDLE_TIME_IN_MILLISECONDS, aggregatorUtil, new NullMetricsFactory(), - schemaRegistryDecoder); + schemaRegistryDecoder, + leaseStatsRecorder); } @Test @@ -824,7 +829,7 @@ private static class TaskResultMatcher extends TypeSafeDiagnosingMatcher received = new ArrayList<>(); doAnswer(a -> { - ProcessRecordsInput input = a.getArgumentAt(0, ProcessRecordsInput.class); + 
ProcessRecordsInput input = (ProcessRecordsInput) a.getArgument(0); received.add(input); if (input.records().stream() .anyMatch(r -> StringUtils.startsWith(r.partitionKey(), TERMINAL_MARKER))) { @@ -336,7 +336,7 @@ public void restartAfterRequestTimerExpiresWhenNotGettingRecordsAfterInitializat List received = new ArrayList<>(); doAnswer(a -> { - ProcessRecordsInput input = a.getArgumentAt(0, ProcessRecordsInput.class); + ProcessRecordsInput input = (ProcessRecordsInput) a.getArgument(0); received.add(input); if (input.records().stream() .anyMatch(r -> StringUtils.startsWith(r.partitionKey(), TERMINAL_MARKER))) { @@ -408,7 +408,7 @@ public void restartAfterRequestTimerExpiresWhenInitialTaskExecutionIsRejected() List received = new ArrayList<>(); doAnswer(a -> { - ProcessRecordsInput input = a.getArgumentAt(0, ProcessRecordsInput.class); + ProcessRecordsInput input = (ProcessRecordsInput) a.getArgument(0); received.add(input); if (input.records().stream() .anyMatch(r -> StringUtils.startsWith(r.partitionKey(), TERMINAL_MARKER))) { diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShardConsumerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShardConsumerTest.java index daab8efe1..83b27ba79 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShardConsumerTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShardConsumerTest.java @@ -31,7 +31,6 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Function; import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.extern.slf4j.Slf4j; @@ -693,19 +692,13 @@ public void testRequestedShutdownWhileQuiet() throws Exception { mockSuccessfulProcessing(taskBarrier); when(processingState.shutdownTransition(eq(ShutdownReason.REQUESTED))).thenReturn(shutdownRequestedState); 
- when(shutdownRequestedState.requiresDataAvailability()).thenReturn(false); when(shutdownRequestedState.createTask(any(), any(), any())).thenReturn(shutdownRequestedTask); when(shutdownRequestedState.taskType()).thenReturn(TaskType.SHUTDOWN_NOTIFICATION); when(shutdownRequestedTask.call()).thenReturn(new TaskResult(null)); when(shutdownRequestedState.shutdownTransition(eq(ShutdownReason.REQUESTED))) .thenReturn(shutdownRequestedAwaitState); - when(shutdownRequestedState.shutdownTransition(eq(ShutdownReason.LEASE_LOST))) - .thenReturn(shutdownState); - when(shutdownRequestedAwaitState.requiresDataAvailability()).thenReturn(false); when(shutdownRequestedAwaitState.createTask(any(), any(), any())).thenReturn(null); - when(shutdownRequestedAwaitState.shutdownTransition(eq(ShutdownReason.REQUESTED))) - .thenReturn(shutdownRequestedState); when(shutdownRequestedAwaitState.shutdownTransition(eq(ShutdownReason.LEASE_LOST))) .thenReturn(shutdownState); when(shutdownRequestedAwaitState.taskType()).thenReturn(TaskType.SHUTDOWN_COMPLETE); @@ -786,7 +779,6 @@ public void testExceptionInProcessingStopsRequests() throws Exception { Optional.of(1L), shardConsumerArgument, initialState, - Function.identity(), 1, taskExecutionListener, 0); @@ -842,7 +834,6 @@ public void testLongRunningTasks() throws Exception { Optional.of(1L), shardConsumerArgument, initialState, - Function.identity(), 1, taskExecutionListener, 0); @@ -950,7 +941,6 @@ public void testEmptyShardProcessingRaceCondition() throws Exception { Optional.of(1L), shardConsumerArgument, mockState, - Function.identity(), 1, taskExecutionListener, 0); @@ -1034,15 +1024,7 @@ public void testEmptyShardProcessingRaceCondition() throws Exception { // race condition we want. 
reset(mockState); AtomicBoolean successTransitionCalled = new AtomicBoolean(false); - when(mockState.successTransition()).then(input -> { - successTransitionCalled.set(true); - return mockState; - }); AtomicBoolean shutdownTransitionCalled = new AtomicBoolean(false); - when(mockState.shutdownTransition(any())).then(input -> { - shutdownTransitionCalled.set(true); - return mockState; - }); when(mockState.state()).then(input -> { if (successTransitionCalled.get() && shutdownTransitionCalled.get()) { return ShardConsumerState.SHUTTING_DOWN; @@ -1065,7 +1047,6 @@ private void mockSuccessfulShutdown(CyclicBarrier taskCallBarrier) { private void mockSuccessfulShutdown(CyclicBarrier taskArriveBarrier, CyclicBarrier taskDepartBarrier) { when(shutdownState.createTask(eq(shardConsumerArgument), any(), any())).thenReturn(shutdownTask); when(shutdownState.taskType()).thenReturn(TaskType.SHUTDOWN); - when(shutdownTask.taskType()).thenReturn(TaskType.SHUTDOWN); when(shutdownTask.call()).thenAnswer(i -> { awaitBarrier(taskArriveBarrier); awaitBarrier(taskDepartBarrier); @@ -1084,7 +1065,6 @@ private void mockSuccessfulProcessing(CyclicBarrier taskCallBarrier) { private void mockSuccessfulProcessing(CyclicBarrier taskCallBarrier, CyclicBarrier taskInterlockBarrier) { when(processingState.createTask(eq(shardConsumerArgument), any(), any())) .thenReturn(processingTask); - when(processingState.requiresDataAvailability()).thenReturn(true); when(processingState.taskType()).thenReturn(TaskType.PROCESS); when(processingTask.taskType()).thenReturn(TaskType.PROCESS); when(processingTask.call()).thenAnswer(i -> { @@ -1117,7 +1097,6 @@ private void mockSuccessfulInitialize(CyclicBarrier taskCallBarrier, CyclicBarri return initializeTaskResult; }); when(initializeTaskResult.getException()).thenReturn(null); - when(initialState.requiresDataAvailability()).thenReturn(false); when(initialState.successTransition()).thenReturn(processingState); 
when(initialState.state()).thenReturn(ConsumerStates.ShardConsumerState.INITIALIZING); } @@ -1131,10 +1110,8 @@ private void mockSuccessfulUnblockOnParents() { when(blockedOnParentsState.createTask(eq(shardConsumerArgument), any(), any())) .thenReturn(blockedOnParentsTask); when(blockedOnParentsState.taskType()).thenReturn(TaskType.BLOCK_ON_PARENT_SHARDS); - when(blockedOnParentsTask.taskType()).thenReturn(TaskType.BLOCK_ON_PARENT_SHARDS); when(blockedOnParentsTask.call()).thenAnswer(i -> blockOnParentsTaskResult); when(blockOnParentsTaskResult.getException()).thenReturn(null); - when(blockedOnParentsState.requiresDataAvailability()).thenReturn(false); when(blockedOnParentsState.successTransition()).thenReturn(initialState); when(blockedOnParentsState.state()).thenReturn(ShardConsumerState.WAITING_ON_PARENT_SHARDS); } @@ -1174,7 +1151,6 @@ private ShardConsumer createShardConsumer( logWarningForTaskAfterMillis, shardConsumerArgument, state, - Function.identity(), 1, taskExecutionListener, 0); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTaskTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTaskTest.java new file mode 100644 index 000000000..3bdf0f809 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownNotificationTaskTest.java @@ -0,0 +1,74 @@ +package software.amazon.kinesis.lifecycle; + +import java.util.Collections; +import java.util.UUID; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; +import software.amazon.kinesis.leases.Lease; +import software.amazon.kinesis.leases.LeaseCoordinator; +import software.amazon.kinesis.leases.LeaseHelper; +import software.amazon.kinesis.leases.LeaseRefresher; +import 
software.amazon.kinesis.leases.ShardInfo; +import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator; +import software.amazon.kinesis.processor.ShardRecordProcessor; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +class ShutdownNotificationTaskTest { + private static final String LEASE_OWNER = "leaseOwner"; + private final Lease lease = LeaseHelper.createLease("shardId-9", LEASE_OWNER, Collections.emptyList()); + + @Mock + private ShardRecordProcessorCheckpointer mockRecordProcessorCheckpointer; + + @Mock + private LeaseRefresher mockLeaseRefresher; + + @Mock + private LeaseCoordinator mockLeaseCoordinator; + + @Mock + private ShardRecordProcessor mockShardRecordProcessor; + + private ShutdownNotificationTask shutdownNotificationTask; + + @BeforeEach + void setup() { + MockitoAnnotations.initMocks(this); + lease.checkpointOwner("checkpoint_owner"); + lease.concurrencyToken(UUID.randomUUID()); + final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease); + shutdownNotificationTask = new ShutdownNotificationTask( + mockShardRecordProcessor, mockRecordProcessorCheckpointer, null, shardInfo, mockLeaseCoordinator); + when(mockLeaseCoordinator.getCurrentlyHeldLease(lease.leaseKey())).thenReturn(lease); + when(mockLeaseCoordinator.workerIdentifier()).thenReturn(LEASE_OWNER); + when(mockLeaseCoordinator.leaseRefresher()).thenReturn(mockLeaseRefresher); + } + + @Test + void testLeaseTransferCalledAsCheckpointOwnerExist() throws Exception { + lease.checkpointOwner(LEASE_OWNER); + shutdownNotificationTask.call(); + verify(mockLeaseRefresher).assignLease(lease, lease.leaseOwner()); + verify(mockLeaseCoordinator).dropLease(lease); + } + + @Test + void testLeaseTransferNotCalledAsCheckpointOwnerMisMatch() throws Exception { + lease.checkpointOwner(null); + shutdownNotificationTask.call(); + 
verify(mockLeaseRefresher, never()).assignLease(any(Lease.class), any(String.class)); + verify(mockLeaseCoordinator).dropLease(lease); + + lease.checkpointOwner("else"); + shutdownNotificationTask.call(); + verify(mockLeaseRefresher, never()).assignLease(any(Lease.class), any(String.class)); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownTaskTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownTaskTest.java index db64d198c..78e62461d 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownTaskTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/lifecycle/ShutdownTaskTest.java @@ -91,10 +91,12 @@ public class ShutdownTaskTest { */ private static final String SHARD_ID = "shardId-0"; + private static final String LEASE_OWNER = "leaseOwner"; private static final ShardInfo SHARD_INFO = new ShardInfo(SHARD_ID, "concurrencyToken", Collections.emptySet(), ExtendedSequenceNumber.LATEST); private ShutdownTask task; + private Lease lease; @Mock private RecordsPublisher recordsPublisher; @@ -135,11 +137,11 @@ public void setUp() throws Exception { when(hierarchicalShardSyncer.createLeaseForChildShard( Matchers.any(ChildShard.class), Matchers.any(StreamIdentifier.class))) .thenReturn(childLease); - setupLease(SHARD_ID, Collections.emptyList()); + lease = setupLease(SHARD_ID, Collections.emptyList()); when(leaseCoordinator.leaseRefresher()).thenReturn(leaseRefresher); - when(shardDetector.streamIdentifier()).thenReturn(STREAM_IDENTIFIER); + when(shardDetector.streamIdentifier()).thenReturn(STREAM_IDENTIFIER); task = createShutdownTask(SHARD_END, constructChildrenFromSplit()); } @@ -361,7 +363,7 @@ private void verifyShutdownAndNoDrop() { } private Lease setupLease(final String leaseKey, final Collection parentShardIds) throws Exception { - final Lease lease = LeaseHelper.createLease(leaseKey, "leaseOwner", parentShardIds); + final 
Lease lease = LeaseHelper.createLease(leaseKey, LEASE_OWNER, parentShardIds); when(leaseCoordinator.getCurrentlyHeldLease(lease.leaseKey())).thenReturn(lease); when(leaseRefresher.getLease(lease.leaseKey())).thenReturn(lease); return lease; diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/fanout/FanOutRecordsPublisherTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/fanout/FanOutRecordsPublisherTest.java index cf135159d..76fa8b904 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/fanout/FanOutRecordsPublisherTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/fanout/FanOutRecordsPublisherTest.java @@ -1627,8 +1627,6 @@ protected void logAcquireTimeoutMessage(Throwable t) { ArgumentCaptor flowCaptor = ArgumentCaptor.forClass(FanOutRecordsPublisher.RecordFlow.class); - doNothing().when(publisher).subscribe(captor.capture()); - source.start( ExtendedSequenceNumber.LATEST, InitialPositionInStreamExtended.newInitialPosition(InitialPositionInStream.LATEST)); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/AsynchronousGetRecordsRetrievalStrategyTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/AsynchronousGetRecordsRetrievalStrategyTest.java index 13db2a4d6..faa727a7d 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/AsynchronousGetRecordsRetrievalStrategyTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/AsynchronousGetRecordsRetrievalStrategyTest.java @@ -80,7 +80,6 @@ public void before() { expectedResponses = GetRecordsResponse.builder().build(); when(completionServiceSupplier.get()).thenReturn(completionService); - when(dataFetcherResult.getResult()).thenReturn(expectedResponses); when(dataFetcherResult.accept()).thenReturn(expectedResponses); } @@ -116,8 +115,6 @@ public 
void testBlockedAndSuccessfulFuture() throws Exception { when(successfulFuture.get()).thenReturn(dataFetcherResult); when(successfulFuture.cancel(anyBoolean())).thenReturn(false); when(blockedFuture.cancel(anyBoolean())).thenReturn(true); - when(successfulFuture.isCancelled()).thenReturn(false); - when(blockedFuture.isCancelled()).thenReturn(true); GetRecordsResponse actualResults = strategy.getRecords(10); @@ -158,8 +155,6 @@ public void testPoolOutOfResources() throws Exception { when(successfulFuture.get()).thenReturn(dataFetcherResult); when(successfulFuture.cancel(anyBoolean())).thenReturn(false); when(blockedFuture.cancel(anyBoolean())).thenReturn(true); - when(successfulFuture.isCancelled()).thenReturn(false); - when(blockedFuture.isCancelled()).thenReturn(true); GetRecordsResponse actualResult = strategy.getRecords(10); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/KinesisDataFetcherTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/KinesisDataFetcherTest.java index b3cd0c2ac..57221b612 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/KinesisDataFetcherTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/KinesisDataFetcherTest.java @@ -57,7 +57,6 @@ import software.amazon.kinesis.retrieval.DataFetcherResult; import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy; import software.amazon.kinesis.retrieval.RetryableRetrievalException; -import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; import static org.hamcrest.CoreMatchers.isA; import static org.junit.Assert.assertEquals; @@ -178,7 +177,6 @@ public void testAdvanceIteratorTo() throws KinesisClientLibException { .thenReturn(makeGetShardIteratorResponse(iteratorA)) .thenReturn(makeGetShardIteratorResponse(iteratorA)) .thenReturn(makeGetShardIteratorResponse(iteratorB)); - 
when(checkpoint.getCheckpoint(SHARD_ID)).thenReturn(new ExtendedSequenceNumber(seqA)); kinesisDataFetcher.initialize(seqA, null); kinesisDataFetcher.advanceIteratorTo(seqA, null); @@ -594,7 +592,6 @@ private void testInitializeAndFetch( .thenReturn(makeGetRecordsResponse(null, expectedRecords)); Checkpointer checkpoint = mock(Checkpointer.class); - when(checkpoint.getCheckpoint(SHARD_ID)).thenReturn(new ExtendedSequenceNumber(seqNo)); final GetRecordsRetrievalStrategy getRecordsRetrievalStrategy = new SynchronousGetRecordsRetrievalStrategy(kinesisDataFetcher); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PollingConfigTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PollingConfigTest.java index 572bc0f06..b71e6691d 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PollingConfigTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PollingConfigTest.java @@ -48,4 +48,10 @@ public void testInvalidStateMultiWithStreamName() { public void testInvalidRecordLimit() { config.maxRecords(PollingConfig.DEFAULT_MAX_RECORDS + 1); } + + @Test + public void testMinIdleMillisLimit() { + config.idleTimeBetweenReadsInMillis(0); + assertEquals(config.idleTimeBetweenReadsInMillis(), PollingConfig.MIN_IDLE_MILLIS_BETWEEN_READS); + } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherIntegrationTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherIntegrationTest.java index c5340f979..291531b0e 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherIntegrationTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherIntegrationTest.java @@ -48,6 +48,7 @@ import 
software.amazon.kinesis.retrieval.DataFetcherResult; import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy; import software.amazon.kinesis.retrieval.RecordsRetrieved; +import software.amazon.kinesis.retrieval.ThrottlingReporter; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; import static org.junit.Assert.assertEquals; @@ -126,6 +127,7 @@ public void setup() throws Exception { new NullMetricsFactory(), operation, "test-shard", + new ThrottlingReporter(5, "test-shard"), AWAIT_TERMINATION_TIMEOUT); } @@ -186,6 +188,7 @@ public void testDifferentShardCaches() { new NullMetricsFactory(), operation, "test-shard-2", + new ThrottlingReporter(5, "test-shard"), AWAIT_TERMINATION_TIMEOUT); getRecordsCache.start(extendedSequenceNumber, initialPosition); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherTest.java index a046e6b9e..50ad888b4 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherTest.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/retrieval/polling/PrefetchRecordsPublisherTest.java @@ -44,7 +44,9 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.ArgumentCaptor; +import org.mockito.InOrder; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.runners.MockitoJUnitRunner; import org.mockito.stubbing.Answer; @@ -56,6 +58,7 @@ import software.amazon.awssdk.services.kinesis.model.ExpiredIteratorException; import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse; import software.amazon.awssdk.services.kinesis.model.InvalidArgumentException; +import software.amazon.awssdk.services.kinesis.model.ProvisionedThroughputExceededException; import 
software.amazon.awssdk.services.kinesis.model.Record; import software.amazon.kinesis.common.InitialPositionInStreamExtended; import software.amazon.kinesis.common.StreamIdentifier; @@ -68,6 +71,7 @@ import software.amazon.kinesis.retrieval.RecordsPublisher; import software.amazon.kinesis.retrieval.RecordsRetrieved; import software.amazon.kinesis.retrieval.RetryableRetrievalException; +import software.amazon.kinesis.retrieval.ThrottlingReporter; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; import software.amazon.kinesis.utils.BlockingUtils; @@ -84,6 +88,7 @@ import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; @@ -124,6 +129,9 @@ public class PrefetchRecordsPublisherTest { @Mock private ExtendedSequenceNumber sequenceNumber; + @Mock + private ThrottlingReporter throttlingReporter; + private List records; private ExecutorService executorService; private LinkedBlockingQueue spyQueue; @@ -214,8 +222,6 @@ private void verifyInternalState(int queueSize) { public void testGetRecords() { record = Record.builder().data(createByteBufferWithSize(SIZE_512_KB)).build(); - when(records.size()).thenReturn(1000); - final List expectedRecords = records.stream().map(KinesisClientRecord::fromRecord).collect(Collectors.toList()); @@ -239,8 +245,6 @@ public void testGetRecordsWithInitialFailures_LessThanRequiredWait_Throws() { .thenReturn(getRecordsResponse); record = Record.builder().data(createByteBufferWithSize(SIZE_512_KB)).build(); - when(records.size()).thenReturn(1000); - getRecordsCache.start(sequenceNumber, initialPosition); // Setup timeout to be less than what the PrefetchRecordsPublisher will need based on the idle time between // get calls to validate exception is thrown @@ -257,8 +261,6 @@ public 
void testGetRecordsWithInitialFailures_AdequateWait_Success() { .thenReturn(getRecordsResponse); record = Record.builder().data(createByteBufferWithSize(SIZE_512_KB)).build(); - when(records.size()).thenReturn(1000); - final List expectedRecords = records.stream().map(KinesisClientRecord::fromRecord).collect(Collectors.toList()); @@ -281,8 +283,6 @@ record = Record.builder().data(createByteBufferWithSize(SIZE_512_KB)).build(); public void testGetRecordsWithInvalidResponse() { record = Record.builder().data(createByteBufferWithSize(SIZE_512_KB)).build(); - when(records.size()).thenReturn(1000); - GetRecordsResponse response = GetRecordsResponse.builder().records(records).build(); when(getRecordsRetrievalStrategy.getRecords(eq(MAX_RECORDS_PER_CALL))).thenReturn(response); @@ -356,8 +356,6 @@ record = Record.builder().data(createByteBufferWithSize(SIZE_1_MB)).build(); @Test public void testFullCacheRecordsCount() { int recordsSize = 4500; - when(records.size()).thenReturn(recordsSize); - getRecordsCache.start(sequenceNumber, initialPosition); sleep(2000); @@ -372,8 +370,6 @@ public void testFullCacheRecordsCount() { @Test public void testFullCacheSize() { int recordsSize = 200; - when(records.size()).thenReturn(recordsSize); - getRecordsCache.start(sequenceNumber, initialPosition); // Sleep for a few seconds for the cache to fill up. @@ -735,7 +731,7 @@ public void testResetClearsRemainingData() { when(getRecordsRetrievalStrategy.getRecords(anyInt())).thenAnswer(retrieverAnswer); doAnswer(a -> { - String resetTo = a.getArgumentAt(0, String.class); + String resetTo = (String) a.getArgument(0); retrieverAnswer.resetIteratorTo(resetTo); return null; }) @@ -800,6 +796,24 @@ public void testRepeatSdkExceptionLoop() { } } + /** + * Tests that a thrown {@link ProvisionedThroughputExceededException} writes to throttlingReporter. 
+ */ + @Test + public void testProvisionedThroughputExceededExceptionReporter() { + when(getRecordsRetrievalStrategy.getRecords(anyInt())) + .thenThrow(ProvisionedThroughputExceededException.builder().build()) + .thenReturn(GetRecordsResponse.builder().build()); + + getRecordsCache.start(sequenceNumber, initialPosition); + + BlockingUtils.blockUntilRecordsAvailable(this::evictPublishedEvent, DEFAULT_TIMEOUT_MILLIS); + InOrder inOrder = Mockito.inOrder(throttlingReporter); + inOrder.verify(throttlingReporter).throttled(); + inOrder.verify(throttlingReporter, atLeastOnce()).success(); + inOrder.verifyNoMoreInteractions(); + } + private RecordsRetrieved blockUntilRecordsAvailable() { return BlockingUtils.blockUntilRecordsAvailable(this::evictPublishedEvent, DEFAULT_TIMEOUT_MILLIS); } @@ -905,6 +919,7 @@ private PrefetchRecordsPublisher createPrefetchRecordsPublisher(final long idleM new NullMetricsFactory(), PrefetchRecordsPublisherTest.class.getSimpleName(), "shardId", + throttlingReporter, 1L); } } diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/CgroupTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/CgroupTest.java new file mode 100644 index 000000000..1b45c781a --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/CgroupTest.java @@ -0,0 +1,18 @@ +package software.amazon.kinesis.utils; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet; + +public class CgroupTest { + + @Test + void test_getAvailableCpusFromEffectiveCpuSet() { + assertEquals(8, getAvailableCpusFromEffectiveCpuSet("0-7")); + assertEquals(9, getAvailableCpusFromEffectiveCpuSet("0-4,6,8-10")); + assertEquals(4, getAvailableCpusFromEffectiveCpuSet("0,6,8,10")); + assertEquals(5, getAvailableCpusFromEffectiveCpuSet("1-2,8,10,11")); + assertEquals(1, 
getAvailableCpusFromEffectiveCpuSet("0")); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/SubscribeToShardRequestMatcher.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/SubscribeToShardRequestMatcher.java index 52c50e054..4a7825945 100644 --- a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/SubscribeToShardRequestMatcher.java +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/utils/SubscribeToShardRequestMatcher.java @@ -3,7 +3,7 @@ import org.mockito.ArgumentMatcher; import software.amazon.awssdk.services.kinesis.model.SubscribeToShardRequest; -public class SubscribeToShardRequestMatcher extends ArgumentMatcher { +public class SubscribeToShardRequestMatcher implements ArgumentMatcher { private SubscribeToShardRequest left; @@ -12,8 +12,7 @@ public SubscribeToShardRequestMatcher(SubscribeToShardRequest left) { this.left = left; } - public boolean matches(Object rightObject) { - SubscribeToShardRequest right = (SubscribeToShardRequest) rightObject; + public boolean matches(SubscribeToShardRequest right) { return left.shardId().equals(right.shardId()) && left.consumerARN().equals(right.consumerARN()) && left.startingPosition().equals(right.startingPosition()); diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/WorkerWorkerMetricsSelectorTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/WorkerWorkerMetricsSelectorTest.java new file mode 100644 index 000000000..73c9dcb8d --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/WorkerWorkerMetricsSelectorTest.java @@ -0,0 +1,87 @@ +package software.amazon.kinesis.worker; + +import java.util.Collections; +import java.util.Optional; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import software.amazon.kinesis.worker.metric.impl.container.Cgroupv1CpuWorkerMetric; +import 
software.amazon.kinesis.worker.metric.impl.container.Cgroupv2CpuWorkerMetric; +import software.amazon.kinesis.worker.metric.impl.container.EcsCpuWorkerMetric; +import software.amazon.kinesis.worker.metric.impl.linux.LinuxCpuWorkerMetric; +import software.amazon.kinesis.worker.platform.OperatingRangeDataProvider; +import software.amazon.kinesis.worker.platform.ResourceMetadataProvider; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class WorkerWorkerMetricsSelectorTest { + + private ResourceMetadataProvider resourceMetadataProvider; + private WorkerMetricsSelector workerMetricsSelector; + + @BeforeEach + void setUp() { + resourceMetadataProvider = mock(ResourceMetadataProvider.class); + workerMetricsSelector = new WorkerMetricsSelector(Collections.singletonList(resourceMetadataProvider)); + + when(resourceMetadataProvider.getPlatform()).thenReturn(ResourceMetadataProvider.ComputePlatform.EC2); + when(resourceMetadataProvider.isOnPlatform()).thenReturn(true); + when(resourceMetadataProvider.getOperatingRangeDataProvider()) + .thenReturn(Optional.of(OperatingRangeDataProvider.LINUX_PROC)); + } + + @Test + void testOnEc2AndLinuxProc() { + assertEquals(1, workerMetricsSelector.getDefaultWorkerMetrics().size()); + assertEquals( + LinuxCpuWorkerMetric.class, + workerMetricsSelector.getDefaultWorkerMetrics().get(0).getClass()); + } + + @Test + void testOnEc2ButNotHaveLinuxProc() { + when(resourceMetadataProvider.getOperatingRangeDataProvider()).thenReturn(Optional.empty()); + assertEquals(0, workerMetricsSelector.getDefaultWorkerMetrics().size()); + } + + @Test + void testOnEksAndCgroupV1() { + when(resourceMetadataProvider.getPlatform()).thenReturn(ResourceMetadataProvider.ComputePlatform.EKS); + when(resourceMetadataProvider.getOperatingRangeDataProvider()) + .thenReturn(Optional.of(OperatingRangeDataProvider.LINUX_EKS_CGROUP_V1)); + assertEquals(1, 
workerMetricsSelector.getDefaultWorkerMetrics().size()); + assertEquals( + Cgroupv1CpuWorkerMetric.class, + workerMetricsSelector.getDefaultWorkerMetrics().get(0).getClass()); + } + + @Test + void testOnEksAndCgroupV2() { + when(resourceMetadataProvider.getPlatform()).thenReturn(ResourceMetadataProvider.ComputePlatform.EKS); + when(resourceMetadataProvider.getOperatingRangeDataProvider()) + .thenReturn(Optional.of(OperatingRangeDataProvider.LINUX_EKS_CGROUP_V2)); + assertEquals(1, workerMetricsSelector.getDefaultWorkerMetrics().size()); + assertEquals( + Cgroupv2CpuWorkerMetric.class, + workerMetricsSelector.getDefaultWorkerMetrics().get(0).getClass()); + } + + @Test + void testOnEcsAndUsesEcsWorkerMetric() { + when(resourceMetadataProvider.getPlatform()).thenReturn(ResourceMetadataProvider.ComputePlatform.ECS); + when(resourceMetadataProvider.getOperatingRangeDataProvider()) + .thenReturn(Optional.of(OperatingRangeDataProvider.LINUX_ECS_METADATA_KEY_V4)); + assertEquals(1, workerMetricsSelector.getDefaultWorkerMetrics().size()); + assertEquals( + EcsCpuWorkerMetric.class, + workerMetricsSelector.getDefaultWorkerMetrics().get(0).getClass()); + } + + @Test + void testNotOnSupportedPlatform() { + when(resourceMetadataProvider.isOnPlatform()).thenReturn(false); + assertEquals(0, workerMetricsSelector.getDefaultWorkerMetrics().size()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/WorkerMetricTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/WorkerMetricTest.java new file mode 100644 index 000000000..0cbd1a60d --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/WorkerMetricTest.java @@ -0,0 +1,33 @@ +package software.amazon.kinesis.worker.metric; + +import java.util.function.Supplier; + +import org.junit.jupiter.api.Test; + +import static org.junit.Assert.assertTrue; + +class WorkerMetricTest { + + @Test + void 
testWorkerMetricValueBuildValidCases() { + // build method does not fail means test is valid + WorkerMetric.WorkerMetricValue.builder().value(0D).build(); + WorkerMetric.WorkerMetricValue.builder().value(0.000001D).build(); + WorkerMetric.WorkerMetricValue.builder().value(50D).build(); + WorkerMetric.WorkerMetricValue.builder().value(100D).build(); + WorkerMetric.WorkerMetricValue.builder().value(99.00001D).build(); + + assertFailure(() -> WorkerMetric.WorkerMetricValue.builder().value(-1D).build(), -1D); + assertFailure( + () -> WorkerMetric.WorkerMetricValue.builder().value(100.00001D).build(), 100.00001D); + } + + private void assertFailure(final Supplier supplier, final double value) { + try { + supplier.get(); + throw new RuntimeException("If call reached here that means its a fail"); + } catch (final Exception e) { + assertTrue(e instanceof IllegalArgumentException); + } + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/WorkerMetricsTestUtils.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/WorkerMetricsTestUtils.java new file mode 100644 index 000000000..689f366d7 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/WorkerMetricsTestUtils.java @@ -0,0 +1,17 @@ +package software.amazon.kinesis.worker.metric; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; + +public class WorkerMetricsTestUtils { + + public static void writeLineToFile(final File file, final String line) throws IOException { + final FileOutputStream fileOutputStream = new FileOutputStream(file); + final OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream); + outputStreamWriter.write(line); + outputStreamWriter.close(); + fileOutputStream.close(); + } +} diff --git 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv1CpuWorkerMetricsTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv1CpuWorkerMetricsTest.java new file mode 100644 index 000000000..c300b3506 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv1CpuWorkerMetricsTest.java @@ -0,0 +1,126 @@ +package software.amazon.kinesis.worker.metric.impl.container; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Clock; +import java.util.concurrent.TimeUnit; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.worker.metric.WorkerMetricsTestUtils.writeLineToFile; + +class Cgroupv1CpuWorkerMetricsTest { + + @Mock + private Clock clock; + + @BeforeEach + void setup() { + MockitoAnnotations.initMocks(this); + } + + @Test + void sanity_capture(final @TempDir Path tempDir) throws IOException { + final File cpuTimeFile = new File(tempDir.toAbsolutePath() + "/cpuTime"); + final File cfsQuotaFile = new File(tempDir.toAbsolutePath() + "/cfsQuota"); + final File cfsPeriodFile = new File(tempDir.toAbsolutePath() + "/cfsPeriod"); + final File effectiveCpusFile = new File(tempDir.toAbsolutePath() + "/cpuset.effective_cpus"); + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + + writeLineToFile(cfsQuotaFile, "20000"); + writeLineToFile(cfsPeriodFile, "10000"); + + final 
Cgroupv1CpuWorkerMetric cgroupv1CpuWorkerMetrics = new Cgroupv1CpuWorkerMetric( + operatingRange, + cpuTimeFile.getAbsolutePath(), + cfsQuotaFile.getAbsolutePath(), + cfsPeriodFile.getAbsolutePath(), + effectiveCpusFile.getAbsolutePath(), + clock); + + when(clock.millis()).thenReturn(1000L, 2000L); + + writeLineToFile(cpuTimeFile, String.valueOf(TimeUnit.MILLISECONDS.toNanos(1000))); + final WorkerMetric.WorkerMetricValue response1 = cgroupv1CpuWorkerMetrics.capture(); + // First request so expects the value to be 0; + assertEquals(0D, response1.getValue()); + cpuTimeFile.delete(); + + writeLineToFile(cpuTimeFile, String.valueOf(TimeUnit.MILLISECONDS.toNanos(1500))); + // The Second request asserts non-zero value. + final WorkerMetric.WorkerMetricValue response2 = cgroupv1CpuWorkerMetrics.capture(); + + // Over 1 second time passed, the container has used 1500 ms-1000ms = 0.5 seconds of cpu time. The container + // can use up to 2 cpu cores so cpu utilization is 0.5 / 2 = 25% + assertEquals(25, response2.getValue().doubleValue()); + + cfsQuotaFile.delete(); + cfsPeriodFile.delete(); + cpuTimeFile.delete(); + effectiveCpusFile.delete(); + } + + @Test + void capture_noCpuLimit(final @TempDir Path tempDir) throws IOException { + final File cpuTimeFile = new File(tempDir.toAbsolutePath() + "/cpuTime"); + final File cfsQuotaFile = new File(tempDir.toAbsolutePath() + "/cfsQuota"); + final File cfsPeriodFile = new File(tempDir.toAbsolutePath() + "/cfsPeriod"); + final File effectiveCpusFile = new File(tempDir.toAbsolutePath() + "/cpuset.effective_cpus"); + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + + writeLineToFile(cfsQuotaFile, "-1"); + writeLineToFile(cfsPeriodFile, "10000"); + + final Cgroupv1CpuWorkerMetric cgroupv1CpuWorkerMetrics = new Cgroupv1CpuWorkerMetric( + operatingRange, + cpuTimeFile.getAbsolutePath(), + cfsQuotaFile.getAbsolutePath(), + cfsPeriodFile.getAbsolutePath(), + 
effectiveCpusFile.getAbsolutePath(), + clock); + + when(clock.millis()).thenReturn(1000L, 2000L); + + // Can use up to 8 cores + writeLineToFile(effectiveCpusFile, "0-7"); + writeLineToFile(cpuTimeFile, String.valueOf(TimeUnit.MILLISECONDS.toNanos(1000))); + final WorkerMetric.WorkerMetricValue response1 = cgroupv1CpuWorkerMetrics.capture(); + // First request so expects the value to be 0; + assertEquals(0D, response1.getValue()); + cpuTimeFile.delete(); + + writeLineToFile(cpuTimeFile, String.valueOf(TimeUnit.MILLISECONDS.toNanos(1500))); + // The Second request asserts non-zero value. + final WorkerMetric.WorkerMetricValue response2 = cgroupv1CpuWorkerMetrics.capture(); + + // Over 1 second time passed, the container has used 1500 ms-1000ms = 0.5 seconds of cpu time. The container + // can use up to 8 cpu cores so cpu utilization is 0.5 / 8 = 6.25% + assertEquals(6.25, response2.getValue().doubleValue()); + + cfsQuotaFile.delete(); + cfsPeriodFile.delete(); + cpuTimeFile.delete(); + effectiveCpusFile.delete(); + } + + @Test + void sanity_capture_file_not_found() { + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + final Cgroupv1CpuWorkerMetric cgroupv1CpuWorkerMetrics = new Cgroupv1CpuWorkerMetric( + operatingRange, "/someBadPath", "/someBadPath", "/someBadPath", "/someBadPath", clock); + assertThrows(IllegalArgumentException.class, () -> cgroupv1CpuWorkerMetrics.capture()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv2CpuWorkerMetricsTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv2CpuWorkerMetricsTest.java new file mode 100644 index 000000000..3fe6db86b --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/Cgroupv2CpuWorkerMetricsTest.java @@ -0,0 +1,115 @@ +package software.amazon.kinesis.worker.metric.impl.container; + +import 
java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Clock; +import java.util.concurrent.TimeUnit; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.worker.metric.WorkerMetricsTestUtils.writeLineToFile; + +class Cgroupv2CpuWorkerMetricsTest { + + @Mock + private Clock clock; + + @BeforeEach + void setup() { + MockitoAnnotations.initMocks(this); + } + + @Test + void sanity_capture(final @TempDir Path tempDir) throws IOException { + final File cpuMaxFile = new File(tempDir.toAbsolutePath() + "/cpu.max"); + final File effectiveCpusFile = new File(tempDir.toAbsolutePath() + "/cpuset.cpus.effective"); + final File cpuStatFile = new File(tempDir.toAbsolutePath() + "/cpu.stat"); + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + + final Cgroupv2CpuWorkerMetric cgroupv2CpuWorkerMetric = new Cgroupv2CpuWorkerMetric( + operatingRange, + cpuMaxFile.getAbsolutePath(), + effectiveCpusFile.getAbsolutePath(), + cpuStatFile.getAbsolutePath(), + clock); + + when(clock.millis()).thenReturn(1000L, 2000L); + + // Can use 2 cores + writeLineToFile(cpuMaxFile, "20000 10000"); + writeLineToFile(cpuStatFile, "usage_usec " + TimeUnit.MILLISECONDS.toMicros(1000)); + final WorkerMetric.WorkerMetricValue response1 = cgroupv2CpuWorkerMetric.capture(); + // First request so expects the value to be 0; + assertEquals(0D, response1.getValue()); + + writeLineToFile(cpuStatFile, "usage_usec " + TimeUnit.MILLISECONDS.toMicros(1500)); + // The Second request 
asserts non-zero value. + final WorkerMetric.WorkerMetricValue response2 = cgroupv2CpuWorkerMetric.capture(); + + // Over 1 second time passed, the container has used 1500 ms- 1000ms = 0.5 seconds of cpu time. The container + // can use up to 2 cpu cores so cpu utilization is 0.5 / 2 = 25% + assertEquals(25, response2.getValue().doubleValue()); + + cpuMaxFile.delete(); + effectiveCpusFile.delete(); + cpuStatFile.delete(); + } + + @Test + void capture_noCpuLimit(final @TempDir Path tempDir) throws IOException { + final File cpuMaxFile = new File(tempDir.toAbsolutePath() + "/cpu.max"); + final File effectiveCpusFile = new File(tempDir.toAbsolutePath() + "/cpuset.cpus.effective"); + final File cpuStatFile = new File(tempDir.toAbsolutePath() + "/cpu.stat"); + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + + final Cgroupv2CpuWorkerMetric cgroupv2CpuWorkerMetric = new Cgroupv2CpuWorkerMetric( + operatingRange, + cpuMaxFile.getAbsolutePath(), + effectiveCpusFile.getAbsolutePath(), + cpuStatFile.getAbsolutePath(), + clock); + + when(clock.millis()).thenReturn(1000L, 2000L); + + // Can use all available cores + writeLineToFile(cpuMaxFile, "max 10000"); + writeLineToFile(effectiveCpusFile, "0-7"); + writeLineToFile(cpuStatFile, "usage_usec " + TimeUnit.MILLISECONDS.toMicros(1000)); + final WorkerMetric.WorkerMetricValue response1 = cgroupv2CpuWorkerMetric.capture(); + // First request so expects the value to be 0; + assertEquals(0D, response1.getValue()); + + writeLineToFile(cpuStatFile, "usage_usec " + TimeUnit.MILLISECONDS.toMicros(1500)); + // The Second request asserts non-zero value. + final WorkerMetric.WorkerMetricValue response2 = cgroupv2CpuWorkerMetric.capture(); + + // Over 1 second time passed, the container has used 1500 ms- 1000ms = 0.5 seconds of cpu time. 
The container + // can use up to 8 cpu cores so cpu utilization is 0.5 / 8 = 6.25 + assertEquals(6.25, response2.getValue().doubleValue()); + + cpuMaxFile.delete(); + effectiveCpusFile.delete(); + cpuStatFile.delete(); + } + + @Test + void sanity_capture_file_not_found() { + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + final Cgroupv2CpuWorkerMetric cgroupv2CpuWorkerMetric = + new Cgroupv2CpuWorkerMetric(operatingRange, "/someBadPath", "/someBadPath", "/someBadPath", clock); + assertThrows(IllegalArgumentException.class, () -> cgroupv2CpuWorkerMetric.capture()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/EcsCpuWorkerMetricsTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/EcsCpuWorkerMetricsTest.java new file mode 100644 index 000000000..3883beafc --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/container/EcsCpuWorkerMetricsTest.java @@ -0,0 +1,131 @@ +package software.amazon.kinesis.worker.metric.impl.container; + +import java.io.IOException; +import java.nio.file.Paths; + +import org.junit.jupiter.api.Test; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class EcsCpuWorkerMetricsTest { + + /** + * Stats has pre cpu usage 100000000 and pre system cpu usage 100000000 + * Stats has cpu usage 150000000 and system cpu usage 200000000 + * Diff cpu usage is 50000000 and diff system cpu usage is 100000000 + * The container that is running is DockerId ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66 + * The container has 50 CPU shares and is the only container that is running = 50 total CPU shares + * The container can use 50/50 = 100% of the 
CPU time allocated to the task + * Online CPUs for the task is 2 + * Task has no CPU limit so containers can use all the online CPUs (2) + * Total CPU core time the container can use is 2 * 100% = 2 + * CPU usage is 50000000 / 100000000 * 2 = 1 CPU core time + * 1 CPU core time used / 2 available = 50% usage + */ + @Test + void sanity_capture_noTaskCpuLimitOneContainer() throws IOException { + final String testDataPath = "src/test/data/ecstestdata/noTaskCpuLimitOneContainer"; + runWorkerMetricTest(testDataPath, 50D); + } + + /** + * Stats has pre cpu usage 100000000 and pre system cpu usage 100000000 + * Stats has cpu usage 150000000 and system cpu usage 200000000 + * Diff cpu usage is 50000000 and diff system cpu usage is 100000000 + * The container that is running is DockerId ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66 + * The container has 50 CPU shares and is in the same task as another container with 30 CPU shares = 80 total CPU shares + * The container can use 50/80 = 62.5% of the CPU time allocated to the task + * Online CPUs for the task is 2 + * Task has no CPU limit so containers can use all the online CPUs (2) + * Total CPU core time the container can use is 2 * 62.5% = 1.25 + * CPU usage is 50000000 / 100000000 * 2 = 1 CPU core time + * 1 CPU core time used / 1.25 available = 80% usage + */ + @Test + void sanity_capture_noTaskCpuLimitTwoContainers() throws IOException { + final String testDataPath = "src/test/data/ecstestdata/noTaskCpuLimitTwoContainers"; + runWorkerMetricTest(testDataPath, 80D); + } + + /** + * Behaves the same as sanity_capture_noTaskCpuLimitOneContainer, but it is possible for a customer to supply + * a memory limit but not a CPU limit which makes the code path a little different + */ + @Test + void sanity_capture_noTaskCpuLimitButHasMemoryLimitOneContainer() throws IOException { + final String testDataPath = "src/test/data/ecstestdata/noTaskCpuLimitButHasMemoryLimitOneContainer"; + 
runWorkerMetricTest(testDataPath, 50D); + } + + /** + * Stats has pre cpu usage 100000000 and pre system cpu usage 100000000 + * Stats has cpu usage 150000000 and system cpu usage 200000000 + * Diff cpu usage is 50000000 and diff system cpu usage is 100000000 + * The container that is running is DockerId ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66 + * The container has 50 CPU shares and is the only container that is running = 50 total CPU shares + * The container can use 50/50 = 100% of the CPU time allocated to the task + * Online CPUs for the task is 8, but is overridden by task CPU limit + * Task has CPU limit of 4 + * Total CPU core time the container can use is 2 * 100% = 2 + * CPU usage is 50000000 / 100000000 * 2 = 1 CPU core time + * 1 CPU core time used / 4 available = 25% usage + */ + @Test + void sanity_capture_taskCpuLimitOneContainer() throws IOException { + final String testDataPath = "src/test/data/ecstestdata/taskCpuLimitOneContainer"; + runWorkerMetricTest(testDataPath, 25D); + } + + /** + * Using the same test data as sanity_capture_taskCpuLimitOneContainer. 
+ */ + @Test + void sanity_capture_NoPrecpuStats() throws IOException { + final String testDataPath = "src/test/data/ecstestdata/noPrecpuStats"; + runWorkerMetricTest(testDataPath, 0D); + } + + @Test + void sanity_capture_NoSystemCpuUsage() throws IOException { + final String testDataPath = "src/test/data/ecstestdata/noSystemCpuUsage"; + runWorkerMetricTest(testDataPath, 100D); + } + + @Test + void sanity_capture_bad_metadata_url() { + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + final EcsCpuWorkerMetric ecsCpuWorkerMetric = + new EcsCpuWorkerMetric(operatingRange, "/someBadPath", "/someBadPath", "/someBadPath"); + assertThrows(IllegalArgumentException.class, () -> ecsCpuWorkerMetric.capture()); + } + + void runWorkerMetricTest(String testDataPath, double expectedCpuUtilization) throws IOException { + final OperatingRange operatingRange = + OperatingRange.builder().maxUtilization(80).build(); + + final String containerStatsPath = Paths.get(testDataPath + "/stats") + .toAbsolutePath() + .toUri() + .toURL() + .toString(); + final String taskMetadataPath = Paths.get(testDataPath + "/task") + .toAbsolutePath() + .toUri() + .toURL() + .toString(); + final String containerMetadataPath = Paths.get(testDataPath + "/root") + .toAbsolutePath() + .toUri() + .toURL() + .toString(); + final EcsCpuWorkerMetric ecsCpuWorkerMetric = + new EcsCpuWorkerMetric(operatingRange, containerStatsPath, taskMetadataPath, containerMetadataPath); + + final WorkerMetric.WorkerMetricValue response1 = ecsCpuWorkerMetric.capture(); + assertEquals(expectedCpuUtilization, response1.getValue()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/jmx/HeapMemoryAfterGCWorkerMetricsTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/jmx/HeapMemoryAfterGCWorkerMetricsTest.java new file mode 100644 index 000000000..07667f9cc --- /dev/null +++ 
b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/jmx/HeapMemoryAfterGCWorkerMetricsTest.java @@ -0,0 +1,22 @@ +package software.amazon.kinesis.worker.metric.impl.jmx; + +import org.junit.jupiter.api.Test; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class HeapMemoryAfterGCWorkerMetricsTest { + + @Test + void capture_sanity() { + final HeapMemoryAfterGCWorkerMetric heapMemoryAfterGCWorkerMetric = new HeapMemoryAfterGCWorkerMetric( + OperatingRange.builder().maxUtilization(100).build()); + + assertNotNull(heapMemoryAfterGCWorkerMetric.capture().getValue()); + + assertEquals(WorkerMetricType.MEMORY, heapMemoryAfterGCWorkerMetric.getWorkerMetricType()); + assertEquals(WorkerMetricType.MEMORY.getShortName(), heapMemoryAfterGCWorkerMetric.getShortName()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxCpuWorkerMetricsTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxCpuWorkerMetricsTest.java new file mode 100644 index 000000000..366b48530 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxCpuWorkerMetricsTest.java @@ -0,0 +1,76 @@ +package software.amazon.kinesis.worker.metric.impl.linux; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.file.Path; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; 
+import static software.amazon.kinesis.worker.metric.WorkerMetricsTestUtils.writeLineToFile; + +class LinuxCpuWorkerMetricsTest { + + @Test + void sanity_capture(final @TempDir Path tempDir) throws IOException { + final File statFile = new File(tempDir.toAbsolutePath() + "/cpuStat"); + + final LinuxCpuWorkerMetric linuxCpuWorkerMetric = new LinuxCpuWorkerMetric( + OperatingRange.builder().maxUtilization(80).build(), statFile.getAbsolutePath()); + + writeLineToFile(statFile, String.format("cpu %d %d %d %d %d 0 0 0 0 0", 20000, 200, 2000, 1500000, 1000)); + final WorkerMetric.WorkerMetricValue response1 = linuxCpuWorkerMetric.capture(); + // First request so expects the value to be 0; + assertEquals(0D, response1.getValue()); + statFile.delete(); + + writeLineToFile(statFile, String.format("cpu %d %d %d %d %d 0 0 0 0 0", 30000, 3000, 30000, 2000000, 2000)); + // The Second request asserts non-zero value. + final WorkerMetric.WorkerMetricValue response2 = linuxCpuWorkerMetric.capture(); + assertEquals(7, (int) response2.getValue().doubleValue()); + statFile.delete(); + } + + @Test + void sanity_capture_file_not_found(final @TempDir Path tempDir) { + final LinuxCpuWorkerMetric linuxCpuWorkerMetric = new LinuxCpuWorkerMetric( + OperatingRange.builder().maxUtilization(80).build(), tempDir.toAbsolutePath() + "/randomPath"); + assertThrows(IllegalArgumentException.class, linuxCpuWorkerMetric::capture); + } + + @Test + void capture_rareIoWaitFieldDecreased_assert0Response(final @TempDir Path tempDir) throws IOException { + final File statFile = new File(tempDir.toAbsolutePath() + "/cpuStat"); + + final LinuxCpuWorkerMetric linuxCpuWorkerMetric = new LinuxCpuWorkerMetric( + OperatingRange.builder().maxUtilization(80).build(), statFile.getAbsolutePath()); + + writeLine( + statFile, String.format("cpu %d %d %d %d %d 0 0 0 0 0", 5469899, 773829, 2079951, 2814572566L, 52048)); + final WorkerMetric.WorkerMetricValue response1 = linuxCpuWorkerMetric.capture(); + // First request 
so expects the value to be 0; + assertEquals(0D, response1.getValue()); + statFile.delete(); + + writeLine( + statFile, String.format("cpu %d %d %d %d %d 0 0 0 0 0", 5469899, 773829, 2079951, 2814575765L, 52047)); + // The Second request asserts zero value as the iow field has decreased and thus diff_tot < diff_idl + final WorkerMetric.WorkerMetricValue response2 = linuxCpuWorkerMetric.capture(); + assertEquals(0D, Math.round(response2.getValue() * 1000.0) / 1000.0); + statFile.delete(); + } + + private void writeLine(final File file, final String line) throws IOException { + final FileOutputStream fileOutputStream = new FileOutputStream(file); + final OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream); + outputStreamWriter.write(line); + outputStreamWriter.close(); + fileOutputStream.close(); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkWorkerMetricTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkWorkerMetricTest.java new file mode 100644 index 000000000..eb2c247c1 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metric/impl/linux/LinuxNetworkWorkerMetricTest.java @@ -0,0 +1,182 @@ +package software.amazon.kinesis.worker.metric.impl.linux; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Duration; + +import com.google.common.base.Stopwatch; +import com.google.common.base.Ticker; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import software.amazon.kinesis.worker.metric.OperatingRange; + +import static org.junit.Assert.assertThrows; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static software.amazon.kinesis.worker.metric.WorkerMetricsTestUtils.writeLineToFile; + +public class LinuxNetworkWorkerMetricTest { + + // The first and the second input in both cases has a 
difference of 1048576(1MB) rx bytes and tx 2097152(2MB) bytes. + private static final String INPUT_1 = + "Inter-| Receive | Transmit\n" + + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" + + " lo: 51335658 460211 0 0 0 0 0 0 51335658 460211 0 0 0 0 0 0\n" + + " eth0: 0 11860562 0 0 0 0 0 4234156 0 3248505 0 0 0 0 0 0\n"; + + private static final String INPUT_2 = + "Inter-| Receive | Transmit\n" + + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" + + " lo: 51335668 460211 0 0 0 0 0 0 51335678 460211 0 0 0 0 0 0\n" + + " eth0: 1048576 11860562 0 0 0 0 0 4234156 2097152 3248505 0 0 0 0 0 0\n"; + + private static final String NO_WHITESPACE_INPUT_1 = + "Inter-| Receive | Transmit\n" + + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" + + " lo:51335658 460211 0 0 0 0 0 0 51335658 460211 0 0 0 0 0 0\n" + + " eth0:3120842478 11860562 0 0 0 0 0 4234156 336491180 3248505" + + " 0 0 0 0 0 0\n"; + + private static final String NO_WHITESPACE_INPUT_2 = + "Inter-| Receive | Transmit\n" + + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" + + " lo:51335668 460211 0 0 0 0 0 0 51335678 460211" + + " 0 0 0 0 0 0\n" + + " eth0:3121891054 11860562 0 0 0 0 0 4234156 338588332 3248505" + + " 0 0 0 0 0 0\n"; + + private static final OperatingRange TEST_OPERATING_RANGE = + OperatingRange.builder().build(); + + @TempDir + private Path tempDir; + + @Test + void capture_sanityWith1SecondTicker() throws IOException { + executeTestForInAndOutWorkerMetric(INPUT_1, INPUT_2, 1000L, 10, 20); + executeTestForInAndOutWorkerMetric(NO_WHITESPACE_INPUT_1, NO_WHITESPACE_INPUT_2, 1000L, 10, 20); + } + + @Test + void capture_sanityWith500MsTicker() throws IOException { + executeTestForInAndOutWorkerMetric(INPUT_1, 
INPUT_2, 500L, 20, 40); + executeTestForInAndOutWorkerMetric(NO_WHITESPACE_INPUT_1, NO_WHITESPACE_INPUT_2, 500L, 20, 40); + } + + @Test + void capture_withNoTimeElapsed() { + assertThrows( + IllegalArgumentException.class, () -> executeTestForInAndOutWorkerMetric(INPUT_1, INPUT_2, 0L, 20, 40)); + } + + void executeTestForInAndOutWorkerMetric( + final String input1, + final String input2, + final long tickMillis, + final long expectedIn, + final long expectedOut) + throws IOException { + final File statFile = new File(tempDir.toAbsolutePath() + "/netStat"); + + final LinuxNetworkInWorkerMetric linuxNetworkInWorkerMetric = new LinuxNetworkInWorkerMetric( + TEST_OPERATING_RANGE, + "eth0", + statFile.getAbsolutePath(), + 10, + getMockedStopWatchWithOneSecondTicker(tickMillis)); + + writeFileAndRunTest(statFile, linuxNetworkInWorkerMetric, input1, input2, expectedIn); + + final LinuxNetworkOutWorkerMetric linuxNetworkOutWorkerMetric = new LinuxNetworkOutWorkerMetric( + TEST_OPERATING_RANGE, + "eth0", + statFile.getAbsolutePath(), + 10, + getMockedStopWatchWithOneSecondTicker(tickMillis)); + + writeFileAndRunTest(statFile, linuxNetworkOutWorkerMetric, input1, input2, expectedOut); + } + + @Test + void capture_nonExistingFile_assertIllegalArgumentException() { + final LinuxNetworkInWorkerMetric linuxNetworkInWorkerMetric = new LinuxNetworkInWorkerMetric( + TEST_OPERATING_RANGE, "eth0", "/non/existing/file", 10, getMockedStopWatchWithOneSecondTicker(1000L)); + + assertThrows(IllegalArgumentException.class, linuxNetworkInWorkerMetric::capture); + } + + @Test + void capture_nonExistingNetworkInterface_assertIllegalArgumentException() throws IOException { + + final File statFile = new File(tempDir.toAbsolutePath() + "/netStat"); + + final LinuxNetworkInWorkerMetric linuxNetworkInWorkerMetric = new LinuxNetworkInWorkerMetric( + TEST_OPERATING_RANGE, + "randomName", + statFile.getAbsolutePath(), + 10, + getMockedStopWatchWithOneSecondTicker(1000L)); + + 
writeLineToFile(statFile, INPUT_1); + + assertThrows(IllegalArgumentException.class, linuxNetworkInWorkerMetric::capture); + } + + @Test + void capture_configuredMaxLessThanUtilized_assert100Percent() throws IOException { + final File statFile = new File(tempDir.toAbsolutePath() + "/netStat"); + + // configured bandwidth is 1 MB and utilized bandwidth is 2 MB. + final LinuxNetworkOutWorkerMetric linuxNetworkOutWorkerMetric = new LinuxNetworkOutWorkerMetric( + TEST_OPERATING_RANGE, + "eth0", + statFile.getAbsolutePath(), + 1, + getMockedStopWatchWithOneSecondTicker(1000L)); + + writeFileAndRunTest(statFile, linuxNetworkOutWorkerMetric, NO_WHITESPACE_INPUT_1, NO_WHITESPACE_INPUT_2, 100); + } + + @Test + void capture_maxBandwidthInMBAsZero_assertIllegalArgumentException() throws IOException { + final File statFile = new File(tempDir.toAbsolutePath() + "/netStat"); + + assertThrows( + IllegalArgumentException.class, + () -> new LinuxNetworkOutWorkerMetric( + TEST_OPERATING_RANGE, + "eth0", + statFile.getAbsolutePath(), + 0, + getMockedStopWatchWithOneSecondTicker(1000L))); + } + + private void writeFileAndRunTest( + final File statFile, + final LinuxNetworkWorkerMetricBase linuxNetworkWorkerMetricBase, + final String input1, + final String input2, + final double expectedValues) + throws IOException { + + writeLineToFile(statFile, input1); + // The First call is expected to be returning 0; + assertEquals(0, linuxNetworkWorkerMetricBase.capture().getValue()); + + writeLineToFile(statFile, input2); + assertEquals(expectedValues, linuxNetworkWorkerMetricBase.capture().getValue()); + } + + private Stopwatch getMockedStopWatchWithOneSecondTicker(final long tickMillis) { + final Ticker ticker = new Ticker() { + private int readCount = 0; + + @Override + public long read() { + readCount++; + return Duration.ofMillis(readCount * tickMillis).toNanos(); + } + }; + return Stopwatch.createUnstarted(ticker); + } +} diff --git 
a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsDAOTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsDAOTest.java new file mode 100644 index 000000000..d712f46f1 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsDAOTest.java @@ -0,0 +1,336 @@ +package software.amazon.kinesis.worker.metricstats; + +import java.time.Instant; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.stream.IntStream; + +import com.amazonaws.services.dynamodbv2.local.embedded.DynamoDBEmbedded; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import software.amazon.awssdk.core.internal.waiters.DefaultWaiterResponse; +import software.amazon.awssdk.core.waiters.WaiterResponse; +import software.amazon.awssdk.enhanced.dynamodb.DynamoDbAsyncTable; +import software.amazon.awssdk.enhanced.dynamodb.DynamoDbEnhancedAsyncClient; +import software.amazon.awssdk.enhanced.dynamodb.Key; +import software.amazon.awssdk.enhanced.dynamodb.TableSchema; +import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import software.amazon.awssdk.services.dynamodb.model.TableDescription; +import 
software.amazon.awssdk.services.dynamodb.model.TableStatus; +import software.amazon.awssdk.services.dynamodb.model.Tag; +import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest; +import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter; +import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static software.amazon.awssdk.services.dynamodb.model.BillingMode.PROVISIONED; +import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture; + +class WorkerMetricsDAOTest { + + private static final String TEST_WORKER_METRICS_TABLE = "WorkerMetricsTableTest"; + private static final Long TEST_REPORTER_FREQ_MILLIS = 10_000L; + private static final String TEST_WORKER_ID = "TEST_WORKER_ID"; + private final DynamoDbAsyncClient dynamoDbAsyncClient = + DynamoDBEmbedded.create().dynamoDbAsyncClient(); + private final DynamoDbEnhancedAsyncClient dynamoDbEnhancedAsyncClient = DynamoDbEnhancedAsyncClient.builder() + .dynamoDbClient(dynamoDbAsyncClient) + .build(); + private final DynamoDbAsyncTable workerMetricsTable = + dynamoDbEnhancedAsyncClient.table(TEST_WORKER_METRICS_TABLE, TableSchema.fromBean(WorkerMetricStats.class)); + private WorkerMetricStatsDAO workerMetricsDAO; + + private void setUp() { + final WorkerMetricsTableConfig tableConfig = new WorkerMetricsTableConfig(null); + tableConfig.tableName(TEST_WORKER_METRICS_TABLE); + 
this.workerMetricsDAO = setUp(tableConfig, this.dynamoDbAsyncClient); + } + + private WorkerMetricStatsDAO setUp( + final WorkerMetricsTableConfig workerMetricsTableConfig, final DynamoDbAsyncClient dynamoDbAsyncClient) { + final WorkerMetricStatsDAO dao = + new WorkerMetricStatsDAO(dynamoDbAsyncClient, workerMetricsTableConfig, TEST_REPORTER_FREQ_MILLIS); + assertDoesNotThrow(dao::initialize); + return dao; + } + + @Test + void initialize_sanity() { + setUp(); + final DescribeTableResponse describeTableResponse = + unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(DescribeTableRequest.builder() + .tableName(TEST_WORKER_METRICS_TABLE) + .build())); + assertEquals(describeTableResponse.table().tableStatus(), TableStatus.ACTIVE, "Table status is not ACTIVE"); + assertFalse(describeTableResponse.table().deletionProtectionEnabled()); + } + + @Test + void initialize_withDeletionProtection_assertDeletionProtection() { + final WorkerMetricsTableConfig config = new WorkerMetricsTableConfig(null); + config.tableName(TEST_WORKER_METRICS_TABLE); + config.deletionProtectionEnabled(true); + setUp(config, dynamoDbAsyncClient); + final DescribeTableResponse describeTableResponse = + unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(DescribeTableRequest.builder() + .tableName(TEST_WORKER_METRICS_TABLE) + .build())); + + assertTrue(describeTableResponse.table().deletionProtectionEnabled()); + } + + /** + * DynamoDBLocal does not support PITR and tags and thus this test is using mocks. 
+ */ + @Test + void initialize_withTagAndPitr_assertCall() { + final DynamoDbAsyncWaiter waiter = mock(DynamoDbAsyncWaiter.class); + final WaiterResponse waiterResponse = DefaultWaiterResponse.builder() + .response(dummyDescribeTableResponse(TableStatus.ACTIVE)) + .attemptsExecuted(1) + .build(); + when(waiter.waitUntilTableExists(any(Consumer.class), any(Consumer.class))) + .thenReturn(CompletableFuture.completedFuture((WaiterResponse) waiterResponse)); + + final DynamoDbAsyncClient dbAsyncClient = mock(DynamoDbAsyncClient.class); + when(dbAsyncClient.waiter()).thenReturn(waiter); + when(dbAsyncClient.createTable(any(CreateTableRequest.class))) + .thenReturn(CompletableFuture.completedFuture(null)); + when(dbAsyncClient.updateContinuousBackups(any(UpdateContinuousBackupsRequest.class))) + .thenReturn(CompletableFuture.completedFuture(null)); + when(dbAsyncClient.describeTable(any(DescribeTableRequest.class))) + .thenThrow(ResourceNotFoundException.builder().build()) + .thenReturn(CompletableFuture.completedFuture(dummyDescribeTableResponse(TableStatus.CREATING))) + .thenReturn(CompletableFuture.completedFuture(dummyDescribeTableResponse(TableStatus.ACTIVE))); + + final ArgumentCaptor createTableRequestArgumentCaptor = + ArgumentCaptor.forClass(CreateTableRequest.class); + final ArgumentCaptor updateContinuousBackupsRequestArgumentCaptor = + ArgumentCaptor.forClass(UpdateContinuousBackupsRequest.class); + + final WorkerMetricsTableConfig config = new WorkerMetricsTableConfig(null); + config.tableName(TEST_WORKER_METRICS_TABLE); + config.pointInTimeRecoveryEnabled(true); + config.tags( + Collections.singleton(Tag.builder().key("Key").value("Value").build())); + setUp(config, dbAsyncClient); + + verify(dbAsyncClient).createTable(createTableRequestArgumentCaptor.capture()); + verify(dbAsyncClient).updateContinuousBackups(updateContinuousBackupsRequestArgumentCaptor.capture()); + + assertEquals(1, createTableRequestArgumentCaptor.getValue().tags().size()); + 
assertEquals( + "Key", createTableRequestArgumentCaptor.getValue().tags().get(0).key()); + assertEquals( + "Value", + createTableRequestArgumentCaptor.getValue().tags().get(0).value()); + + assertTrue(updateContinuousBackupsRequestArgumentCaptor + .getAllValues() + .get(0) + .pointInTimeRecoverySpecification() + .pointInTimeRecoveryEnabled()); + } + + private static DescribeTableResponse dummyDescribeTableResponse(final TableStatus tableStatus) { + return DescribeTableResponse.builder() + .table(TableDescription.builder().tableStatus(tableStatus).build()) + .build(); + } + + @Test + void updateStats_sanity() { + setUp(); + final WorkerMetricStats workerMetrics = createDummyWorkerMetrics(TEST_WORKER_ID); + workerMetrics.setOperatingRange(ImmutableMap.of("C", ImmutableList.of(100L))); + workerMetricsDAO.updateMetrics(workerMetrics); + + final WorkerMetricStats response1 = getWorkerMetricFromTable(TEST_WORKER_ID); + + assertEquals(workerMetrics, response1, "WorkerMetricStats entry from storage is not matching"); + + final WorkerMetricStats workerMetricsUpdated = createDummyWorkerMetrics(TEST_WORKER_ID); + // Don't update lastUpdateTime + workerMetricsUpdated.setOperatingRange(null); + workerMetricsUpdated.setMetricStats(ImmutableMap.of("M", ImmutableList.of(10D, 12D))); + + workerMetricsDAO.updateMetrics(workerMetricsUpdated); + + final WorkerMetricStats response2 = getWorkerMetricFromTable(TEST_WORKER_ID); + + // assert lastUpdateTime is unchanged. 
+ assertEquals( + response1.getOperatingRange(), response2.getOperatingRange(), "lastUpdateTime attribute is not equal"); + assertNotEquals( + response1.getMetricStats(), + response2.getMetricStats(), + "ResourcesStats attribute is equal wanted unequal"); + } + + @Test + void updateStats_withEmptyStatValue_throwIllegalArgumentException() { + setUp(); + final WorkerMetricStats workerMetrics = createDummyWorkerMetrics(TEST_WORKER_ID); + workerMetrics.setMetricStats(ImmutableMap.of("C", Collections.emptyList())); + + assertThrows( + IllegalArgumentException.class, + () -> workerMetricsDAO.updateMetrics(workerMetrics), + "Validation on empty stats values for workerMetric did not fail with IllegalArgumentException"); + } + + @Test + void updateStats_withUpdateTimeOlderThanAllowed_throwIllegalArgumentException() { + setUp(); + final WorkerMetricStats workerMetrics = createDummyWorkerMetrics(TEST_WORKER_ID); + workerMetrics.setLastUpdateTime( + Instant.now().getEpochSecond() - TimeUnit.MILLISECONDS.toSeconds(5 * TEST_REPORTER_FREQ_MILLIS)); + + assertThrows( + IllegalArgumentException.class, + () -> workerMetricsDAO.updateMetrics(workerMetrics), + "IllegalArgumentException not thrown on very old LastUpdateTime field value."); + } + + @Test + void updateStats_withoutNullRequiredFields_throwIllegalArgumentException() { + setUp(); + final WorkerMetricStats workerMetrics1 = createDummyWorkerMetrics(TEST_WORKER_ID); + workerMetrics1.setLastUpdateTime(null); + + assertThrows( + IllegalArgumentException.class, + () -> workerMetricsDAO.updateMetrics(workerMetrics1), + "IllegalArgumentException not thrown on null lastUpdateTime field."); + + final WorkerMetricStats workerMetrics2 = createDummyWorkerMetrics(TEST_WORKER_ID); + workerMetrics2.setMetricStats(null); + assertThrows( + IllegalArgumentException.class, + () -> workerMetricsDAO.updateMetrics(workerMetrics1), + "IllegalArgumentException not thrown on null resourcesStats field."); + } + + @Test + void 
getAllWorkerMetrics_sanity() { + setUp(); + populateNWorkerMetrics(10); + + final List response = workerMetricsDAO.getAllWorkerMetricStats(); + assertEquals(10, response.size(), "Invalid no. of workerMetrics item count."); + } + + @Test + void deleteStats_sanity() { + setUp(); + workerMetricsDAO.updateMetrics(createDummyWorkerMetrics(TEST_WORKER_ID)); + + assertTrue( + workerMetricsDAO.deleteMetrics(createDummyWorkerMetrics(TEST_WORKER_ID)), + "DeleteStats operation failed"); + + assertEquals( + 0, + workerMetricsDAO.getAllWorkerMetricStats().size(), + "WorkerMetricStatsDAO delete did not delete the entry"); + } + + @Test + void deleteStats_differentLastUpdateTime_asserConditionalFailure() { + setUp(); + workerMetricsDAO.updateMetrics(createDummyWorkerMetrics(TEST_WORKER_ID)); + + final WorkerMetricStats workerMetrics = createDummyWorkerMetrics(TEST_WORKER_ID); + workerMetrics.setLastUpdateTime(0L); + + assertFalse( + workerMetricsDAO.deleteMetrics(workerMetrics), + "DeleteStats operation did not failed even with different LastUpdateTime"); + + assertEquals( + 1, + workerMetricsDAO.getAllWorkerMetricStats().size(), + "WorkerMetricStatsDAO deleteStats conditional check did not work."); + } + + @Test + void createProvisionedTable() { + final WorkerMetricsTableConfig tableConfig = new WorkerMetricsTableConfig(null); + tableConfig + .tableName(TEST_WORKER_METRICS_TABLE) + .billingMode(PROVISIONED) + .readCapacity(100L) + .writeCapacity(20L); + final WorkerMetricStatsDAO workerMetricsDAO = + new WorkerMetricStatsDAO(dynamoDbAsyncClient, tableConfig, 10000L); + assertDoesNotThrow(() -> workerMetricsDAO.initialize()); + final DescribeTableResponse response = dynamoDbAsyncClient + .describeTable(DescribeTableRequest.builder() + .tableName(TEST_WORKER_METRICS_TABLE) + .build()) + .join(); + Assertions.assertEquals(20L, response.table().provisionedThroughput().writeCapacityUnits()); + Assertions.assertEquals(100L, 
response.table().provisionedThroughput().readCapacityUnits()); + } + + @Test + void getAllWorkerMetrics_withWorkerMetricsEntryMissingFields_assertGetCallSucceeds() { + setUp(); + workerMetricsDAO.updateMetrics(createDummyWorkerMetrics(TEST_WORKER_ID)); + createAndPutWorkerMetricsEntryAnyRandomAdditionalFieldInTable("SomeWorker2"); + final List response = workerMetricsDAO.getAllWorkerMetricStats(); + assertEquals(2, response.size(), "Invalid no. of workerMetrics item count."); + } + + private WorkerMetricStats getWorkerMetricFromTable(final String workerId) { + return unwrappingFuture(() -> workerMetricsTable.getItem( + Key.builder().partitionValue(workerId).build())); + } + + private void populateNWorkerMetrics(final int n) { + IntStream.range(0, n) + .forEach(i -> workerMetricsDAO.updateMetrics(createDummyWorkerMetrics(TEST_WORKER_ID + i))); + } + + private WorkerMetricStats createDummyWorkerMetrics(final String workerId) { + final long currentTime = Instant.now().getEpochSecond(); + return WorkerMetricStats.builder() + .workerId(workerId) + .lastUpdateTime(currentTime) + .metricStats(ImmutableMap.of("C", ImmutableList.of(10D, 12D))) + .build(); + } + + // This entry is bad as it does not have required field and have some other random field + private void createAndPutWorkerMetricsEntryAnyRandomAdditionalFieldInTable(final String workerId) { + final PutItemRequest putItemRequest = PutItemRequest.builder() + .tableName(TEST_WORKER_METRICS_TABLE) + .item(ImmutableMap.of( + "wid", AttributeValue.builder().s(workerId).build(), + "invalidField", AttributeValue.builder().s("someValue").build())) + .build(); + + dynamoDbAsyncClient.putItem(putItemRequest).join(); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsManagerTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsManagerTest.java new file mode 100644 index 000000000..203ae3fad --- /dev/null +++ 
b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsManagerTest.java @@ -0,0 +1,210 @@ +package software.amazon.kinesis.worker.metricstats; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import software.amazon.awssdk.services.cloudwatch.model.StandardUnit; +import software.amazon.kinesis.metrics.MetricsFactory; +import software.amazon.kinesis.metrics.MetricsLevel; +import software.amazon.kinesis.metrics.MetricsScope; +import software.amazon.kinesis.worker.metric.OperatingRange; +import software.amazon.kinesis.worker.metric.WorkerMetric; +import software.amazon.kinesis.worker.metric.WorkerMetricType; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager.METRICS_IN_MEMORY_REPORTER_FAILURE; + +class WorkerMetricsManagerTest { + + private static final int TEST_STATS_COUNT = 10; + + private Map> metricsMap; + + @Mock + private MetricsFactory metricsFactory; + + private final MetricsScope metricsScope = new MetricsScope() { + + @Override + public void addData(String name, double value, StandardUnit unit) { + metricsMap.putIfAbsent(name, new ArrayList<>()); + metricsMap.get(name).add(value); + } + + @Override + public void addData(String name, double value, StandardUnit unit, MetricsLevel level) { + metricsMap.putIfAbsent(name, new ArrayList<>()); + 
metricsMap.get(name).add(value); + } + + @Override + public void addDimension(String name, String value) {} + + @Override + public void end() {} + }; + + @BeforeEach + void setup() { + MockitoAnnotations.initMocks(this); + when(metricsFactory.createMetrics()).thenReturn(metricsScope); + metricsMap = new HashMap<>(); + } + + @Test + void computeStats_sanity() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(10); + final TestWorkerMetric testWorkerMetric = new TestWorkerMetric(countDownLatch, 10.0, false); + + final WorkerMetricStatsManager workerMetricsManager = + createManagerInstanceAndWaitTillAwait(testWorkerMetric, countDownLatch); + + assertTrue(workerMetricsManager + .getWorkerMetricsToRawHighFreqValuesMap() + .get(testWorkerMetric) + .size() + >= 10); + workerMetricsManager + .getWorkerMetricsToRawHighFreqValuesMap() + .get(testWorkerMetric) + .forEach(value -> assertEquals(10D, value, "in memory stats map have incorrect value.")); + + List values1 = workerMetricsManager.computeMetrics().get(WorkerMetricType.CPU.getShortName()); + assertEquals(1, values1.size(), "Lengths of values list does not match"); + assertEquals(10D, values1.get(0), "Averaged value is not correct"); + // After computeStats called, inMemoryQueue is expected to drain. + assertEquals( + 0, + workerMetricsManager + .getWorkerMetricsToRawHighFreqValuesMap() + .get(testWorkerMetric) + .size()); + + // calling stats again without inMemory update is expected to return -1 for last value. 
+ List values2 = workerMetricsManager.computeMetrics().get(WorkerMetricType.CPU.getShortName()); + assertEquals(2, values2.size(), "Lengths of values list does not match"); + assertEquals(-1, values2.get(1), "Last value of compute stats is not -1"); + } + + @Test + void computeStats_workerMetricReturningValueWithMoreThan6DigitAfterDecimal_assertTriming() + throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(5); + final TestWorkerMetric testWorkerMetric = new TestWorkerMetric(countDownLatch, 10.12345637888234234, false); + final WorkerMetricStatsManager workerMetricsManager = + createManagerInstanceAndWaitTillAwait(testWorkerMetric, countDownLatch); + final List values1 = workerMetricsManager.computeMetrics().get(WorkerMetricType.CPU.getShortName()); + + // assert that upto 6 digit after decimal is returned + assertEquals(10.123456, values1.get(0)); + } + + @Test + void computeStats_workerMetricReturningNull_expectWorkerMetricFailureStatsComputed() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(10); + final TestWorkerMetric testWorkerMetric = new TestWorkerMetric(countDownLatch, null, false); + + final WorkerMetricStatsManager workerMetricsManager = + createManagerInstanceAndWaitTillAwait(testWorkerMetric, countDownLatch); + + assertEquals( + 0, + workerMetricsManager + .getWorkerMetricsToRawHighFreqValuesMap() + .get(testWorkerMetric) + .size()); + assertEquals( + 1, + workerMetricsManager + .computeMetrics() + .get(WorkerMetricType.CPU.getShortName()) + .size(), + "Lengths of values list does not match"); + } + + @ParameterizedTest + @CsvSource({"101, false", "50, true", "-10, false", ", false"}) + void recordStats_workerMetricReturningInvalidValues_assertNoDataRecordedAndMetricsForFailure( + final Double value, final boolean shouldThrowException) throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(10); + final TestWorkerMetric testWorkerMetric = new 
TestWorkerMetric(countDownLatch, value, shouldThrowException); + + final WorkerMetricStatsManager workerMetricsManager = + createManagerInstanceAndWaitTillAwait(testWorkerMetric, countDownLatch); + List metricsValues = metricsMap.get(METRICS_IN_MEMORY_REPORTER_FAILURE); + + assertTrue(metricsValues.size() > 0, "Metrics for reporter failure not published"); + assertEquals(1, metricsMap.get(METRICS_IN_MEMORY_REPORTER_FAILURE).get(0)); + assertEquals( + 0, + workerMetricsManager + .getWorkerMetricsToRawHighFreqValuesMap() + .get(testWorkerMetric) + .size()); + } + + private WorkerMetricStatsManager createManagerInstanceAndWaitTillAwait( + final TestWorkerMetric testWorkerMetric, final CountDownLatch countDownLatch) throws InterruptedException { + final WorkerMetricStatsManager workerMetricsManager = new WorkerMetricStatsManager( + TEST_STATS_COUNT, Collections.singletonList(testWorkerMetric), metricsFactory, 10); + + workerMetricsManager.startManager(); + boolean awaitSuccess = countDownLatch.await(10 * 15, TimeUnit.MILLISECONDS); + workerMetricsManager.stopManager(); + + assertTrue(awaitSuccess, "CountDownLatch did not complete successfully"); + + return workerMetricsManager; + } + + @Slf4j + @RequiredArgsConstructor + private static class TestWorkerMetric implements WorkerMetric { + + private final WorkerMetricType workerMetricType = WorkerMetricType.CPU; + + private final CountDownLatch countDownLatch; + private final Double workerMetricValue; + private final Boolean shouldThrowException; + + @Override + public String getShortName() { + return workerMetricType.getShortName(); + } + + @Override + public WorkerMetricValue capture() { + countDownLatch.countDown(); + + if (shouldThrowException) { + throw new RuntimeException("Test exception"); + } + + return WorkerMetricValue.builder().value(workerMetricValue).build(); + } + + @Override + public OperatingRange getOperatingRange() { + return null; + } + + public WorkerMetricType getWorkerMetricType() { + return 
workerMetricType; + } + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsTest.java new file mode 100644 index 000000000..42d95d37e --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/metricstats/WorkerMetricsTest.java @@ -0,0 +1,157 @@ +package software.amazon.kinesis.worker.metricstats; + +import java.time.Instant; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class WorkerMetricsTest { + + @Test + void isAnyWorkerMetricFailing_withFailingWorkerMetric_assertTrue() { + final WorkerMetricStats workerMetrics = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of( + "C", ImmutableList.of(50D, -1D), + "M", ImmutableList.of(20D, 11D))) + .build(); + + assertTrue( + workerMetrics.isAnyWorkerMetricFailing(), + "isAnyWorkerMetricFailing does not return true even with failing workerMetric"); + } + + @Test + void isAnyWorkerMetricFailing_withoutFailingWorkerMetric_assertFalse() { + final WorkerMetricStats workerMetrics = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of( + "C", ImmutableList.of(50D, 1D), + "M", ImmutableList.of(-1D, 11D))) + .build(); + + assertFalse( + workerMetrics.isAnyWorkerMetricFailing(), + "isAnyWorkerMetricFailing does not return false even without failing workerMetric"); + } + + @Test + void isAnyWorkerMetricFailing_withoutAnyValues_assertFalse() { + final WorkerMetricStats workerMetrics = WorkerMetricStats.builder() + .workerId("WorkerId1") + 
.lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of("C", ImmutableList.of())) + .build(); + + assertFalse( + workerMetrics.isAnyWorkerMetricFailing(), + "isAnyWorkerMetricFailing does not return false even without failing workerMetric"); + } + + @Test + void isValidWorkerMetrics_sanity() { + final WorkerMetricStats workerMetricsEntryForDefaultWorkerMetric = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .build(); + + assertTrue(workerMetricsEntryForDefaultWorkerMetric.isValidWorkerMetric()); + assertTrue(workerMetricsEntryForDefaultWorkerMetric.isUsingDefaultWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryWithEmptyResourceMapsForDefaultWorkerMetric = + WorkerMetricStats.builder() + .workerId("WorkerId1") + .metricStats(ImmutableMap.of()) + .operatingRange(ImmutableMap.of()) + .lastUpdateTime(Instant.now().getEpochSecond()) + .build(); + + assertTrue(workerMetricsEntryWithEmptyResourceMapsForDefaultWorkerMetric.isValidWorkerMetric()); + assertTrue(workerMetricsEntryWithEmptyResourceMapsForDefaultWorkerMetric.isUsingDefaultWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryMissingOperatingRange = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of("C", ImmutableList.of())) + .build(); + + assertFalse(workerMetricsEntryMissingOperatingRange.isValidWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryMissingLastUpdateTime = WorkerMetricStats.builder() + .workerId("WorkerId1") + .metricStats(ImmutableMap.of("C", ImmutableList.of(5D, 5D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L, 10L))) + .build(); + + assertFalse(workerMetricsEntryMissingLastUpdateTime.isValidWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryMissingResourceMetrics = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + 
.operatingRange(ImmutableMap.of("C", ImmutableList.of(80L, 10L))) + .build(); + + assertFalse(workerMetricsEntryMissingResourceMetrics.isValidWorkerMetric()); + + // C workerMetric has resourceStats but not operatingRange + final WorkerMetricStats workerMetricsEntryWithMismatchWorkerStatsAndOperatingRangeKey = + WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of("C", ImmutableList.of(5D, 5D))) + .operatingRange(ImmutableMap.of("M", ImmutableList.of(80L, 10L))) + .build(); + + assertFalse(workerMetricsEntryWithMismatchWorkerStatsAndOperatingRangeKey.isValidWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryWithEmptyOperatingRangeValue = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of("C", ImmutableList.of(5D, 5D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of())) + .build(); + + assertFalse(workerMetricsEntryWithEmptyOperatingRangeValue.isValidWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryWithNoMetricStats = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of()) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L, 10L))) + .build(); + + assertTrue(workerMetricsEntryWithNoMetricStats.isValidWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryWithNullResourceMetrics = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L, 10L))) + .build(); + + assertFalse(workerMetricsEntryWithNullResourceMetrics.isValidWorkerMetric()); + + final WorkerMetricStats workerMetricsEntryWithZeroMaxUtilization = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of("C", ImmutableList.of(5D, 5D))) + 
.operatingRange(ImmutableMap.of("C", ImmutableList.of(0L, 10L))) + .build(); + + assertFalse(workerMetricsEntryWithZeroMaxUtilization.isValidWorkerMetric()); + + final WorkerMetricStats validWorkerMetricsEntry = WorkerMetricStats.builder() + .workerId("WorkerId1") + .lastUpdateTime(Instant.now().getEpochSecond()) + .metricStats(ImmutableMap.of("C", ImmutableList.of(5D, 5D))) + .operatingRange(ImmutableMap.of("C", ImmutableList.of(80L, 10L))) + .build(); + + assertTrue(validWorkerMetricsEntry.isValidWorkerMetric()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/Ec2ResourceTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/Ec2ResourceTest.java new file mode 100644 index 000000000..1ab069b1b --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/Ec2ResourceTest.java @@ -0,0 +1,59 @@ +package software.amazon.kinesis.worker.platform; + +import java.io.IOException; +import java.net.HttpURLConnection; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class Ec2ResourceTest { + + private UrlOpener mockIdUrl; + private UrlOpener mockTokenUrl; + private HttpURLConnection mockIdConnection; + private HttpURLConnection mockTokenConnection; + private Ec2Resource ec2Resource; + + @BeforeEach + void setUp() throws Exception { + mockIdUrl = mock(UrlOpener.class); + mockTokenUrl = mock(UrlOpener.class); + mockIdConnection = mock(HttpURLConnection.class); + mockTokenConnection = mock(HttpURLConnection.class); + ec2Resource = new Ec2Resource(mockIdUrl, mockTokenUrl); + when(mockIdUrl.openConnection()).thenReturn(mockIdConnection); + 
when(mockTokenUrl.openConnection()).thenReturn(mockTokenConnection); + } + + @Test + void testIsEc2WhenResponseCode200() throws Exception { + when(mockIdConnection.getResponseCode()).thenReturn(200); + assertTrue(ec2Resource.isOnPlatform()); + assertEquals(ResourceMetadataProvider.ComputePlatform.EC2, ec2Resource.getPlatform()); + } + + @Test + void testIsEc2WhenTokenConnectionThrowsBecauseImdsV1() throws Exception { + when(mockTokenConnection.getResponseCode()).thenThrow(new IOException()); + when(mockIdConnection.getResponseCode()).thenReturn(200); + assertTrue(ec2Resource.isOnPlatform()); + } + + @Test + void testIsNotEc2() throws Exception { + when(mockIdConnection.getResponseCode()).thenReturn(403); + assertFalse(ec2Resource.isOnPlatform()); + } + + @Test + void testIsNotEc2WhenConnectionThrows() throws Exception { + when(mockIdConnection.getResponseCode()).thenThrow(new IOException()); + assertFalse(ec2Resource.isOnPlatform()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/EcsResourceTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/EcsResourceTest.java new file mode 100644 index 000000000..974f27b13 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/EcsResourceTest.java @@ -0,0 +1,43 @@ +package software.amazon.kinesis.worker.platform; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class EcsResourceTest { + + private static final String ECS_METADATA_KEY_V4 = "ECS_CONTAINER_METADATA_URI_V4"; + private static final String ECS_METADATA_KEY_V3 = "ECS_CONTAINER_METADATA_URI"; + + @Test + void testIsEcs() { + final Map mockSysEnv = new HashMap<>(); + mockSysEnv.put(ECS_METADATA_KEY_V3, "v3"); + 
assertTrue(new EcsResource(mockSysEnv).isOnPlatform()); + + // test when both v3 and v4 exists + mockSysEnv.put(ECS_METADATA_KEY_V4, "v4"); + assertTrue(new EcsResource(mockSysEnv).isOnPlatform()); + + // test when only v4 exists + mockSysEnv.remove(ECS_METADATA_KEY_V3); + final EcsResource ecsResource = new EcsResource(mockSysEnv); + assertTrue(ecsResource.isOnPlatform()); + assertEquals(ResourceMetadataProvider.ComputePlatform.ECS, ecsResource.getPlatform()); + } + + @Test + void testIsNotEcs() { + final Map mockSysEnv = new HashMap<>(); + assertFalse(new EcsResource(mockSysEnv).isOnPlatform()); + + mockSysEnv.put(ECS_METADATA_KEY_V3, ""); + mockSysEnv.put(ECS_METADATA_KEY_V4, ""); + assertFalse(new EcsResource(mockSysEnv).isOnPlatform()); + } +} diff --git a/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/EksResourceTest.java b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/EksResourceTest.java new file mode 100644 index 000000000..a74ae5044 --- /dev/null +++ b/amazon-kinesis-client/src/test/java/software/amazon/kinesis/worker/platform/EksResourceTest.java @@ -0,0 +1,33 @@ +package software.amazon.kinesis.worker.platform; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class EksResourceTest { + + @TempDir + Path tempDir; + + @Test + void testIsEks() throws IOException { + final File mockK8sTokenFile = new File(tempDir.toFile(), "k8sToken"); + mockK8sTokenFile.createNewFile(); + final EksResource eksResource = new EksResource(mockK8sTokenFile.getPath()); + assertTrue(eksResource.isOnPlatform()); + assertEquals(ResourceMetadataProvider.ComputePlatform.EKS, eksResource.getPlatform()); + } + + @Test + void 
testIsNotEks() { + final EksResource eksResource = new EksResource(""); + assertFalse(eksResource.isOnPlatform()); + } +} diff --git a/amazon-kinesis-client/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/amazon-kinesis-client/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 000000000..1f0955d45 --- /dev/null +++ b/amazon-kinesis-client/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1 @@ +mock-maker-inline diff --git a/docs/multilang/configuring-credential-providers.md b/docs/multilang/configuring-credential-providers.md index 9b85baaaa..67494a6a2 100644 --- a/docs/multilang/configuring-credential-providers.md +++ b/docs/multilang/configuring-credential-providers.md @@ -8,24 +8,33 @@ This document should help multilang customers configure a suitable `CredentialPr ## Sample Provider Configuration -In a Properties file, an `AWSCredentialsProperty` configuration might look like: +In a Properties file, an `AwsCredentialsProperty` configuration might look like: ``` -AWSCredentialsProvider = STSAssumeRoleSessionCredentialsProvider|| +AwsCredentialsProvider = StsAssumeRoleCredentialsProvider|| ``` -This basic configuration creates an [STSAssumeRoleSessionCredentialsProvider][sts-assume-provider] with an ARN and session name. +This basic configuration creates an [StsAssumeRoleCredentialsProvider][sts-assume-provider] with an ARN and session name. + +The providers generated by this config property will be [AWS SDK v2 AwsCredentialsProviders][aws-credentials-provider]. +These differ from the SDK v1 AWSCredentialsProviders in a number of ways. See [Credentials Provider Changes][credentials-provider-changes]. + While functional, this configuration is limited. For example, this configuration cannot set a regional endpoint (e.g., VPC use case). 
-Leveraging nested properties, an `AWSCredentialsProperty` value might change to: +Leveraging nested properties, an `AwsCredentialsProperty` value might change to: ``` -AWSCredentialsProvider = KclSTSAssumeRoleSessionCredentialsProvider||\ +AwsCredentialsProvider = KclSTSAssumeRoleSessionCredentialsProvider||\ |endpointRegion=us-east-1|externalId=spartacus ``` N.B. Backslash (`\`) is for multi-line legibility and is not required. +You can create a default [DefaultCredentialsProvider][default-credentials-provider] by passing it in the config like: +``` +AwsCredentialsProvider = DefaultCredentialsProvider +``` + ## Nested Properties -KCL multilang supports "nested properties" on the `AWSCredentialsProvider` key in the properties file. +KCL multilang supports "nested properties" on the `AwsCredentialsProvider` key in the properties file. The [Backus-Naur form][bnf] of the value: ``` ::= ["|" ]* ["|" ]* @@ -36,8 +45,9 @@ The [Backus-Naur form][bnf] of the value: # this depends on the nested key ``` -In general, required parameters are passed directly to the class' constructor -(e.g., [STSAssumeRoleSessionCredentialsProvider(String, String)][sts-assume-provider-constructor]). +In general, required parameters are passed directly to the class' constructor or .create() method +(e.g., [ProfileCredentialsProvider(String)][profile-credentials-provider-create]). However, most of these providers +require builders and will require a custom implementation similar to `KclStsAssumeRoleCredentialsProvider` for customization Nested properties are a custom mapping provided by KCL multilang, and do not exist in the AWS SDK. See [NestedPropertyKey][nested-property-key] for the supported keys, and details on their expected values. 
@@ -54,18 +64,27 @@ A backwards-compatible addition might look like: } ``` -### KclSTSAssumeRoleSessionCredentialsProvider +Leveraging nested properties, an `AwsCredentialsProperty` value might look like: +``` +AwsCredentialsProvider = KclSTSAssumeRoleSessionCredentialsProvider||\ + |endpointRegion=us-east-1|externalId=spartacus +``` + +N.B. Backslash (`\`) is for multi-line legibility and is not required. +### KclStsAssumeRoleCredentialsProvider -KCL multilang includes a [custom nested property processor for `STSAssumeRole`][kcl-sts-provider]. -Multilang configurations that use `STSAssumeRoleSessionCredentialsProvider` need only prefix `Kcl` to exercise this new provider: +KCL multilang includes a [custom nested property processor for `StsAssumeRole`][kcl-sts-provider]. +Multilang configurations that use `StsAssumeRoleSessionCredentialsProvider` need only prefix `Kcl` to exercise this new provider: ``` -AWSCredentialsProvider = KclSTSAssumeRoleSessionCredentialsProvider|| +AwsCredentialsProvider = KclStsAssumeRoleCredentialsProvider|| ``` [aws-credentials-provider]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.html [bnf]: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form -[kcl-sts-provider]: /amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclSTSAssumeRoleSessionCredentialsProvider.java +[kcl-sts-provider]: /amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/auth/KclStsAssumeRoleCredentialsProvider.java [nested-property-key]: /amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyKey.java [nested-property-processor]: /amazon-kinesis-client-multilang/src/main/java/software/amazon/kinesis/multilang/NestedPropertyProcessor.java -[sts-assume-provider]: https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/STSAssumeRoleSessionCredentialsProvider.html 
-[sts-assume-provider-constructor]: https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/STSAssumeRoleSessionCredentialsProvider.html#STSAssumeRoleSessionCredentialsProvider-java.lang.String-java.lang.String- +[sts-assume-provider]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/sts/auth/StsAssumeRoleCredentialsProvider.html +[profile-credentials-provider-create]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/ProfileCredentialsProvider.html#create(java.lang.String) +[default-credentials-provider]: https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html +[credentials-provider-changes]: https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/migration-client-credentials.html diff --git a/pom.xml b/pom.xml index e9a9bcb0e..a387dbd35 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ amazon-kinesis-client-pom pom Amazon Kinesis Client Library - 2.6.1-SNAPSHOT + 3.0.0 The Amazon Kinesis Client Library for Java enables Java developers to easily consume and process data from Amazon Kinesis.