diff --git a/.doc_gen/metadata/entityresolution_metadata.yaml b/.doc_gen/metadata/entityresolution_metadata.yaml new file mode 100644 index 00000000000..b318b6c2a41 --- /dev/null +++ b/.doc_gen/metadata/entityresolution_metadata.yaml @@ -0,0 +1,162 @@ +entityresolution_Hello: + title: Hello &ERlong; + title_abbrev: Hello &ER; + synopsis: get started using &ER;. + category: Hello + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_hello.main + services: + entityresolution: {listMatchingWorkflows} +entityresolution_DeleteSchemaMapping: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_delete_mappings.main + services: + entityresolution: {DeleteSchemaMapping} +entityresolution_TagEntityResource: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_tag_resource.main + services: + entityresolution: {TagEntityResource} +entityresolution_CreateMatchingWorkflow: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_create_matching_workflow.main + services: + entityresolution: {CreateMatchingWorkflow} +entityresolution_CheckWorkflowStatus: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_check_matching_workflow.main + services: + entityresolution: {CheckWorkflowStatus} +entityresolution_StartMatchingJob: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_start_job.main + services: + entityresolution: 
{StartMatchingJob} +entityresolution_GetMatchingJob: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_get_job.main + services: + entityresolution: {GetMatchingJob} +entityresolution_DeleteMatchingWorkflow: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_delete_matching_workflow.main + services: + entityresolution: {DeleteMatchingWorkflow} +entityresolution_ListSchemaMappings: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_list_mappings.main + services: + entityresolution: {ListSchemaMappings} +entityresolution_GetSchemaMapping: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_get_schema_mapping.main + services: + entityresolution: {GetSchemaMapping} +entityresolution_CreateSchemaMapping: + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + excerpts: + - description: + snippet_tags: + - entityres.java2_create_schema.main + services: + entityresolution: {CreateSchemaMapping} +entityresolution_Scenario: + synopsis_list: + - Create Schema Mapping. + - Create an &ERlong; workflow. + - Start the matching job for the workflow. + - Get details for the matching job. + - Get Schema Mapping. + - List all Schema Mappings. + - Tag the Schema Mapping resource. + - Delete the &ERlong; Assets. + category: Basics + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/entityresolution + sdkguide: + excerpts: + - description: Run an interactive scenario demonstrating &ERlong; features. 
+ snippet_tags: + - entityres.java2_scenario.main + - description: A wrapper class for &ERlong; SDK methods. + snippet_tags: + - entityres.java2_actions.main + services: + entityresolution: {} diff --git a/.doc_gen/validation.yaml b/.doc_gen/validation.yaml index ea3da0450aa..242ab71ebf2 100644 --- a/.doc_gen/validation.yaml +++ b/.doc_gen/validation.yaml @@ -5,6 +5,7 @@ allow_list: - "e9772d140489982e0e3704fea5ee93d536f1e275" # Safe look-alikes, mostly tokens and paths that happen to be 40 characters. - "/AmazonEventBridgeServiceIntegrationTest" + - "erbucketf684533d2680435fa99d24b1bdaf5179" - "/ListOrganizationalUnitsForParentExample" - "AmazonDataZoneBedrockPermissionsBoundary" - "AWSEC2/latest/APIReference/OperationList" diff --git a/javav2/example_code/entityresolution/.gitignore b/javav2/example_code/entityresolution/.gitignore new file mode 100644 index 00000000000..5ff6309b719 --- /dev/null +++ b/javav2/example_code/entityresolution/.gitignore @@ -0,0 +1,38 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/javav2/example_code/entityresolution/README.md b/javav2/example_code/entityresolution/README.md new file mode 100644 index 00000000000..70ff18acb90 --- /dev/null +++ b/javav2/example_code/entityresolution/README.md @@ -0,0 +1,123 @@ +# AWS Entity Resolution code examples for the SDK for Java 2.x + +## Overview + +Shows how to use the AWS SDK for Java 2.x to work with AWS Entity Resolution. 
+ + + + +_AWS Entity Resolution helps organizations extract, link, and organize information from multiple data sources._ + +## ⚠ Important + +* Running this code might result in charges to your AWS account. For more details, see [AWS Pricing](https://aws.amazon.com/pricing/) and [Free Tier](https://aws.amazon.com/free/). +* Running the tests might result in charges to your AWS account. +* We recommend that you grant your code least privilege. At most, grant only the minimum permissions required to perform the task. For more information, see [Grant least privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege). +* This code is not tested in every AWS Region. For more information, see [AWS Regional Services](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services). + + + + +## Code examples + +### Prerequisites + +For prerequisites, see the [README](../../README.md#Prerequisites) in the `javav2` folder. + + + + + +### Get started + +- [Hello AWS Entity Resolution](src/main/java/com/example/entity/HelloEntityResoultion.java#L19) (`listMatchingWorkflows`) + + +### Basics + +Code examples that show you how to perform the essential operations within a service. + +- [Learn the basics](src/main/java/com/example/entity/scenario/EntityResScenario.java) + + +### Single actions + +Code excerpts that show you how to call individual service functions. 
+ +- [CheckWorkflowStatus](src/main/java/com/example/entity/scenario/EntityResActions.java#L391) +- [CreateMatchingWorkflow](src/main/java/com/example/entity/scenario/EntityResActions.java#L429) +- [CreateSchemaMapping](src/main/java/com/example/entity/scenario/EntityResActions.java#L230) +- [DeleteMatchingWorkflow](src/main/java/com/example/entity/scenario/EntityResActions.java#L196) +- [DeleteSchemaMapping](src/main/java/com/example/entity/scenario/EntityResActions.java#L137) +- [GetMatchingJob](src/main/java/com/example/entity/scenario/EntityResActions.java#L317) +- [GetSchemaMapping](src/main/java/com/example/entity/scenario/EntityResActions.java#L280) +- [ListSchemaMappings](src/main/java/com/example/entity/scenario/EntityResActions.java#L173) +- [StartMatchingJob](src/main/java/com/example/entity/scenario/EntityResActions.java#L354) +- [TagEntityResource](src/main/java/com/example/entity/scenario/EntityResActions.java#L516) + + + + + +## Run the examples + +### Instructions + + + + + +#### Hello AWS Entity Resolution + +This example shows you how to get started using AWS Entity Resolution. + + +#### Learn the basics + +This example shows you how to do the following: + +- Create Schema Mapping. +- Create an AWS Entity Resolution workflow. +- Start the matching job for the workflow. +- Get details for the matching job. +- Get Schema Mapping. +- List all Schema Mappings. +- Tag the Schema Mapping resource. +- Delete the AWS Entity Resolution Assets. + + + + + + + + + +### Tests + +⚠ Running tests might result in charges to your AWS account. + + +To find instructions for running these tests, see the [README](../../README.md#Tests) +in the `javav2` folder. 
+ + + + + + +## Additional resources + +- [AWS Entity Resolution User Guide](https://docs.aws.amazon.com/entityresolution/latest/userguide/what-is-service.html) +- [AWS Entity Resolution API Reference](https://docs.aws.amazon.com/entityresolution/latest/apireference/Welcome.html) +- [SDK for Java 2.x AWS Entity Resolution reference](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/entityresolution/package-summary.html) + + + + +--- + +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + +SPDX-License-Identifier: Apache-2.0 diff --git a/javav2/example_code/entityresolution/pom.xml b/javav2/example_code/entityresolution/pom.xml new file mode 100644 index 00000000000..a70292a446b --- /dev/null +++ b/javav2/example_code/entityresolution/pom.xml @@ -0,0 +1,132 @@ + + + 4.0.0 + + org.example + entityresolution + 1.0-SNAPSHOT + + UTF-8 + 17 + 17 + 17 + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.1 + + IntegrationTest + + + + + + + + software.amazon.awssdk + bom + 2.29.45 + pom + import + + + org.apache.logging.log4j + log4j-bom + 2.23.1 + pom + import + + + + + + org.junit.jupiter + junit-jupiter-api + 5.9.2 + test + + + org.junit.jupiter + junit-jupiter-engine + 5.9.2 + test + + + software.amazon.awssdk + secretsmanager + + + com.google.code.gson + gson + 2.10.1 + + + org.junit.platform + junit-platform-commons + 1.9.2 + + + org.junit.platform + junit-platform-launcher + 1.9.2 + test + + + software.amazon.awssdk + entityresolution + + + com.opencsv + opencsv + 5.7.1 + + + software.amazon.awssdk + s3 + + + + org.fusesource.jansi + jansi + 2.4.0 + + + software.amazon.awssdk + netty-nio-client + + + software.amazon.awssdk + cloudformation + + + software.amazon.awssdk + sso + + + software.amazon.awssdk + ssooidc + + + org.apache.logging.log4j + log4j-core + + + org.slf4j + slf4j-api + 2.0.13 + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + org.apache.logging.log4j + log4j-1.2-api + + + \ No newline at end 
of file diff --git a/javav2/example_code/entityresolution/src/main/java/com/example/entity/HelloEntityResoultion.java b/javav2/example_code/entityresolution/src/main/java/com/example/entity/HelloEntityResoultion.java new file mode 100644 index 00000000000..770f183c9ee --- /dev/null +++ b/javav2/example_code/entityresolution/src/main/java/com/example/entity/HelloEntityResoultion.java @@ -0,0 +1,93 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.entity; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.retry.RetryMode; +import software.amazon.awssdk.http.async.SdkAsyncHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.entityresolution.EntityResolutionAsyncClient; +import software.amazon.awssdk.services.entityresolution.model.ListMatchingWorkflowsRequest; +import software.amazon.awssdk.services.entityresolution.paginators.ListMatchingWorkflowsPublisher; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; + +// snippet-start:[entityres.java2_hello.main] +/** + * Before running this Java V2 code example, set up your development + * environment, including your credentials. 
+ * + * For more information, see the following documentation topic: + * + * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html + */ +public class HelloEntityResoultion { + + private static final Logger logger = LoggerFactory.getLogger(HelloEntityResoultion.class); + + private static EntityResolutionAsyncClient entityResolutionAsyncClient; + public static void main(String[] args) { + listMatchingWorkflows(); + } + + public static EntityResolutionAsyncClient getResolutionAsyncClient() { + if (entityResolutionAsyncClient == null) { + /* + The `NettyNioAsyncHttpClient` class is part of the AWS SDK for Java, version 2, + and it is designed to provide a high-performance, asynchronous HTTP client for interacting with AWS services. + It uses the Netty framework to handle the underlying network communication and the Java NIO API to + provide a non-blocking, event-driven approach to HTTP requests and responses. + */ + + SdkAsyncHttpClient httpClient = NettyNioAsyncHttpClient.builder() + .maxConcurrency(50) // Adjust as needed. + .connectionTimeout(Duration.ofSeconds(60)) // Set the connection timeout. + .readTimeout(Duration.ofSeconds(60)) // Set the read timeout. + .writeTimeout(Duration.ofSeconds(60)) // Set the write timeout. + .build(); + + ClientOverrideConfiguration overrideConfig = ClientOverrideConfiguration.builder() + .apiCallTimeout(Duration.ofMinutes(2)) // Set the overall API call timeout. + .apiCallAttemptTimeout(Duration.ofSeconds(90)) // Set the individual call attempt timeout. + .retryStrategy(RetryMode.STANDARD) + .build(); + + entityResolutionAsyncClient = EntityResolutionAsyncClient.builder() + .httpClient(httpClient) + .overrideConfiguration(overrideConfig) + .build(); + } + return entityResolutionAsyncClient; + } + + /** + * Lists all matching workflows using an asynchronous paginator. + *

+ * This method requests a paginated list of matching workflows from the + * AWS Entity Resolution service and logs the names of the retrieved workflows. + * It uses an asynchronous approach with a paginator and waits for the operation + * to complete using {@code CompletableFuture#join()}. + *

+ */ + public static void listMatchingWorkflows() { + ListMatchingWorkflowsRequest request = ListMatchingWorkflowsRequest.builder().build(); + + ListMatchingWorkflowsPublisher paginator = + getResolutionAsyncClient().listMatchingWorkflowsPaginator(request); + + // Iterate through the paginated results asynchronously + CompletableFuture future = paginator.subscribe(response -> { + response.workflowSummaries().forEach(workflow -> + logger.info("Matching Workflow Name: " + workflow.workflowName()) + ); + }); + + // Wait for the asynchronous operation to complete + future.join(); + } +} +// snippet-end:[entityres.java2_hello.main] diff --git a/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/CloudFormationHelper.java b/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/CloudFormationHelper.java new file mode 100644 index 00000000000..12f48a586bd --- /dev/null +++ b/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/CloudFormationHelper.java @@ -0,0 +1,188 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.example.entity.scenario; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.retry.RetryMode; +import software.amazon.awssdk.http.async.SdkAsyncHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.services.cloudformation.CloudFormationAsyncClient; +import software.amazon.awssdk.services.cloudformation.model.Capability; +import software.amazon.awssdk.services.cloudformation.model.CloudFormationException; +import software.amazon.awssdk.services.cloudformation.model.DescribeStacksRequest; +import software.amazon.awssdk.services.cloudformation.model.DescribeStacksResponse; +import software.amazon.awssdk.services.cloudformation.model.Output; +import software.amazon.awssdk.services.cloudformation.model.Stack; +import software.amazon.awssdk.services.cloudformation.waiters.CloudFormationAsyncWaiter; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; + +public class CloudFormationHelper { + private static final String CFN_TEMPLATE = "template.yaml"; + private static final Logger logger = LoggerFactory.getLogger(CloudFormationHelper.class); + + private static CloudFormationAsyncClient cloudFormationClient; + + public static void main(String[] args) { + emptyS3Bucket(args[0]); + } + + private static CloudFormationAsyncClient getCloudFormationClient() { + if (cloudFormationClient == null) { + SdkAsyncHttpClient httpClient = 
NettyNioAsyncHttpClient.builder() + .maxConcurrency(100) + .connectionTimeout(Duration.ofSeconds(60)) + .readTimeout(Duration.ofSeconds(60)) + .writeTimeout(Duration.ofSeconds(60)) + .build(); + + ClientOverrideConfiguration overrideConfig = ClientOverrideConfiguration.builder() + .apiCallTimeout(Duration.ofMinutes(2)) + .apiCallAttemptTimeout(Duration.ofSeconds(90)) + .retryStrategy(RetryMode.STANDARD) + .build(); + + cloudFormationClient = CloudFormationAsyncClient.builder() + .httpClient(httpClient) + .overrideConfiguration(overrideConfig) + .build(); + } + return cloudFormationClient; + } + + public static void deployCloudFormationStack(String stackName) { + String templateBody; + boolean doesExist = describeStack(stackName); + if (!doesExist) { + try { + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + Path filePath = Paths.get(classLoader.getResource(CFN_TEMPLATE).toURI()); + templateBody = Files.readString(filePath); + } catch (IOException | URISyntaxException e) { + throw new RuntimeException(e); + } + + getCloudFormationClient().createStack(b -> b.stackName(stackName) + .templateBody(templateBody) + .capabilities(Capability.CAPABILITY_IAM)) + .whenComplete((csr, t) -> { + if (csr != null) { + System.out.println("Stack creation requested, ARN is " + csr.stackId()); + try (CloudFormationAsyncWaiter waiter = getCloudFormationClient().waiter()) { + waiter.waitUntilStackCreateComplete(request -> request.stackName(stackName)) + .whenComplete((dsr, th) -> { + if (th != null) { + System.out.println("Error waiting for stack creation: " + th.getMessage()); + } else { + dsr.matched().response().orElseThrow(() -> new RuntimeException("Failed to deploy")); + System.out.println("Stack created successfully"); + } + }).join(); + } + } else { + System.out.format("Error creating stack: " + t.getMessage(), t); + throw new RuntimeException(t.getCause().getMessage(), t); + } + }).join(); + } else { + logger.info("{} stack already exists", stackName); 
+ } + } + + // Check to see if the Stack exists before deploying it + public static Boolean describeStack(String stackName) { + try { + CompletableFuture future = getCloudFormationClient().describeStacks(); + DescribeStacksResponse stacksResponse = (DescribeStacksResponse) future.join(); + List stacks = stacksResponse.stacks(); + for (Stack myStack : stacks) { + if (myStack.stackName().compareTo(stackName) == 0) { + return true; + } + } + } catch (CloudFormationException e) { + System.err.println(e.getMessage()); + } + return false; + } + + public static void destroyCloudFormationStack(String stackName) { + getCloudFormationClient().deleteStack(b -> b.stackName(stackName)) + .whenComplete((dsr, t) -> { + if (dsr != null) { + System.out.println("Delete stack requested ...."); + try (CloudFormationAsyncWaiter waiter = getCloudFormationClient().waiter()) { + waiter.waitUntilStackDeleteComplete(request -> request.stackName(stackName)) + .whenComplete((waiterResponse, throwable) -> + System.out.println("Stack deleted successfully.")) + .join(); + } + } else { + System.out.format("Error deleting stack: " + t.getMessage(), t); + throw new RuntimeException(t.getCause().getMessage(), t); + } + }).join(); + } + + public static CompletableFuture> getStackOutputsAsync(String stackName) { + CloudFormationAsyncClient cloudFormationAsyncClient = getCloudFormationClient(); + + DescribeStacksRequest describeStacksRequest = DescribeStacksRequest.builder() + .stackName(stackName) + .build(); + + return cloudFormationAsyncClient.describeStacks(describeStacksRequest) + .handle((describeStacksResponse, throwable) -> { + if (throwable != null) { + throw new RuntimeException("Failed to get stack outputs for: " + stackName, throwable); + } + + // Process the result + if (describeStacksResponse.stacks().isEmpty()) { + throw new RuntimeException("Stack not found: " + stackName); + } + + Stack stack = describeStacksResponse.stacks().get(0); + Map outputs = new HashMap<>(); + for (Output 
output : stack.outputs()) { + outputs.put(output.outputKey(), output.outputValue()); + } + + return outputs; + }); + } + + public static void emptyS3Bucket(String bucketName) { + S3AsyncClient s3Client = S3AsyncClient.builder().build(); + + s3Client.listObjectsV2(req -> req.bucket(bucketName)) + .thenCompose(response -> { + List> deleteFutures = response.contents().stream() + .map(s3Object -> s3Client.deleteObject(req -> req + .bucket(bucketName) + .key(s3Object.key()))) + .collect(Collectors.toList()); + + return CompletableFuture.allOf(deleteFutures.toArray(new CompletableFuture[0])); + }) + .join(); + + s3Client.close(); + } +} + diff --git a/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResActions.java b/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResActions.java new file mode 100644 index 00000000000..82935f66fc2 --- /dev/null +++ b/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResActions.java @@ -0,0 +1,759 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.example.entity.scenario; + +import com.opencsv.CSVReader; +import com.opencsv.exceptions.CsvException; +import org.fusesource.jansi.AnsiConsole; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.retry.RetryMode; +import software.amazon.awssdk.http.async.SdkAsyncHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.entityresolution.EntityResolutionAsyncClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.entityresolution.model.ConflictException; +import software.amazon.awssdk.services.entityresolution.model.CreateMatchingWorkflowRequest; +import software.amazon.awssdk.services.entityresolution.model.CreateMatchingWorkflowResponse; +import software.amazon.awssdk.services.entityresolution.model.CreateSchemaMappingRequest; +import software.amazon.awssdk.services.entityresolution.model.CreateSchemaMappingResponse; +import software.amazon.awssdk.services.entityresolution.model.DeleteMatchingWorkflowRequest; +import software.amazon.awssdk.services.entityresolution.model.DeleteMatchingWorkflowResponse; +import software.amazon.awssdk.services.entityresolution.model.DeleteSchemaMappingRequest; +import software.amazon.awssdk.services.entityresolution.model.DeleteSchemaMappingResponse; +import software.amazon.awssdk.services.entityresolution.model.GetMatchingJobRequest; +import software.amazon.awssdk.services.entityresolution.model.GetMatchingJobResponse; +import software.amazon.awssdk.services.entityresolution.model.GetSchemaMappingRequest; +import software.amazon.awssdk.services.entityresolution.model.GetSchemaMappingResponse; +import 
software.amazon.awssdk.services.entityresolution.model.InputSource; +import software.amazon.awssdk.services.entityresolution.model.JobMetrics; +import software.amazon.awssdk.services.entityresolution.model.ListSchemaMappingsRequest; +import software.amazon.awssdk.services.entityresolution.model.OutputAttribute; +import software.amazon.awssdk.services.entityresolution.model.OutputSource; +import software.amazon.awssdk.services.entityresolution.model.ResolutionTechniques; +import software.amazon.awssdk.services.entityresolution.model.ResolutionType; +import software.amazon.awssdk.services.entityresolution.model.ResourceNotFoundException; +import software.amazon.awssdk.services.entityresolution.model.SchemaAttributeType; +import software.amazon.awssdk.services.entityresolution.model.SchemaInputAttribute; +import software.amazon.awssdk.services.entityresolution.model.StartMatchingJobRequest; +import software.amazon.awssdk.services.entityresolution.model.TagResourceResponse; +import software.amazon.awssdk.services.entityresolution.model.ValidationException; +import software.amazon.awssdk.services.entityresolution.paginators.ListSchemaMappingsPublisher; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.entityresolution.model.TagResourceRequest; + +import java.io.IOException; +import java.io.StringReader; +import java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static 
org.fusesource.jansi.Ansi.ansi; + +// snippet-start:[entityres.java2_actions.main] +public class EntityResActions { + + private static final String PREFIX = "eroutput/"; + private static final Logger logger = LoggerFactory.getLogger(EntityResActions.class); + + private static EntityResolutionAsyncClient entityResolutionAsyncClient; + + private static S3AsyncClient s3AsyncClient; + + public static EntityResolutionAsyncClient getResolutionAsyncClient() { + if (entityResolutionAsyncClient == null) { + /* + The `NettyNioAsyncHttpClient` class is part of the AWS SDK for Java, version 2, + and it is designed to provide a high-performance, asynchronous HTTP client for interacting with AWS services. + It uses the Netty framework to handle the underlying network communication and the Java NIO API to + provide a non-blocking, event-driven approach to HTTP requests and responses. + */ + + SdkAsyncHttpClient httpClient = NettyNioAsyncHttpClient.builder() + .maxConcurrency(50) // Adjust as needed. + .connectionTimeout(Duration.ofSeconds(60)) // Set the connection timeout. + .readTimeout(Duration.ofSeconds(60)) // Set the read timeout. + .writeTimeout(Duration.ofSeconds(60)) // Set the write timeout. + .build(); + + ClientOverrideConfiguration overrideConfig = ClientOverrideConfiguration.builder() + .apiCallTimeout(Duration.ofMinutes(2)) // Set the overall API call timeout. + .apiCallAttemptTimeout(Duration.ofSeconds(90)) // Set the individual call attempt timeout. 
+ .retryStrategy(RetryMode.STANDARD) + .build(); + + entityResolutionAsyncClient = EntityResolutionAsyncClient.builder() + .httpClient(httpClient) + .overrideConfiguration(overrideConfig) + .build(); + } + return entityResolutionAsyncClient; + } + + public static S3AsyncClient getS3AsyncClient() { + if (s3AsyncClient == null) { + /* + The `NettyNioAsyncHttpClient` class is part of the AWS SDK for Java, version 2, + and it is designed to provide a high-performance, asynchronous HTTP client for interacting with AWS services. + It uses the Netty framework to handle the underlying network communication and the Java NIO API to + provide a non-blocking, event-driven approach to HTTP requests and responses. + */ + + SdkAsyncHttpClient httpClient = NettyNioAsyncHttpClient.builder() + .maxConcurrency(50) // Adjust as needed. + .connectionTimeout(Duration.ofSeconds(60)) // Set the connection timeout. + .readTimeout(Duration.ofSeconds(60)) // Set the read timeout. + .writeTimeout(Duration.ofSeconds(60)) // Set the write timeout. + .build(); + + ClientOverrideConfiguration overrideConfig = ClientOverrideConfiguration.builder() + .apiCallTimeout(Duration.ofMinutes(2)) // Set the overall API call timeout. + .apiCallAttemptTimeout(Duration.ofSeconds(90)) // Set the individual call attempt timeout. + .retryStrategy(RetryMode.STANDARD) + .build(); + + s3AsyncClient = S3AsyncClient.builder() + .httpClient(httpClient) + .overrideConfiguration(overrideConfig) + .build(); + } + return s3AsyncClient; + } + + // snippet-start:[entityres.java2_delete_mappings.main] + /** + * Deletes the schema mapping asynchronously. 
+ * + * @param schemaName the name of the schema to delete + * @return a {@link CompletableFuture} that completes when the schema mapping is deleted successfully, + * or throws a {@link RuntimeException} if the deletion fails + */ + public CompletableFuture deleteSchemaMappingAsync(String schemaName) { + DeleteSchemaMappingRequest request = DeleteSchemaMappingRequest.builder() + .schemaName(schemaName) + .build(); + + return getResolutionAsyncClient().deleteSchemaMapping(request) + .whenComplete((response, exception) -> { + if (response != null) { + // Successfully deleted the schema mapping, log the success message. + logger.info("Schema mapping '{}' deleted successfully.", schemaName); + } else { + // Ensure exception is not null before accessing its cause. + if (exception == null) { + throw new CompletionException("An unknown error occurred while deleting the schema mapping.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("The schema mapping was not found to delete: " + schemaName, cause); + } + + // Wrap other AWS exceptions in a CompletionException. + throw new CompletionException("Failed to delete schema mapping: " + schemaName, exception); + } + }); + } + // snippet-end:[entityres.java2_delete_mappings.main] + + // snippet-start:[entityres.java2_list_mappings.main] + /** + * Lists the schema mappings associated with the current AWS account. This method uses an asynchronous paginator to + * retrieve the schema mappings, and prints the name of each schema mapping to the console. 
+ */ + public void ListSchemaMappings() { + ListSchemaMappingsRequest mappingsRequest = ListSchemaMappingsRequest.builder() + .build(); + + ListSchemaMappingsPublisher paginator = getResolutionAsyncClient().listSchemaMappingsPaginator(mappingsRequest); + + // Iterate through the pages of results + CompletableFuture future = paginator.subscribe(response -> { + response.schemaList().forEach(schemaMapping -> + logger.info("Schema Mapping Name: " + schemaMapping.schemaName()) + ); + }); + + // Wait for the asynchronous operation to complete + future.join(); + } + // snippet-end:[entityres.java2_list_mappings.main] + + // snippet-start:[entityres.java2_delete_matching_workflow.main] + /** + * Asynchronously deletes a workflow with the specified name. + * + * @param workflowName the name of the workflow to be deleted + * @return a {@link CompletableFuture} that completes when the workflow has been deleted + * @throws RuntimeException if the deletion of the workflow fails + */ + public CompletableFuture deleteMatchingWorkflowAsync(String workflowName) { + DeleteMatchingWorkflowRequest request = DeleteMatchingWorkflowRequest.builder() + .workflowName(workflowName) + .build(); + + return getResolutionAsyncClient().deleteMatchingWorkflow(request) + .whenComplete((response, exception) -> { + if (response != null) { + logger.info("{} was deleted", workflowName ); + } else { + if (exception == null) { + throw new CompletionException("An unknown error occurred while deleting the workflow.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("The workflow to delete was not found.", cause); + } + + // Wrap other AWS exceptions in a CompletionException. 
+ throw new CompletionException("Failed to delete workflow: " + exception.getMessage(), exception); + } + }); + } + // snippet-end:[entityres.java2_delete_matching_workflow.main] + + // snippet-start:[entityres.java2_create_schema.main] + /** + * Creates a schema mapping asynchronously. + * + * @param schemaName the name of the schema to create + * @return a {@link CompletableFuture} that represents the asynchronous creation of the schema mapping + */ + public CompletableFuture createSchemaMappingAsync(String schemaName) { + List schemaAttributes = null; + if (schemaName.startsWith("json")) { + schemaAttributes = List.of( + SchemaInputAttribute.builder().matchKey("id").fieldName("id").type(SchemaAttributeType.UNIQUE_ID).build(), + SchemaInputAttribute.builder().matchKey("name").fieldName("name").type(SchemaAttributeType.NAME).build(), + SchemaInputAttribute.builder().matchKey("email").fieldName("email").type(SchemaAttributeType.EMAIL_ADDRESS).build() + ); + } else { + schemaAttributes = List.of( + SchemaInputAttribute.builder().matchKey("id").fieldName("id").type(SchemaAttributeType.UNIQUE_ID).build(), + SchemaInputAttribute.builder().matchKey("name").fieldName("name").type(SchemaAttributeType.NAME).build(), + SchemaInputAttribute.builder().matchKey("email").fieldName("email").type(SchemaAttributeType.EMAIL_ADDRESS).build(), + SchemaInputAttribute.builder().fieldName("phone").type(SchemaAttributeType.PROVIDER_ID).subType("STRING").build() + ); + } + + CreateSchemaMappingRequest request = CreateSchemaMappingRequest.builder() + .schemaName(schemaName) + .mappedInputFields(schemaAttributes) + .build(); + + return getResolutionAsyncClient().createSchemaMapping(request) + .whenComplete((response, exception) -> { + if (response != null) { + logger.info("[{}] schema mapping Created Successfully!", schemaName); + } else { + if (exception == null) { + throw new CompletionException("An unknown error occurred while creating the schema mapping.", null); + } + + Throwable cause 
= exception.getCause(); + if (cause instanceof ConflictException) { + throw new CompletionException("A conflicting schema mapping already exists. Resolve conflicts before proceeding.", cause); + } + + // Wrap other AWS exceptions in a CompletionException. + throw new CompletionException("Failed to create schema mapping: " + exception.getMessage(), exception); + } + }); + } + // snippet-end:[entityres.java2_create_schema.main] + + // snippet-start:[entityres.java2_get_schema_mapping.main] + /** + * Retrieves the schema mapping asynchronously. + * + * @param schemaName the name of the schema to retrieve the mapping for + * @return a {@link CompletableFuture} that completes with the {@link GetSchemaMappingResponse} when the operation + * is complete + * @throws RuntimeException if the schema mapping retrieval fails + */ + public CompletableFuture getSchemaMappingAsync(String schemaName) { + GetSchemaMappingRequest mappingRequest = GetSchemaMappingRequest.builder() + .schemaName(schemaName) + .build(); + + return getResolutionAsyncClient().getSchemaMapping(mappingRequest) + .whenComplete((response, exception) -> { + if (response != null) { + response.mappedInputFields().forEach(attribute -> + logger.info("Attribute Name: " + attribute.fieldName() + + ", Attribute Type: " + attribute.type().toString())); + } else { + if (exception == null) { + throw new CompletionException("An unknown error occurred while getting schema mapping.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("The requested schema mapping was not found.", cause); + } + + // Wrap other exceptions in a CompletionException with the message. 
+ throw new CompletionException("Failed to get schema mapping: " + exception.getMessage(), exception); + } + }); + } + // snippet-end:[entityres.java2_get_schema_mapping.main] + + // snippet-start:[entityres.java2_get_job.main] + /** + * Asynchronously retrieves a matching job based on the provided job ID and workflow name. + * + * @param jobId the ID of the job to retrieve + * @param workflowName the name of the workflow associated with the job + * @return a {@link CompletableFuture} that completes when the job information is available or an exception occurs + */ + public CompletableFuture getMatchingJobAsync(String jobId, String workflowName) { + GetMatchingJobRequest request = GetMatchingJobRequest.builder() + .jobId(jobId) + .workflowName(workflowName) + .build(); + + return getResolutionAsyncClient().getMatchingJob(request) + .whenComplete((response, exception) -> { + if (response != null) { + // Successfully fetched the matching job details, log the job status. + logger.info("Job status: " + response.status()); + logger.info("Job details: " + response.toString()); + } else { + if (exception == null) { + throw new CompletionException("An unknown error occurred while fetching the matching job.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("The requested job could not be found.", cause); + } + + // Wrap other exceptions in a CompletionException with the message. + throw new CompletionException("Error fetching matching job: " + exception.getMessage(), exception); + } + }); + } + // snippet-end:[entityres.java2_get_job.main] + + // snippet-start:[entityres.java2_start_job.main] + + /** + * Starts a matching job asynchronously for the specified workflow name. 
+ * + * @param workflowName the name of the workflow for which to start the matching job + * @return a {@link CompletableFuture} that completes with the job ID of the started matching job, or an empty + * string if the operation fails + */ + public CompletableFuture startMatchingJobAsync(String workflowName) { + StartMatchingJobRequest jobRequest = StartMatchingJobRequest.builder() + .workflowName(workflowName) + .build(); + + return getResolutionAsyncClient().startMatchingJob(jobRequest) + .whenComplete((response, exception) -> { + if (response != null) { + String jobId = response.jobId(); + logger.info("Job ID: " + jobId); + } else { + if (exception == null) { + throw new CompletionException("An unknown error occurred while starting the job.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ConflictException) { + throw new CompletionException("The job is already running. Resolve conflicts before starting a new job.", cause); + } + + // Wrap other AWS exceptions in a CompletionException. + throw new CompletionException("Failed to start the job: " + exception.getMessage(), exception); + } + }) + .thenApply(response -> response != null ? response.jobId() : ""); + } + // snippet-end:[entityres.java2_start_job.main] + + // snippet-start:[entityres.java2_check_matching_workflow.main] + /** + * Checks the status of a workflow asynchronously. 
+ * + * @param jobId the ID of the job to check + * @param workflowName the name of the workflow to check + * @return a CompletableFuture that resolves to a boolean value indicating whether the workflow has completed + * successfully + */ + public CompletableFuture checkWorkflowStatusCompleteAsync(String jobId, String workflowName) { + GetMatchingJobRequest request = GetMatchingJobRequest.builder() + .jobId(jobId) + .workflowName(workflowName) + .build(); + + return getResolutionAsyncClient().getMatchingJob(request) + .whenComplete((response, exception) -> { + if (response != null) { + // Process the response and log the job status. + logger.info("Job status: " + response.status()); + } else { + // Ensure exception is not null before accessing its cause. + if (exception == null) { + throw new CompletionException("An unknown error occurred while checking job status.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("The requested resource was not found while checking the job status.", cause); + } + + // Wrap other AWS exceptions in a CompletionException. + throw new CompletionException("Failed to check job status: " + exception.getMessage(), exception); + } + }); + } + // snippet-end:[entityres.java2_check_matching_workflow.main] + + // snippet-start:[entityres.java2_create_matching_workflow.main] + /** + * Creates an asynchronous CompletableFuture to manage the creation of a matching workflow. 
+ * + * @param roleARN the AWS IAM role ARN to be used for the workflow execution + * @param workflowName the name of the workflow to be created + * @param outputBucket the S3 bucket path where the workflow output will be stored + * @param jsonGlueTableArn the ARN of the Glue Data Catalog table to be used as the input source + * @param jsonErSchemaMappingName the name of the schema to be used for the input source + * @return a CompletableFuture that, when completed, will return the ARN of the created workflow + */ + public CompletableFuture createMatchingWorkflowAsync( + String roleARN + , String workflowName + , String outputBucket + , String jsonGlueTableArn + , String jsonErSchemaMappingName + , String csvGlueTableArn + , String csvErSchemaMappingName) { + + InputSource jsonInputSource = InputSource.builder() + .inputSourceARN(jsonGlueTableArn) + .schemaName(jsonErSchemaMappingName) + .applyNormalization(false) + .build(); + + InputSource csvInputSource = InputSource.builder() + .inputSourceARN(csvGlueTableArn) + .schemaName(csvErSchemaMappingName) + .applyNormalization(false) + .build(); + + OutputAttribute idOutputAttribute = OutputAttribute.builder() + .name("id") + .build(); + + OutputAttribute nameOutputAttribute = OutputAttribute.builder() + .name("name") + .build(); + + OutputAttribute emailOutputAttribute = OutputAttribute.builder() + .name("email") + .build(); + + OutputAttribute phoneOutputAttribute = OutputAttribute.builder() + .name("phone") + .build(); + + OutputSource outputSource = OutputSource.builder() + .outputS3Path("s3://" + outputBucket + "/eroutput") + .output(idOutputAttribute, nameOutputAttribute, emailOutputAttribute, phoneOutputAttribute) + .applyNormalization(false) + .build(); + + ResolutionTechniques resolutionType = ResolutionTechniques.builder() + .resolutionType(ResolutionType.ML_MATCHING) + .build(); + + CreateMatchingWorkflowRequest workflowRequest = CreateMatchingWorkflowRequest.builder() + .roleArn(roleARN) + 
.description("Created by using the AWS SDK for Java") + .workflowName(workflowName) + .inputSourceConfig(List.of(jsonInputSource, csvInputSource)) + .outputSourceConfig(List.of(outputSource)) + .resolutionTechniques(resolutionType) + .build(); + + return getResolutionAsyncClient().createMatchingWorkflow(workflowRequest) + .whenComplete((response, exception) -> { + if (response != null) { + logger.info("Workflow created successfully."); + } else { + Throwable cause = exception.getCause(); + if (cause instanceof ValidationException) { + throw new CompletionException("Invalid request: Please check input parameters.", cause); + } + + if (cause instanceof ConflictException) { + throw new CompletionException("A conflicting workflow already exists. Resolve conflicts before proceeding.", cause); + } + throw new CompletionException("Failed to create workflow: " + exception.getMessage(), exception); + } + }) + .thenApply(CreateMatchingWorkflowResponse::workflowArn); + } + // snippet-end:[entityres.java2_create_matching_workflow.main] + + // snippet-start:[entityres.java2_tag_resource.main] + /** + * Tags the specified schema mapping ARN. + * + * @param schemaMappingARN the ARN of the schema mapping to tag + */ + public CompletableFuture tagEntityResource(String schemaMappingARN) { + Map tags = new HashMap<>(); + tags.put("tag1", "tag1Value"); + tags.put("tag2", "tag2Value"); + + TagResourceRequest request = TagResourceRequest.builder() + .resourceArn(schemaMappingARN) + .tags(tags) + .build(); + + return getResolutionAsyncClient().tagResource(request) + .whenComplete((response, exception) -> { + if (response != null) { + // Successfully tagged the resource, log the success message. 
+ logger.info("Successfully tagged the resource."); + } else { + if (exception == null) { + throw new CompletionException("An unknown error occurred while tagging the resource.", null); + } + + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("The resource to tag was not found.", cause); + } + throw new CompletionException("Failed to tag the resource: " + exception.getMessage(), exception); + } + }); + } + // snippet-end:[entityres.java2_tag_resource.main] + + // snippet-start:[entityres.java2_job_info.main] + public CompletableFuture getJobInfo(String workflowName, String jobId) { + return getResolutionAsyncClient().getMatchingJob(b -> b + .workflowName(workflowName) + .jobId(jobId)) + .whenComplete((response, exception) -> { + if (response != null) { + logger.info("Job metrics fetched successfully for jobId: " + jobId); + } else { + Throwable cause = exception.getCause(); + if (cause instanceof ResourceNotFoundException) { + throw new CompletionException("Invalid request: Job id was not found.", cause); + } + throw new CompletionException("Failed to fetch job info: " + exception.getMessage(), exception); + } + }) + .thenApply(response -> response.metrics()); // Extract job metrics + } + // snippet-end:[entityres.java2_job_info.main] + + /** + * Uploads data to an Amazon S3 bucket asynchronously. + * + * @param bucketName the name of the S3 bucket to upload the data to + * @param jsonData the JSON data to be uploaded + * @param csvData the CSV data to be uploaded + * @return a {@link CompletableFuture} representing both asynchronous operation of uploading the data + * @throws RuntimeException if an error occurs during the file upload + */ + + public void uploadInputData(String bucketName, String jsonData, String csvData) { + // Upload JSON data. 
+ String jsonKey = "jsonData/data.json"; + PutObjectRequest jsonUploadRequest = PutObjectRequest.builder() + .bucket(bucketName) + .key(jsonKey) + .contentType("application/json") + .build(); + + CompletableFuture jsonUploadResponse = getS3AsyncClient().putObject(jsonUploadRequest, AsyncRequestBody.fromString(jsonData)); + + // Upload CSV data. + String csvKey = "csvData/data.csv"; + PutObjectRequest csvUploadRequest = PutObjectRequest.builder() + .bucket(bucketName) + .key(csvKey) + .contentType("text/csv") + .build(); + CompletableFuture csvUploadResponse = getS3AsyncClient().putObject(csvUploadRequest, AsyncRequestBody.fromString(csvData)); + + CompletableFuture.allOf(jsonUploadResponse, csvUploadResponse) + .whenComplete((result, ex) -> { + if (ex != null) { + // Wrap an AWS exception. + throw new CompletionException("Failed to upload files", ex); + } + }).join(); + + } + + /** + * Finds the latest file in the S3 bucket that starts with "run-" in any depth of subfolders + */ + private CompletableFuture findLatestMatchingFile(String bucketName) { + ListObjectsV2Request request = ListObjectsV2Request.builder() + .bucket(bucketName) + .prefix(PREFIX) // Searches within the given folder + .build(); + + return getS3AsyncClient().listObjectsV2(request) + .thenApply(response -> response.contents().stream() + .map(S3Object::key) + .filter(key -> key.matches(".*?/run-[0-9a-zA-Z\\-]+")) // Matches files like run-XXXXX in any subfolder + .max(String::compareTo) // Gets the latest file + .orElse(null)) + .whenComplete((result, exception) -> { + if (exception == null) { + if (result != null) { + logger.info("Latest matching file found: " + result); + } else { + logger.info("No matching files found."); + } + } else { + throw new CompletionException("Failed to find latest matching file: " + exception.getMessage(), exception); + } + }); + } + + /** + * Prints the data located in the file in the S3 bucket that starts with "run-" in any depth of subfolders + */ + public void 
printData(String bucketName) { + try { + // Find the latest file with "run-" prefix in any depth of subfolders. + String s3Key = findLatestMatchingFile(bucketName).join(); + if (s3Key == null) { + logger.error("No matching files found in S3."); + return; + } + + logger.info("Downloading file: " + s3Key); + + // Read CSV file as String. + String csvContent = readCSVFromS3Async(bucketName, s3Key).join(); + if (csvContent.isEmpty()) { + logger.error("File is empty."); + return; + } + + // Process CSV content. + List records = parseCSV(csvContent); + printTable(records); + + } catch (RuntimeException | IOException | CsvException e) { + logger.error("Error processing CSV file from S3: " + e.getMessage()); + e.printStackTrace(); + } + } + + /** + * Reads a CSV file from S3 and returns it as a String. + */ + private static CompletableFuture readCSVFromS3Async(String bucketName, String s3Key) { + GetObjectRequest getObjectRequest = GetObjectRequest.builder() + .bucket(bucketName) + .key(s3Key) + .build(); + + // Initiating the asynchronous request to get the file as bytes + return getS3AsyncClient().getObject(getObjectRequest, AsyncResponseTransformer.toBytes()) + .thenApply(responseBytes -> responseBytes.asUtf8String()) // Convert bytes to UTF-8 string + .whenComplete((result, exception) -> { + if (exception != null) { + throw new CompletionException("Failed to read CSV from S3: " + exception.getMessage(), exception); + } else { + logger.info("Successfully fetched CSV file content from S3."); + } + }); + } + + /** + * Parses CSV content from a String into a list of records. 
+ */ + private static List parseCSV(String csvContent) throws IOException, CsvException { + try (CSVReader csvReader = new CSVReader(new StringReader(csvContent))) { + return csvReader.readAll(); + } + } + + /** + * Prints the given CSV data in a formatted table + */ + private static void printTable(List records) { + if (records.isEmpty()) { + System.out.println("No records found."); + return; + } + + String[] headers = records.get(0); + List rows = records.subList(1, records.size()); + + // Determine column widths dynamically based on longest content + int[] columnWidths = new int[headers.length]; + for (int i = 0; i < headers.length; i++) { + final int columnIndex = i; + int maxWidth = Math.max(headers[i].length(), rows.stream() + .map(row -> row.length > columnIndex ? row[columnIndex].length() : 0) + .max(Integer::compareTo) + .orElse(0)); + columnWidths[i] = Math.min(maxWidth, 25); // Limit max width for better readability + } + + // Enable ANSI Console for colored output + AnsiConsole.systemInstall(); + + // Print table header + System.out.println(ansi().fgYellow().a("=== CSV Data from S3 ===").reset()); + printRow(headers, columnWidths, true); + + // Print rows + rows.forEach(row -> printRow(row, columnWidths, false)); + + // Restore console to normal + AnsiConsole.systemUninstall(); + } + + private static void printRow(String[] row, int[] columnWidths, boolean isHeader) { + String border = IntStream.range(0, columnWidths.length) + .mapToObj(i -> "-".repeat(columnWidths[i] + 2)) + .collect(Collectors.joining("+", "+", "+")); + + if (isHeader) { + System.out.println(border); + } + + System.out.print("|"); + for (int i = 0; i < columnWidths.length; i++) { + String cell = (i < row.length && row[i] != null) ? row[i] : ""; + System.out.printf(" %-" + columnWidths[i] + "s |", isHeader ? 
ansi().fgBrightBlue().a(cell).reset() : cell); + } + System.out.println(); + + if (isHeader) { + System.out.println(border); + } + } +} +// snippet-end:[entityres.java2_actions.main] \ No newline at end of file diff --git a/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResScenario.java b/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResScenario.java new file mode 100644 index 00000000000..75f7dfc26f4 --- /dev/null +++ b/javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResScenario.java @@ -0,0 +1,492 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.entity.scenario; + +import software.amazon.awssdk.services.cloudformation.model.CloudFormationException; +import software.amazon.awssdk.services.entityresolution.model.ConflictException; +import software.amazon.awssdk.services.entityresolution.model.CreateSchemaMappingResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.entityresolution.model.GetMatchingJobResponse; +import software.amazon.awssdk.services.entityresolution.model.GetSchemaMappingResponse; +import software.amazon.awssdk.services.entityresolution.model.JobMetrics; +import software.amazon.awssdk.services.entityresolution.model.ResourceNotFoundException; +import software.amazon.awssdk.services.entityresolution.model.ValidationException; +import software.amazon.awssdk.services.s3.model.S3Exception; +import java.util.Map; +import java.util.Scanner; +import java.util.UUID; +import java.util.concurrent.CompletionException; + +// snippet-start:[entityres.java2_scenario.main] +public class EntityResScenario { + private static final Logger logger = LoggerFactory.getLogger(EntityResScenario.class); + public static final String DASHES = new String(new char[80]).replace("\0", "-"); + private static final String 
STACK_NAME = "EntityResolutionCdkStack"; + private static final String ENTITY_RESOLUTION_ROLE_ARN_KEY = "EntityResolutionRoleArn"; + private static final String GLUE_DATA_BUCKET_NAME_KEY = "GlueDataBucketName"; + private static final String JSON_GLUE_TABLE_ARN_KEY = "JsonErGlueTableArn"; + private static final String CSV_GLUE_TABLE_ARN_KEY = "CsvErGlueTableArn"; + private static String glueBucketName; + private static String workflowName = "workflow-" + UUID.randomUUID(); + + private static String jsonSchemaMappingName = "jsonschema-" + UUID.randomUUID(); + private static String jsonSchemaMappingArn = null; + private static String csvSchemaMappingName = "csv-" + UUID.randomUUID(); + private static String roleARN; + private static String csvGlueTableArn; + private static String jsonGlueTableArn; + private static Scanner scanner = new Scanner(System.in); + + private static EntityResActions actions = new EntityResActions(); + + public static void main(String[] args) throws InterruptedException { + + logger.info("Welcome to the AWS Entity Resolution Scenario."); + logger.info(""" + AWS Entity Resolution is a fully-managed machine learning service provided by + Amazon Web Services (AWS) that helps organizations extract, link, and + organize information from multiple data sources. It leverages natural + language processing and deep learning models to identify and resolve + entities, such as people, places, organizations, and products, + across structured and unstructured data. + + With Entity Resolution, customers can build robust data integration + pipelines to combine and reconcile data from multiple systems, databases, + and documents. The service can handle ambiguous, incomplete, or conflicting + information, and provide a unified view of entities and their relationships. + This can be particularly valuable in applications such as customer 360, + fraud detection, supply chain management, and knowledge management, where + accurate entity identification is crucial. 
+ + The `EntityResolutionAsyncClient` interface in the AWS SDK for Java 2.x + provides a set of methods to programmatically interact with the AWS Entity + Resolution service. This allows developers to automate the entity extraction, + linking, and deduplication process as part of their data processing workflows. + With Entity Resolution, organizations can unlock the value of their data, + improve decision-making, and enhance customer experiences by having a reliable, + comprehensive view of their key entities. + """); + + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info(""" + To prepare the AWS resources needed for this scenario application, the next step uploads + a CloudFormation template whose resulting stack creates the following resources: + - An AWS Glue Data Catalog table + - An AWS IAM role + - An AWS S3 bucket + - An AWS Entity Resolution Schema + + It can take a couple minutes for the Stack to finish creating the resources. + """); + waitForInputToContinue(scanner); + logger.info("Generating resources..."); + CloudFormationHelper.deployCloudFormationStack(STACK_NAME); + Map outputsMap = CloudFormationHelper.getStackOutputsAsync(STACK_NAME).join(); + roleARN = outputsMap.get(ENTITY_RESOLUTION_ROLE_ARN_KEY); + glueBucketName = outputsMap.get(GLUE_DATA_BUCKET_NAME_KEY); + csvGlueTableArn = outputsMap.get(CSV_GLUE_TABLE_ARN_KEY); + jsonGlueTableArn = outputsMap.get(JSON_GLUE_TABLE_ARN_KEY); + logger.info(DASHES); + waitForInputToContinue(scanner); + + try { + runScenario(); + + } catch (Exception ce) { + Throwable cause = ce.getCause(); + logger.error("An exception happened: " + (cause != null ? cause.getMessage() : ce.getMessage())); + } + } + + private static void runScenario() throws InterruptedException { + /* + This JSON is a valid input for the AWS Entity Resolution service. + The JSON represents an array of three objects, each containing an "id", "name", and "email" + property. 
This format aligns with the expected input structure for the + Entity Resolution service. + */ + String json = """ + {"id":"1","name":"Jane Doe","email":"jane.doe@example.com"} + {"id":"2","name":"John Doe","email":"john.doe@example.com"} + {"id":"3","name":"Jorge Souza","email":"jorge_souza@example.com"} + """; + logger.info("Upload the following JSON objects to the {} S3 bucket.", glueBucketName); + logger.info(json); + String csv = """ + id,name,email,phone + 1,Jane B.,Doe,jane.doe@example.com,555-876-9846 + 2,John Doe Jr.,john.doe@example.com,555-654-3210 + 3,María García,maría_garcia@company.com,555-567-1234 + 4,Mary Major,mary_major@company.com,555-222-3333 + """; + logger.info("Upload the following CSV data to the {} S3 bucket.", glueBucketName); + logger.info(csv); + waitForInputToContinue(scanner); + try { + actions.uploadInputData(glueBucketName, json, csv); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + + if (cause == null) { + logger.error("Failed to upload input data: {}", ce.getMessage(), ce); + } + + if (cause instanceof ResourceNotFoundException) { + logger.error("Failed to upload input data as the resource was not found: {}", cause.getMessage(), cause); + } + return; + } + logger.info("The JSON and CSV objects have been uploaded to the S3 bucket."); + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("1. Create Schema Mapping"); + logger.info(""" + Entity Resolution schema mapping aligns and integrates data from + multiple sources by identifying and matching corresponding entities + like customers or products. It unifies schemas, resolves conflicts, + and uses machine learning to link related entities, enabling a + consolidated, accurate view for improved data quality and decision-making. + + In this example, the schema mapping lines up with the fields in the JSON and CSV objects. That is, + it contains these fields: id, name, and email. 
+ """); + try { + CreateSchemaMappingResponse response = actions.createSchemaMappingAsync(jsonSchemaMappingName).join(); + jsonSchemaMappingName = response.schemaName(); + logger.info("The JSON schema mapping name is " + jsonSchemaMappingName); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + + if (cause == null) { + logger.error("Failed to create JSON schema mapping: {}", ce.getMessage(), ce); + } + + if (cause instanceof ConflictException) { + logger.error("Schema mapping conflict detected: {}", cause.getMessage(), cause); + } else { + logger.error("Unexpected error while creating schema mapping: {}", cause.getMessage(), cause); + } + return; + } + + try { + CreateSchemaMappingResponse response = actions.createSchemaMappingAsync(csvSchemaMappingName).join(); + csvSchemaMappingName = response.schemaName(); + logger.info("The CSV schema mapping name is " + csvSchemaMappingName); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + if (cause == null) { + logger.error("Failed to create CSV schema mapping: {}", ce.getMessage(), ce); + } + + if (cause instanceof ConflictException) { + logger.error("Schema mapping conflict detected: {}", cause.getMessage(), cause); + } else { + logger.error("Unexpected error while creating CSV schema mapping: {}", cause.getMessage(), cause); + } + return; + } + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("2. Create an AWS Entity Resolution Workflow. "); + logger.info(""" + An Entity Resolution matching workflow identifies and links records + across datasets that represent the same real-world entity, such as + customers or products. Using techniques like schema mapping, + data profiling, and machine learning algorithms, + it evaluates attributes like names or emails to detect duplicates + or relationships, even with variations or inconsistencies. + The workflow outputs consolidated, de-duplicated data. 
+ + We will use the machine learning-based matching technique. + """); + waitForInputToContinue(scanner); + try { + String workflowArn = actions.createMatchingWorkflowAsync( + roleARN, workflowName, glueBucketName, jsonGlueTableArn, + jsonSchemaMappingName, csvGlueTableArn, csvSchemaMappingName).join(); + + logger.info("The workflow ARN is: " + workflowArn); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + + if (cause == null) { + logger.error("An unexpected error occurred: {}", ce.getMessage(), ce); + } + + if (cause instanceof ValidationException) { + logger.error("Validation error: {}", cause.getMessage(), cause); + } else if (cause instanceof ConflictException) { + logger.error("Workflow conflict detected: {}", cause.getMessage(), cause); + } else { + logger.error("Unexpected error: {}", cause.getMessage(), cause); + } + return; + } + + waitForInputToContinue(scanner); + logger.info(DASHES); + logger.info("3. Start the matching job of the " + workflowName + " workflow."); + waitForInputToContinue(scanner); + String jobId = null; + try { + jobId = actions.startMatchingJobAsync(workflowName).join(); + logger.info("The matching job was successfully started."); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + if (cause instanceof ConflictException) { + logger.error("Job conflict detected: {}", cause.getMessage(), cause); + } else { + logger.error("Unexpected error while starting the job: {}", ce.getMessage(), ce); + } + return; + } + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("4. While the matching job is running, let's look at other API methods. 
First, let's get details for job " + jobId); + waitForInputToContinue(scanner); + try { + actions.getMatchingJobAsync(jobId, workflowName).join(); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + if (cause instanceof ResourceNotFoundException) { + logger.error("The matching job not found: {}", cause.getMessage(), cause); + } else { + logger.error("Failed to start matching job: " + (cause != null ? cause.getMessage() : ce.getMessage())); + } + return; + } + logger.info(DASHES); + + logger.info(DASHES); + logger.info("5. Get the schema mapping for the JSON data."); + waitForInputToContinue(scanner); + try { + GetSchemaMappingResponse response = actions.getSchemaMappingAsync(jsonSchemaMappingName).join(); + jsonSchemaMappingArn = response.schemaArn(); + logger.info("Schema mapping ARN is " + jsonSchemaMappingArn); + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + if (cause instanceof ResourceNotFoundException) { + logger.error("Schema mapping not found: {}", cause.getMessage(), cause); + } else { + logger.error("Error retrieving the specific schema mapping: " + ce.getCause().getMessage()); + } + return; + } + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("6. List Schema Mappings."); + try { + actions.ListSchemaMappings(); + } catch (CompletionException ce) { + logger.error("Error retrieving schema mappings: " + ce.getCause().getMessage()); + return; + } + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("7. Tag the {} resource.", jsonSchemaMappingName); + logger.info(""" + Tags can help you organize and categorize your Entity Resolution resources. + You can also use them to scope user permissions by granting a user permission + to access or change only resources with certain tag values. + In Entity Resolution, SchemaMapping and MatchingWorkflow can be tagged. For this example, + the SchemaMapping is tagged. 
+ """); + try { + actions.tagEntityResource(jsonSchemaMappingArn).join(); + } catch (CompletionException ce) { + logger.error("Error tagging the resource: " + ce.getCause().getMessage()); + return; + } + + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("8. View the results of the AWS Entity Resolution Workflow."); + logger.info(""" + You cannot view the result of the workflow that is in a running state. + In order to view the results, you need to wait for the workflow that we started in step 3 to complete. + + If you choose not to wait, you cannot view the results. You can perform + this task manually in the AWS Management Console. + + This can take up to 30 mins (y/n). + """); + String viewAns = scanner.nextLine().trim(); + boolean isComplete = false; + if (viewAns.equalsIgnoreCase("y")) { + logger.info("You selected to view the Entity Resolution Workflow results."); + countdownWithWorkflowCheck(actions, 1800, jobId, workflowName); + isComplete = true; + try { + JobMetrics metrics = actions.getJobInfo(workflowName, jobId).join(); + logger.info("Number of input records: {}", metrics.inputRecords()); + logger.info("Number of match ids: {}", metrics.matchIDs()); + logger.info("Number of records not processed: {}", metrics.recordsNotProcessed()); + logger.info("Number of total records processed: {}", metrics.totalRecordsProcessed()); + logger.info("The following represents the output data generated by the Entity Resolution workflow based on the JSON and CSV input data. The output data is stored in the {} bucket.", glueBucketName); + actions.printData(glueBucketName); + + logger.info(""" + + Note that each of the last 2 records are considered a match even though the 'name' differs between the records; + For example 'John Doe Jr.' compared to 'John Doe'. + The confidence level is a value between 0 and 1, where 1 indicates a perfect match. 
+ + """); + + } catch (CompletionException ce) { + Throwable cause = ce.getCause(); + if (cause instanceof ResourceNotFoundException) { + logger.error("The job not found: {}", cause.getMessage(), cause); + } else { + logger.error("Error retrieving job information: " + ce.getCause().getMessage()); + } + return; + } + } + + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("9. Do you want to delete the resources, including the workflow? (y/n)"); + logger.info(""" + You cannot delete the workflow that is in a running state. + In order to delete the workflow, you need to wait for the workflow to complete. + + You can delete the workflow manually in the AWS Management Console at a later time. + + If you already waited for the workflow to complete in the previous step, + the workflow is completed and you can delete it. + + If the workflow is not completed, this can take up to 30 mins (y/n). + """); + String delAns = scanner.nextLine().trim(); + if (delAns.equalsIgnoreCase("y")) { + try { + if (!isComplete) { + countdownWithWorkflowCheck(actions, 1800, jobId, workflowName); + } + actions.deleteMatchingWorkflowAsync(workflowName).join(); + logger.info("Workflow deleted successfully!"); + } catch (CompletionException ce) { + logger.info("Error deleting the workflow: {} ", ce.getMessage()); + return; + } + + try { + // Delete both schema mappings. + actions.deleteSchemaMappingAsync(jsonSchemaMappingName).join(); + actions.deleteSchemaMappingAsync(csvSchemaMappingName).join(); + logger.info("Both schema mappings were deleted successfully!"); + } catch (CompletionException ce) { + logger.error("Error deleting schema mapping: {}", ce.getMessage()); + return; + } + + waitForInputToContinue(scanner); + logger.info(DASHES); + logger.info(""" + Now we delete the CloudFormation stack, which deletes + the resources that were created at the beginning of this scenario. 
+ """); + waitForInputToContinue(scanner); + logger.info(DASHES); + try { + deleteCloudFormationStack(); + } catch (RuntimeException e) { + logger.error("Failed to delete the stack: {}", e.getMessage()); + return; + } + + } else { + logger.info("You can delete the AWS resources in the AWS Management Console."); + } + + waitForInputToContinue(scanner); + logger.info(DASHES); + + logger.info(DASHES); + logger.info("This concludes the AWS Entity Resolution scenario."); + logger.info(DASHES); + } + + private static void waitForInputToContinue(Scanner scanner) { + while (true) { + logger.info(""); + logger.info("Enter 'c' followed by to continue:"); + String input = scanner.nextLine(); + + if (input.trim().equalsIgnoreCase("c")) { + logger.info("Continuing with the program..."); + logger.info(""); + break; + } else { + // Handle invalid input. + logger.info("Invalid input. Please try again."); + } + } + } + + public static void countdownWithWorkflowCheck(EntityResActions actions, int totalSeconds, String jobId, String workflowName) throws InterruptedException { + int secondsElapsed = 0; + + while (true) { + // Calculate display minutes and seconds. + int remainingTime = totalSeconds - secondsElapsed; + int displayMinutes = remainingTime / 60; + int displaySeconds = remainingTime % 60; + + // Print the countdown. + System.out.printf("\r%02d:%02d", displayMinutes, displaySeconds); + Thread.sleep(1000); // Wait for 1 second + secondsElapsed++; + + // Check workflow status every 60 seconds. + if (secondsElapsed % 60 == 0 || remainingTime <= 0) { + GetMatchingJobResponse response = actions.checkWorkflowStatusCompleteAsync(jobId, workflowName).join(); + if (response != null && "SUCCEEDED".equalsIgnoreCase(String.valueOf(response.status()))) { + logger.info(""); // Move to the next line after countdown. 
+ logger.info("Countdown complete: Workflow is in Completed state!"); + break; // Break out of the loop if the status is "SUCCEEDED" + } + } + + // If countdown reaches zero, reset it for continuous countdown. + if (remainingTime <= 0) { + secondsElapsed = 0; + } + } + } + + private static void deleteCloudFormationStack() { + try { + CloudFormationHelper.emptyS3Bucket(glueBucketName); + CloudFormationHelper.destroyCloudFormationStack(STACK_NAME); + logger.info("Resources deleted successfully!"); + } catch (CloudFormationException e) { + throw new RuntimeException("Failed to delete CloudFormation stack: " + e.getMessage(), e); + } catch (S3Exception e) { + throw new RuntimeException("Failed to empty S3 bucket: " + e.getMessage(), e); + } + } +} +// snippet-end:[entityres.java2_scenario.main] \ No newline at end of file diff --git a/javav2/example_code/entityresolution/src/main/resources/log4j2.xml b/javav2/example_code/entityresolution/src/main/resources/log4j2.xml new file mode 100644 index 00000000000..225afe2b3a8 --- /dev/null +++ b/javav2/example_code/entityresolution/src/main/resources/log4j2.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/javav2/example_code/entityresolution/src/main/resources/template.yaml b/javav2/example_code/entityresolution/src/main/resources/template.yaml new file mode 100644 index 00000000000..f0395929fa7 --- /dev/null +++ b/javav2/example_code/entityresolution/src/main/resources/template.yaml @@ -0,0 +1,263 @@ +Resources: + ErBucket6EA35F9D: + Type: AWS::S3::Bucket + Properties: + BucketName: erbucketf684533d2680435fa99d24b1bdaf5179 + UpdateReplacePolicy: Delete + DeletionPolicy: Delete + Metadata: + aws:cdk:path: EntityResolutionCdkStack/ErBucket/Resource + GlueDatabase: + Type: AWS::Glue::Database + Properties: + CatalogId: + Ref: AWS::AccountId + DatabaseInput: + Name: entity_resolution_db + Metadata: + aws:cdk:path: EntityResolutionCdkStack/GlueDatabase + jsongluetable: + Type: 
AWS::Glue::Table + Properties: + CatalogId: + Ref: AWS::AccountId + DatabaseName: + Ref: GlueDatabase + TableInput: + Name: jsongluetable + StorageDescriptor: + Columns: + - Name: id + Type: string + - Name: name + Type: string + - Name: email + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + Location: + Fn::Join: + - "" + - - s3:// + - Ref: ErBucket6EA35F9D + - /jsonData/ + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + SerdeInfo: + Parameters: + serialization.format: "1" + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + TableType: EXTERNAL_TABLE + DependsOn: + - GlueDatabase + Metadata: + aws:cdk:path: EntityResolutionCdkStack/jsongluetable + csvgluetable: + Type: AWS::Glue::Table + Properties: + CatalogId: + Ref: AWS::AccountId + DatabaseName: + Ref: GlueDatabase + TableInput: + Name: csvgluetable + StorageDescriptor: + Columns: + - Name: id + Type: string + - Name: name + Type: string + - Name: email + Type: string + - Name: phone + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + Location: + Fn::Join: + - "" + - - s3:// + - Ref: ErBucket6EA35F9D + - /csvData/ + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + SerdeInfo: + Parameters: + serialization.format: "1" + SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableType: EXTERNAL_TABLE + DependsOn: + - GlueDatabase + Metadata: + aws:cdk:path: EntityResolutionCdkStack/csvgluetable + EntityResolutionRoleB51A51D3: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Statement: + - Action: sts:AssumeRole + Effect: Allow + Principal: + Service: entityresolution.amazonaws.com + Version: "2012-10-17" + ManagedPolicyArns: + - Fn::Join: + - "" + - - "arn:" + - Ref: AWS::Partition + - :iam::aws:policy/AmazonS3FullAccess + - Fn::Join: + - "" + - - "arn:" + - Ref: AWS::Partition + - :iam::aws:policy/AWSEntityResolutionConsoleFullAccess + - Fn::Join: + - "" + - - "arn:" + - 
Ref: AWS::Partition + - :iam::aws:policy/AWSGlueConsoleFullAccess + - Fn::Join: + - "" + - - "arn:" + - Ref: AWS::Partition + - :iam::aws:policy/service-role/AWSGlueServiceRole + Metadata: + aws:cdk:path: EntityResolutionCdkStack/EntityResolutionRole/Resource + EntityResolutionRoleDefaultPolicy586C8066: + Type: AWS::IAM::Policy + Properties: + PolicyDocument: + Statement: + - Action: + - entityresolution:GetMatchingWorkflow + - entityresolution:StartMatchingWorkflow + Effect: Allow + Resource: "*" + Version: "2012-10-17" + PolicyName: EntityResolutionRoleDefaultPolicy586C8066 + Roles: + - Ref: EntityResolutionRoleB51A51D3 + Metadata: + aws:cdk:path: EntityResolutionCdkStack/EntityResolutionRole/DefaultPolicy/Resource + CDKMetadata: + Type: AWS::CDK::Metadata + Properties: + Analytics: v2:deflate64:H4sIAAAAAAAA/02MzQ7CIBCEn6V3WPuTvoD15EVTvZstRbOWgimgMYR3t4WLp5n5ZjI1VE0LZYEfy8U4cUUDhItDMbEV3YJtIOy9mKRj3V1nF9lDeQlhBQd0OKCVW3nFQcnICGcIvVGJJT0bReK7xexiZL20xi8ibU7evXy6/6ed0SM5MjqyI75xV1dQQls8LRFfvHY0S+iz/gCPIXoRxAAAAA== + Metadata: + aws:cdk:path: EntityResolutionCdkStack/CDKMetadata/Default + Condition: CDKMetadataAvailable +Outputs: + EntityResolutionRoleArn: + Description: The ARN of the EntityResolution Role + Value: + Fn::GetAtt: + - EntityResolutionRoleB51A51D3 + - Arn + JsonErGlueTableArn: + Description: The ARN of the Json Glue Table + Value: + Fn::Join: + - "" + - - "arn:aws:glue:" + - Ref: AWS::Region + - ":" + - Ref: AWS::AccountId + - :table/ + - Ref: GlueDatabase + - /jsongluetable + CsvErGlueTableArn: + Description: The ARN of the CSV Glue Table + Value: + Fn::Join: + - "" + - - "arn:aws:glue:" + - Ref: AWS::Region + - ":" + - Ref: AWS::AccountId + - :table/ + - Ref: GlueDatabase + - /csvgluetable + GlueDataBucketName: + Description: The name of the Glue Data Bucket + Value: + Ref: ErBucket6EA35F9D +Conditions: + CDKMetadataAvailable: + Fn::Or: + - Fn::Or: + - Fn::Equals: + - Ref: AWS::Region + - af-south-1 + - Fn::Equals: + - Ref: AWS::Region + - ap-east-1 
+ - Fn::Equals: + - Ref: AWS::Region + - ap-northeast-1 + - Fn::Equals: + - Ref: AWS::Region + - ap-northeast-2 + - Fn::Equals: + - Ref: AWS::Region + - ap-south-1 + - Fn::Equals: + - Ref: AWS::Region + - ap-southeast-1 + - Fn::Equals: + - Ref: AWS::Region + - ap-southeast-2 + - Fn::Equals: + - Ref: AWS::Region + - ca-central-1 + - Fn::Equals: + - Ref: AWS::Region + - cn-north-1 + - Fn::Equals: + - Ref: AWS::Region + - cn-northwest-1 + - Fn::Or: + - Fn::Equals: + - Ref: AWS::Region + - eu-central-1 + - Fn::Equals: + - Ref: AWS::Region + - eu-north-1 + - Fn::Equals: + - Ref: AWS::Region + - eu-south-1 + - Fn::Equals: + - Ref: AWS::Region + - eu-west-1 + - Fn::Equals: + - Ref: AWS::Region + - eu-west-2 + - Fn::Equals: + - Ref: AWS::Region + - eu-west-3 + - Fn::Equals: + - Ref: AWS::Region + - il-central-1 + - Fn::Equals: + - Ref: AWS::Region + - me-central-1 + - Fn::Equals: + - Ref: AWS::Region + - me-south-1 + - Fn::Equals: + - Ref: AWS::Region + - sa-east-1 + - Fn::Or: + - Fn::Equals: + - Ref: AWS::Region + - us-east-1 + - Fn::Equals: + - Ref: AWS::Region + - us-east-2 + - Fn::Equals: + - Ref: AWS::Region + - us-west-1 + - Fn::Equals: + - Ref: AWS::Region + - us-west-2 +Parameters: + BootstrapVersion: + Type: AWS::SSM::Parameter::Value<String> + Default: /cdk-bootstrap/hnb659fds/version + Description: Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip] + diff --git a/javav2/example_code/entityresolution/src/test/java/EntityResTests.java b/javav2/example_code/entityresolution/src/test/java/EntityResTests.java new file mode 100644 index 00000000000..03f2c75980d --- /dev/null +++ b/javav2/example_code/entityresolution/src/test/java/EntityResTests.java @@ -0,0 +1,187 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + + +import com.example.entity.scenario.CloudFormationHelper; +import com.example.entity.scenario.EntityResActions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; +import software.amazon.awssdk.services.entityresolution.model.CreateSchemaMappingResponse; + +import java.util.Map; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@TestInstance(TestInstance.Lifecycle.PER_METHOD) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class EntityResTests { + private static final Logger logger = LoggerFactory.getLogger(EntityResTests.class); + + private static String roleARN = ""; + + private static String csvMappingARN = ""; + private static String jsonMappingARN = ""; + + private static String jobId = ""; + + private static String glueBucketName = ""; + + private static String csvGlueTableArn = ""; + + private static String jsonGlueTableArn = ""; + + private static final String STACK_NAME = "EntityResolutionCdkStack"; + + private static final String ENTITY_RESOLUTION_ROLE_ARN_KEY = "EntityResolutionRoleArn"; + private static final String GLUE_DATA_BUCKET_NAME_KEY = "GlueDataBucketName"; + private static final String JSON_GLUE_TABLE_ARN_KEY = "JsonErGlueTableArn"; + private static final String CSV_GLUE_TABLE_ARN_KEY = "CsvErGlueTableArn"; + + private static String workflowArn = ""; + private static final String jsonSchemaMappingName = "jsonschema-" + UUID.randomUUID(); + private static final String csvSchemaMappingName = "csv-" + UUID.randomUUID(); + private static final String workflowName = "workflow-"+ 
UUID.randomUUID(); + private static final EntityResActions actions = new EntityResActions(); + @BeforeAll + public static void setUp() { + CloudFormationHelper.deployCloudFormationStack(STACK_NAME); + Map outputsMap = CloudFormationHelper.getStackOutputsAsync(STACK_NAME).join(); + roleARN = outputsMap.get(ENTITY_RESOLUTION_ROLE_ARN_KEY); + glueBucketName = outputsMap.get(GLUE_DATA_BUCKET_NAME_KEY); + csvGlueTableArn = outputsMap.get(CSV_GLUE_TABLE_ARN_KEY); + jsonGlueTableArn = outputsMap.get(JSON_GLUE_TABLE_ARN_KEY); + + String json = """ + [ + { + "id": "1", + "name": "Alice Johnson", + "email": "alice.johnson@example.com" + }, + { + "id": "2", + "name": "Bob Smith", + "email": "bob.smith@example.com" + }, + { + "id": "3", + "name": "Charlie Black", + "email": "charlie.black@example.com" + } + ] + """; + + String csv = """ + id,name,email,phone + 1,Alice B. Johnson,alice.johnson@example.com,746-876-9846 + 2,Bob Smith Jr.,bob.smith@example.com,987-654-3210 + 3,Charlie Black,charlie.black@company.com,345-567-1234 + 7,Jane E. 
Doe,jane_doe@company.com,111-222-3333 + """; + + actions.uploadInputData(glueBucketName, json, csv); + } + + @Test + @Tag("IntegrationTest") + @Order(1) + public void testCreateMapping() { + assertDoesNotThrow(() -> { + CreateSchemaMappingResponse response = actions.createSchemaMappingAsync(jsonSchemaMappingName).join(); + jsonMappingARN = response.schemaArn(); + assertNotNull(jsonMappingARN); + }); + + assertDoesNotThrow(() -> { + CreateSchemaMappingResponse response = actions.createSchemaMappingAsync(csvSchemaMappingName).join(); + csvMappingARN = response.schemaArn(); + assertNotNull(csvMappingARN); + }); + logger.info("Test 1 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(2) + public void testCreateMappingWorkflow() { + assertDoesNotThrow(() -> { + workflowArn = actions.createMatchingWorkflowAsync(roleARN, workflowName, glueBucketName, jsonGlueTableArn, jsonSchemaMappingName, csvGlueTableArn, csvSchemaMappingName).join(); + assertNotNull(workflowArn); + }); + logger.info("Test 2 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(3) + public void testStartWorkflow() { + assertDoesNotThrow(() -> { + jobId = actions.startMatchingJobAsync(workflowName).join(); + assertNotNull(workflowArn); + }); + logger.info("Test 3 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(4) + public void testGetJobDetails() { + assertDoesNotThrow(() -> { + actions.getMatchingJobAsync(jobId, workflowName).join(); + }); + logger.info("Test 4 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(5) + public void testtSchemaMappingDetails() { + assertDoesNotThrow(() -> { + actions.getSchemaMappingAsync(jsonSchemaMappingName).join(); + }); + logger.info("Test 5 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(6) + public void testListSchemaMappings() { + assertDoesNotThrow(actions::ListSchemaMappings); + logger.info("Test 6 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(7) + public void testLTagResources() { + assertDoesNotThrow(() -> { 
+ actions.tagEntityResource(csvMappingARN).join(); + }); + logger.info("Test 7 passed"); + } + + @Test + @Tag("IntegrationTest") + @Order(8) + public void testLDeleteMapping() { + assertDoesNotThrow(() -> { + logger.info("Wait 30 mins for the workflow to complete"); + Thread.sleep(1800000); + actions.deleteMatchingWorkflowAsync(workflowName).join(); + actions.deleteSchemaMappingAsync(jsonSchemaMappingName).join(); + actions.deleteSchemaMappingAsync(csvSchemaMappingName).join(); + CloudFormationHelper.emptyS3Bucket(glueBucketName); + CloudFormationHelper.destroyCloudFormationStack(STACK_NAME); + }); + logger.info("Test 8 passed"); + } +} diff --git a/scenarios/basics/entity_resolution/README.md b/scenarios/basics/entity_resolution/README.md new file mode 100644 index 00000000000..14679cbcc52 --- /dev/null +++ b/scenarios/basics/entity_resolution/README.md @@ -0,0 +1,53 @@ +# AWS Entity Resolution Program + +## Overview +This AWS Entity Resolution basic scenario demonstrates how to interact with the AWS Entity Resolution service using an AWS SDK. This application demonstrates how to use AWS Entity Resolution to integrate and deduplicate data from multiple sources using machine learning-based matching. The program walks through setting up AWS resources, uploading structured data, defining schema mappings, creating a matching workflow, and running a matching job. + + +**Note:** See the [specification document](SPECIFICATION.md) for a complete list of operations. + +## Features + +1. Uses AWS CloudFormation to create necessary resources: + +- AWS Glue Data Catalog table + +- AWS IAM role + +- AWS S3 bucket + +- AWS Entity Resolution Schema + +2. Uploads sample JSON and CSV data to S3 + +3. Creates schema mappings for JSON and CSV datasets + +4. Creates and starts an Entity Resolution matching workflow + +5. Retrieves job details and schema mappings + +6. Lists available schema mappings + +7. Tags AWS resources for better organization + +8. 
Views the results of the workflow + +## Resources + +This Basics scenario requires an IAM role that has permissions to work with the AWS Entity Resolution service, +an AWS Glue database, and an S3 bucket. A CDK script is provided to create these resources. +See the resources [Readme](../../../resources/cdk/entityresolution_resources/README.md) file. + +## Implementations + +This scenario example will be implemented in the following languages: + +- Java +- Python +- Kotlin + +## Additional Reading + +- [AWS Entity Resolution Documentation](https://docs.aws.amazon.com/entityresolution/latest/userguide/what-is-service.html) + +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. SPDX-License-Identifier: Apache-2.0 diff --git a/scenarios/basics/entity_resolution/SPECIFICATION.md b/scenarios/basics/entity_resolution/SPECIFICATION.md new file mode 100644 index 00000000000..1f0880688f4 --- /dev/null +++ b/scenarios/basics/entity_resolution/SPECIFICATION.md @@ -0,0 +1,448 @@ +# Specification for the AWS Entity Resolution Service Scenario + +## Overview + +This SDK Basics scenario demonstrates how to interact with AWS Entity Resolution +using an AWS SDK. It demonstrates various tasks such as creating a schema +mapping, creating a matching workflow, starting a workflow, and so on. Finally, +this scenario demonstrates how to clean up resources. + +## Resources + +This Basics scenario requires an IAM role that has permissions to work with the +AWS Entity Resolution service, an AWS Glue database and a table, and two S3 +buckets. +A [CDK script](../../../resources/cdk/entityresolution_resources/README.md +) is provided to create these resources. + +## Hello AWS Entity Resolution + +This program is intended for users not familiar with the AWS Entity Resolution +Service to easily get up and running. The program uses a +`listMatchingWorkflowsPaginator` to demonstrate how you can read through +workflow information. 
+ +## Basics Scenario Program Flow + +The AWS Entity Resolution Basics scenario executes the following operations. + +1. **Create a schema mapping**: + - Description: Creates a schema mapping by invoking the + `createSchemaMapping` method. + - Exception Handling: Check to see if a `ConflictException` is thrown, which + indicates that the schema mapping already exists. If the exception is + thrown, display the information and end the program. + +2. **Create a Matching Workflow**: + - Description: Creates a new matching workflow that defines how entities + should be resolved and matched. The method `createMatchingWorkflow` is + called. + - Exception Handling: Check to see if a `ConflictException` is thrown, which + is thrown if the matching workflow already exists. Also check to see if a `ValidationException` is thrown. If so, display the message and end the program. + +3. **Start Matching Workflow**: + - Description: Initiates a matching workflow by calling the + `startMatchingJob` method to process entity resolution based on predefined + configurations. + - Exception Handling: Check to see if a `ConflictException` is thrown, + which indicates that the matching workflow job is already running. If the + exception is thrown, display the message and end the program. + +4. **Get Workflow Job Details**: + - Description: Retrieves details about a specific matching workflow job by + calling the `getMatchingJob` method. + - Exception Handling: Check to see if a `ResourceNotFoundException` is + thrown, which indicates that the workflow cannot be found. If the + exception is thrown, display the message and end the program. + +5. **Get Schema Mapping**: + - Description: Returns the `SchemaMapping` of a given name by calling the + `getSchemaMapping` method. + - Exception Handling: Check to see if a `ResourceNotFoundException` is + thrown. If so, display the message and end the program. + +6. 
**List Matching Workflows**: + - Description: Lists all matching workflows created within the account by + calling the `listMatchingWorkflows` method. + - Exception Handling: Check to see if a `CompletionException` is thrown. If + so, display the message and end the program. + +7. **Tag Resource**: + - Description: Adds tags associated with an AWS Entity Resolution resource + by calling the `tagResource` method. + - Exception Handling: Check to see if a `ResourceNotFoundException` is + thrown. If so, display the message and end the program. +8. **View the results of the AWS Entity Resolution Workflow**: + - Description: View the workflow results by calling the + `getMatchingJob` method. + - Exception Handling: Check to see if a `ResourceNotFoundException` is thrown. If + so, display the message and end the program. + +9. **Delete the AWS resources**: + - Description: Delete the AWS resources including the workflow and schema mappings by calling the + `deleteMatchingWorkflow` and `deleteSchemaMapping` methods. + - Exception Handling: Check to see if a `ResourceNotFoundException` is thrown. If + so, display the message and end the program. + - Finally delete the CloudFormation Stack by calling these methods: + - CloudFormationHelper.emptyS3Bucket(glueBucketName); + - CloudFormationHelper.destroyCloudFormationStack + +### Program execution + +The following shows the output of the AWS Entity Resolution Basics scenario in +the console. + +``` +Welcome to the AWS Entity Resolution Scenario. +AWS Entity Resolution is a fully-managed machine learning service provided by +Amazon Web Services (AWS) that helps organizations extract, link, and +organize information from multiple data sources. It leverages natural +language processing and deep learning models to identify and resolve +entities, such as people, places, organizations, and products, +across structured and unstructured data. 
+ +With Entity Resolution, customers can build robust data integration +pipelines to combine and reconcile data from multiple systems, databases, +and documents. The service can handle ambiguous, incomplete, or conflicting +information, and provide a unified view of entities and their relationships. +This can be particularly valuable in applications such as customer 360, +fraud detection, supply chain management, and knowledge management, where +accurate entity identification is crucial. + +The `EntityResolutionAsyncClient` interface in the AWS SDK for Java 2.x +provides a set of methods to programmatically interact with the AWS Entity +Resolution service. This allows developers to automate the entity extraction, +linking, and deduplication process as part of their data processing workflows. +With Entity Resolution, organizations can unlock the value of their data, +improve decision-making, and enhance customer experiences by having a reliable, +comprehensive view of their key entities. + + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +To prepare the AWS resources needed for this scenario application, the next step uploads +a CloudFormation template whose resulting stack creates the following resources: +- An AWS Glue Data Catalog table +- An AWS IAM role +- An AWS S3 bucket +- An AWS Entity Resolution Schema + +It can take a couple minutes for the Stack to finish creating the resources. + + +Enter 'c' followed by to continue: +c +Continuing with the program... + +Generating resources... 
+Stack creation requested, ARN is arn:aws:cloudformation:us-east-1:814548047983:stack/EntityResolutionCdkStack/858988e0-f604-11ef-916b-0affc298c80f +Stack created successfully +-------------------------------------------------------------------------------- + +Enter 'c' followed by to continue: +c +Continuing with the program... + +Upload the following JSON objects to the erbucketf684533d2680435fa99d24b1bdaf5179 S3 bucket. +{"id":"1","name":"Jane Doe","email":"jane.doe@example.com"} +{"id":"2","name":"John Doe","email":"john.doe@example.com"} +{"id":"3","name":"Jorge Souza","email":"jorge_souza@example.com"} + +Upload the following CSV data to the erbucketf684533d2680435fa99d24b1bdaf5179 S3 bucket. +id,name,email,phone +1,Jane B.,Doe,jane.doe@example.com,555-876-9846 +2,John Doe Jr.,john.doe@example.com,555-654-3210 +3,María García,maría_garcia@company.com,555-567-1234 +4,Mary Major,mary_major@company.com,555-222-3333 + + +Enter 'c' followed by to continue: +c +Continuing with the program... + +The JSON and CSV objects have been uploaded to the S3 bucket. + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +1. Create Schema Mapping +Entity Resolution schema mapping aligns and integrates data from +multiple sources by identifying and matching corresponding entities +like customers or products. It unifies schemas, resolves conflicts, +and uses machine learning to link related entities, enabling a +consolidated, accurate view for improved data quality and decision-making. + +In this example, the schema mapping lines up with the fields in the JSON and CSV objects. That is, +it contains these fields: id, name, and email. + +[jsonschema-ef86075e-cf5e-4bb1-be50-e0f19743ddb2] schema mapping Created Successfully! 
+The JSON schema mapping name is jsonschema-ef86075e-cf5e-4bb1-be50-e0f19743ddb2 +[csv-8d05576d-66bb-4fcf-a29c-8c3de57ce48c] schema mapping Created Successfully! +The CSV schema mapping name is csv-8d05576d-66bb-4fcf-a29c-8c3de57ce48c + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +2. Create an AWS Entity Resolution Workflow. +An Entity Resolution matching workflow identifies and links records +across datasets that represent the same real-world entity, such as +customers or products. Using techniques like schema mapping, +data profiling, and machine learning algorithms, +it evaluates attributes like names or emails to detect duplicates +or relationships, even with variations or inconsistencies. +The workflow outputs consolidated, de-duplicated data. + +We will use the machine learning-based matching technique. + + +Enter 'c' followed by to continue: +c +Continuing with the program... + +Workflow created successfully. +The workflow ARN is: arn:aws:entityresolution:us-east-1:814548047983:matchingworkflow/workflow-39216b7f-f00b-4896-84ae-cd7edcfc7872 + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +3. Start the matching job of the workflow-39216b7f-f00b-4896-84ae-cd7edcfc7872 workflow. + +Enter 'c' followed by to continue: +c +Continuing with the program... + +Job ID: f25d2707729646a4af27874d991e22c5 +The matching job was successfully started. + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +4. While the matching job is running, let's look at other API methods. 
First, let's get details for job f25d2707729646a4af27874d991e22c5 + +Enter 'c' followed by to continue: +c +Continuing with the program... + +Job status: RUNNING +Job details: GetMatchingJobResponse(JobId=f25d2707729646a4af27874d991e22c5, StartTime=2025-02-28T18:49:14.921Z, Status=RUNNING) +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +5. Get the schema mapping for the JSON data. + +Enter 'c' followed by to continue: +c +Continuing with the program... + +Attribute Name: id, Attribute Type: UNIQUE_ID +Attribute Name: name, Attribute Type: NAME +Attribute Name: email, Attribute Type: EMAIL_ADDRESS +Schema mapping ARN is arn:aws:entityresolution:us-east-1:814548047983:schemamapping/jsonschema-ef86075e-cf5e-4bb1-be50-e0f19743ddb2 + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +6. List Schema Mappings. 
+Schema Mapping Name: csv-33f8e392-74e7-4a08-900a-652b94f86250 +Schema Mapping Name: csv-3b68e38b-1d5c-4836-bfc7-92ac7339e5c7 +Schema Mapping Name: csv-4f547deb-56c1-4923-9119-556bc43df08d +Schema Mapping Name: csv-6fe8bbc3-ebb5-4800-ab49-a89f75a87905 +Schema Mapping Name: csv-812ecad3-3175-49c3-93a5-d3175396d6e7 +Schema Mapping Name: csv-8d05576d-66bb-4fcf-a29c-8c3de57ce48c +Schema Mapping Name: csv-90a464e1-f050-422c-8f5f-0726541a5858 +Schema Mapping Name: csv-ebad3e3d-27be-4ed4-ae35-7401265e57bd +Schema Mapping Name: csv-f752d395-857b-4106-b2f2-85e1da5e3040 +Schema Mapping Name: jsonschema-363dc915-0540-406e-8d3f-4f1435e0b942 +Schema Mapping Name: jsonschema-5b1ad3e1-a840-4c4f-b791-5e9e1893fe7e +Schema Mapping Name: jsonschema-8623e0ec-bb8c-4fe2-a998-609eae08d84d +Schema Mapping Name: jsonschema-93d5fd04-f10e-4274-a181-489bea7b92db +Schema Mapping Name: jsonschema-b1653c13-ce77-471d-a3d5-ae4877216a74 +Schema Mapping Name: jsonschema-c09b3414-384c-4e3d-90c8-61e48abde04d +Schema Mapping Name: jsonschema-d9a6edc0-a9bd-4553-bb71-fbf0d6064ef9 +Schema Mapping Name: jsonschema-ef86075e-cf5e-4bb1-be50-e0f19743ddb2 +Schema Mapping Name: jsonschema-f0a259e0-f4e5-493a-bfd5-32740d2fa24d +Schema Mapping Name: schema2135 +Schema Mapping Name: schema435 +Schema Mapping Name: schema455 +Schema Mapping Name: schema456 +Schema Mapping Name: schema4648 +Schema Mapping Name: schema4720 +Schema Mapping Name: schema4848 +Schema Mapping Name: schema6758 +Schema Mapping Name: schema8775 +Schema Mapping Name: schemaName100 + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +7. Tag the jsonschema-ef86075e-cf5e-4bb1-be50-e0f19743ddb2 resource. +Tags can help you organize and categorize your Entity Resolution resources. 
+You can also use them to scope user permissions by granting a user permission +to access or change only resources with certain tag values. +In Entity Resolution, SchemaMapping and MatchingWorkflow can be tagged. For this example, +the SchemaMapping is tagged. + +Successfully tagged the resource. + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +8. View the results of the AWS Entity Resolution Workflow. +You cannot view the result of the workflow that is in a running state. +In order to view the results, you need to wait for the workflow that we started in step 3 to complete. + +If you choose not to wait, you cannot view the results. You can perform +this task manually in the AWS Management Console. + +This can take up to 30 mins (y/n). + +y +You selected to view the Entity Resolution Workflow results. +29:01Job status: RUNNING +28:01Job status: RUNNING +27:01Job status: RUNNING +26:01Job status: RUNNING +25:01Job status: RUNNING +24:01Job status: RUNNING +23:01Job status: RUNNING +22:01Job status: RUNNING +21:01Job status: RUNNING +20:01Job status: RUNNING +19:01Job status: RUNNING +18:01Job status: RUNNING +17:01Job status: RUNNING +16:01Job status: RUNNING +15:01Job status: RUNNING +14:01Job status: RUNNING +13:01Job status: RUNNING +12:01Job status: RUNNING +11:01Job status: RUNNING +10:01Job status: RUNNING +09:01Job status: RUNNING +08:01Job status: RUNNING +07:01Job status: SUCCEEDED + +Countdown complete: Workflow is in Completed state! +Job metrics fetched successfully for jobId: f25d2707729646a4af27874d991e22c5 +Number of input records: 7 +Number of match ids: 6 +Number of records not processed: 0 +Number of total records processed: 7 +The following explains the output data generated by the Entity Resolution workflow. 
The output data is stored in the erbucketf684533d2680435fa99d24b1bdaf5179 bucket. + + ------------------------------------------------------------------------------ ----------------- ---- ------------------ --------------------------- -------------- ---------- --------------------------------------------------- + InputSourceARN ConfidenceLevel id name email phone RecordId MatchID + ------------------------------------------------------------------------------ ----------------- ---- ------------------ --------------------------- -------------- ---------- --------------------------------------------------- + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 7 Jane E. Doe jane_doe@company.com 111-222-3333 7 036298535ed6471ebfc358fc76e1f51200006472446402560 + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.90523 2 Bob Smith Jr. bob.smith@example.com 987-654-3210 2 6ae2d360d6594089837eafc31b20f31600003506806140928 + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.90523 2 Bob Smith bob.smith@example.com 2 6ae2d360d6594089837eafc31b20f31600003506806140928 + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.89398956 1 Alice B. 
Johnson alice.johnson@example.com 746-876-9846 1 34a5075b289247efa1847ab292ed677400009137438953472 + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.89398956 1 Alice Johnson alice.johnson@example.com 1 34a5075b289247efa1847ab292ed677400009137438953472 + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.605295 3 Charlie Black charlie.black@company.com 345-567-1234 3 92c8ef3f68b34948a3af998d700ed02700002146028888064 + arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.605295 3 Charlie Black charlie.black@example.com 3 92c8ef3f68b34948a3af998d700ed02700002146028888064 + +Note that each of the last 3 pairs of records are considered a match even though the 'name' or 'email' differ between the records; +For example 'Bob Smith Jr.' compared to 'Bob Smith'. +The confidence level is a value between 0 and 1, where 1 indicates a perfect match. In the last pair of matched records, +the confidence level is lower for the differing email addresses. + + + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +9. Do you want to delete the resources, including the workflow? (y/n) +You cannot delete the workflow that is in a running state. +In order to delete the workflow, you need to wait for the workflow to complete. + +You can delete the workflow manually in the AWS Management Console at a later time. + +If you already waited for the workflow to complete in the previous step, +the workflow is completed and you can delete it. + +If the workflow is not completed, this can take up to 30 mins (y/n). + +y +workflow-39216b7f-f00b-4896-84ae-cd7edcfc7872 was deleted +Workflow deleted successfully! +Schema mapping 'jsonschema-ef86075e-cf5e-4bb1-be50-e0f19743ddb2' deleted successfully. 
+Schema mapping 'csv-8d05576d-66bb-4fcf-a29c-8c3de57ce48c' deleted successfully. +Both schema mappings were deleted successfully! + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +Now we delete the CloudFormation stack, which deletes +the resources that were created at the beginning of this scenario. + + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +Delete stack requested .... +Stack deleted successfully. +Resources deleted successfully! + +Enter 'c' followed by to continue: +c +Continuing with the program... + +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +This concludes the AWS Entity Resolution scenario. +-------------------------------------------------------------------------------- + +``` + +## SOS Tags + +The following table describes the metadata used in this Basics Scenario. 
+ +| action | metadata file | metadata key | +|------------------------|--------------------------------|--------------------------------------------| +| `createWorkflow` | entityresolution_metadata.yaml |entityresolution_CreateMatchingWorkflow | +| `createSchemaMapping` | entityresolution_metadata.yaml |entityresolution_CreateSchemaMapping | +| `startMatchingJob` | entityresolution_metadata.yaml |entityresolution_StartMatchingJob | +| `getMatchingJob` | entityresolution_metadata.yaml |entityresolution_GetMatchingJob | +| `listMatchingWorkflows`| entityresolution_metadata.yaml |entityresolution_ListMatchingWorkflows | +| `getSchemaMapping` | entityresolution_metadata.yaml |entityresolution_GetSchemaMapping | +| `listSchemaMappings` | entityresolution_metadata.yaml |entityresolution_ListSchemaMappings | +| `tagResource` | entityresolution_metadata.yaml |entityresolution_TagEntityResource | +| `deleteWorkflow` | entityresolution_metadata.yaml |entityresolution_DeleteMatchingWorkflow | +| `deleteMapping` | entityresolution_metadata.yaml |entityresolution_DeleteSchemaMapping | +| `listMappingJobs` | entityresolution_metadata.yaml |entityresolution_Hello | +| `scenario` | entityresolution_metadata.yaml |entityresolution_Scenario | + + + 