|
| 1 | +/* |
| 2 | + * Copyright OpenSearch Contributors |
| 3 | + * SPDX-License-Identifier: Apache-2.0 |
| 4 | + * |
| 5 | + * The OpenSearch Contributors require contributions made to |
| 6 | + * this file be licensed under the Apache-2.0 license or a |
| 7 | + * compatible open source license. |
| 8 | + * |
| 9 | + */ |
| 10 | + |
| 11 | +package org.opensearch.dataprepper.plugins.lambda.common; |
| 12 | + |
| 13 | +import org.opensearch.dataprepper.model.codec.InputCodec; |
| 14 | +import org.opensearch.dataprepper.model.event.Event; |
| 15 | +import org.opensearch.dataprepper.model.plugin.PluginFactory; |
| 16 | +import org.opensearch.dataprepper.model.record.Record; |
| 17 | +import org.opensearch.dataprepper.plugins.lambda.common.accumlator.Buffer; |
| 18 | +import org.opensearch.dataprepper.plugins.lambda.common.config.ResponseHandling; |
| 19 | +import org.opensearch.dataprepper.plugins.lambda.common.config.StreamingOptions; |
| 20 | +import org.slf4j.Logger; |
| 21 | +import org.slf4j.LoggerFactory; |
| 22 | + |
| 23 | +import software.amazon.awssdk.services.lambda.LambdaAsyncClient; |
| 24 | +import software.amazon.awssdk.services.lambda.model.InvokeRequest; |
| 25 | +import software.amazon.awssdk.services.lambda.model.InvokeWithResponseStreamRequest; |
| 26 | +import software.amazon.awssdk.services.lambda.model.InvokeWithResponseStreamResponseHandler; |
| 27 | +import software.amazon.awssdk.services.lambda.model.ResponseStreamingInvocationType; |
| 28 | +import software.amazon.awssdk.services.lambda.model.invokewithresponsestreamresponseevent.DefaultPayloadChunk; |
| 29 | + |
| 30 | +import java.io.ByteArrayInputStream; |
| 31 | +import java.io.ByteArrayOutputStream; |
| 32 | +import java.io.IOException; |
| 33 | +import java.util.ArrayList; |
| 34 | +import java.util.List; |
| 35 | +import java.util.concurrent.CompletableFuture; |
| 36 | + |
| 37 | +/** |
| 38 | + * Handles actual streaming Lambda invocations using AWS SDK streaming API |
| 39 | + */ |
| 40 | +public class StreamingLambdaHandler { |
| 41 | + private static final Logger LOG = LoggerFactory.getLogger(StreamingLambdaHandler.class); |
| 42 | + |
| 43 | + private final LambdaAsyncClient lambdaAsyncClient; |
| 44 | + private final PluginFactory pluginFactory; |
| 45 | + private final InputCodec responseCodec; |
| 46 | + private final String functionName; |
| 47 | + private final StreamingOptions streamingOptions; |
| 48 | + |
| 49 | + public StreamingLambdaHandler( |
| 50 | + LambdaAsyncClient lambdaAsyncClient, |
| 51 | + PluginFactory pluginFactory, |
| 52 | + InputCodec responseCodec, |
| 53 | + String functionName, |
| 54 | + StreamingOptions streamingOptions) { |
| 55 | + this.lambdaAsyncClient = lambdaAsyncClient; |
| 56 | + this.pluginFactory = pluginFactory; |
| 57 | + this.responseCodec = responseCodec; |
| 58 | + this.functionName = functionName; |
| 59 | + this.streamingOptions = streamingOptions; |
| 60 | + } |
| 61 | + |
| 62 | + public CompletableFuture<List<Record<Event>>> invokeWithStreaming(Buffer inputBuffer) { |
| 63 | + |
| 64 | + CompletableFuture<List<Record<Event>>> resultFuture = new CompletableFuture<>(); |
| 65 | + ByteArrayOutputStream responseStream = new ByteArrayOutputStream(); |
| 66 | + |
| 67 | + // Get the InvokeRequest from buffer and extract payload |
| 68 | + InvokeRequest invokeRequest = inputBuffer.getRequestPayload(functionName, "RequestResponse"); |
| 69 | + if (invokeRequest == null) { |
| 70 | + resultFuture.completeExceptionally(new IllegalArgumentException("No payload in buffer")); |
| 71 | + return resultFuture; |
| 72 | + } |
| 73 | + |
| 74 | + InvokeWithResponseStreamRequest request = InvokeWithResponseStreamRequest.builder() |
| 75 | + .functionName(functionName) |
| 76 | + .invocationType(ResponseStreamingInvocationType.REQUEST_RESPONSE) |
| 77 | + .payload(invokeRequest.payload()) |
| 78 | + .build(); |
| 79 | + |
| 80 | + InvokeWithResponseStreamResponseHandler responseHandler = InvokeWithResponseStreamResponseHandler.builder() |
| 81 | + .onResponse(response -> { |
| 82 | + LOG.debug("Streaming response started for function: {}", functionName); |
| 83 | + }) |
| 84 | + .onEventStream(publisher -> { |
| 85 | + publisher.subscribe(event -> { |
| 86 | + if (event instanceof DefaultPayloadChunk) { |
| 87 | + DefaultPayloadChunk chunk = (DefaultPayloadChunk) event; |
| 88 | + try { |
| 89 | + // DefaultPayloadChunk should have payload() method |
| 90 | + byte[] chunkBytes = chunk.payload().asByteArray(); |
| 91 | + // Synchronize access to ByteArrayOutputStream as it's not thread-safe |
| 92 | + // AWS SDK may deliver chunks on different threads |
| 93 | + synchronized (responseStream) { |
| 94 | + responseStream.write(chunkBytes); |
| 95 | + } |
| 96 | + LOG.debug("Received chunk of size: {} bytes", chunkBytes.length); |
| 97 | + } catch (IOException e) { |
| 98 | + LOG.error("Error writing chunk to response stream", e); |
| 99 | + resultFuture.completeExceptionally(e); |
| 100 | + } |
| 101 | + } else { |
| 102 | + // Other events (e.g., InvokeComplete) are handled by onComplete() |
| 103 | + LOG.debug("Ignoring non-payload Lambda stream event: {}", event.getClass().getSimpleName()); |
| 104 | + } |
| 105 | + }); |
| 106 | + }) |
| 107 | + .onComplete(() -> { |
| 108 | + try { |
| 109 | + byte[] completeResponse = responseStream.toByteArray(); |
| 110 | + LOG.debug("Streaming response complete. Total size: {} bytes", completeResponse.length); |
| 111 | + |
| 112 | + List<Record<Event>> processedRecords = processStreamingResponse( |
| 113 | + completeResponse, inputBuffer, streamingOptions); |
| 114 | + resultFuture.complete(processedRecords); |
| 115 | + |
| 116 | + } catch (Exception e) { |
| 117 | + LOG.error("Error processing complete streaming response", e); |
| 118 | + resultFuture.completeExceptionally(e); |
| 119 | + } |
| 120 | + }) |
| 121 | + .onError(throwable -> { |
| 122 | + LOG.error("Error in streaming Lambda invocation", throwable); |
| 123 | + resultFuture.completeExceptionally(throwable); |
| 124 | + }) |
| 125 | + .build(); |
| 126 | + |
| 127 | + lambdaAsyncClient.invokeWithResponseStream(request, responseHandler); |
| 128 | + return resultFuture; |
| 129 | + } |
| 130 | + |
| 131 | + private List<Record<Event>> processStreamingResponse( |
| 132 | + byte[] responseBytes, |
| 133 | + Buffer inputBuffer, |
| 134 | + StreamingOptions streamingOptions) throws IOException { |
| 135 | + |
| 136 | + List<Record<Event>> resultRecords = new ArrayList<>(); |
| 137 | + |
| 138 | + try (ByteArrayInputStream responseStream = new ByteArrayInputStream(responseBytes)) { |
| 139 | + responseCodec.parse(responseStream, record -> { |
| 140 | + Event parsedEvent = record.getData(); |
| 141 | + resultRecords.add(new Record<>(parsedEvent)); |
| 142 | + }); |
| 143 | + } |
| 144 | + |
| 145 | + LOG.info("Processed streaming response: {} records from {} bytes", |
| 146 | + resultRecords.size(), responseBytes.length); |
| 147 | + |
| 148 | + // Apply response handling strategy |
| 149 | + return applyResponseHandling(resultRecords, inputBuffer, streamingOptions); |
| 150 | + } |
| 151 | + |
| 152 | + /** |
| 153 | + * Applies the configured response handling strategy to the parsed records. |
| 154 | + * |
| 155 | + * @param parsedRecords Records parsed from the streaming response |
| 156 | + * @param inputBuffer Original input buffer containing source events |
| 157 | + * @param streamingOptions Configuration for response handling |
| 158 | + * @return Processed records based on the handling strategy |
| 159 | + */ |
| 160 | + private List<Record<Event>> applyResponseHandling( |
| 161 | + List<Record<Event>> parsedRecords, |
| 162 | + Buffer inputBuffer, |
| 163 | + StreamingOptions streamingOptions) { |
| 164 | + |
| 165 | + if (streamingOptions == null || |
| 166 | + streamingOptions.getResponseHandling() != ResponseHandling.RECONSTRUCT_DOCUMENT) { |
| 167 | + // No reconstruction - return records as-is |
| 168 | + return parsedRecords; |
| 169 | + } |
| 170 | + |
| 171 | + // Reconstruct: merge all chunks into a single document |
| 172 | + return reconstructDocument(parsedRecords, inputBuffer); |
| 173 | + } |
| 174 | + |
| 175 | + /** |
| 176 | + * Reconstructs a single document from multiple streaming chunks. |
| 177 | + * All chunks from the streaming response are merged into one Event, |
| 178 | + * matching the original input event count. |
| 179 | + * |
| 180 | + * <p>The reconstructed event retains the original event's EventHandle, enabling proper end-to-end |
| 181 | + * acknowledgement tracking. Chunks are treated as transport-level fragments (due to Lambda's |
| 182 | + * response size limits) and are not tracked separately in the acknowledgement system - only the |
| 183 | + * final reconstructed event is acknowledged downstream.</p> |
| 184 | + * |
| 185 | + * @param parsedRecords All chunks parsed from the streaming response |
| 186 | + * @param inputBuffer Original input buffer |
| 187 | + * @return List containing the reconstructed document(s) |
| 188 | + */ |
| 189 | + private List<Record<Event>> reconstructDocument( |
| 190 | + List<Record<Event>> parsedRecords, |
| 191 | + Buffer inputBuffer) { |
| 192 | + |
| 193 | + if (parsedRecords.isEmpty()) { |
| 194 | + return parsedRecords; |
| 195 | + } |
| 196 | + |
| 197 | + // Get the original records to maintain event handle relationships |
| 198 | + List<Record<Event>> originalRecords = inputBuffer.getRecords(); |
| 199 | + |
| 200 | + if (originalRecords.isEmpty()) { |
| 201 | + LOG.warn("No original records found in buffer for reconstruction"); |
| 202 | + return parsedRecords; |
| 203 | + } |
| 204 | + |
| 205 | + // Defensive check: reconstruct-document requires exactly 1 event per buffer |
| 206 | + // This should be enforced by validation in LambdaProcessor, but we check here to prevent silent failures |
| 207 | + if (originalRecords.size() != 1) { |
| 208 | + String errorMsg = String.format( |
| 209 | + "reconstruct-document mode requires exactly 1 event per buffer, found %d events. " + |
| 210 | + "This should have been prevented by configuration validation. " + |
| 211 | + "Please ensure batch.threshold.event_count is set to 1.", |
| 212 | + originalRecords.size() |
| 213 | + ); |
| 214 | + LOG.error(errorMsg); |
| 215 | + throw new IllegalStateException(errorMsg); |
| 216 | + } |
| 217 | + |
| 218 | + // Merge all chunks into the single original record |
| 219 | + // This handles: 1 input event → multiple chunks → 1 reconstructed event |
| 220 | + Event reconstructedEvent = originalRecords.get(0).getData(); |
| 221 | + |
| 222 | + // Merge all parsed chunks into the reconstructed event |
| 223 | + for (Record<Event> parsedRecord : parsedRecords) { |
| 224 | + Event chunkEvent = parsedRecord.getData(); |
| 225 | + reconstructedEvent.merge(chunkEvent); |
| 226 | + } |
| 227 | + |
| 228 | + LOG.info("Reconstructed {} chunks into {} document(s)", |
| 229 | + parsedRecords.size(), originalRecords.size()); |
| 230 | + |
| 231 | + // Return the original records (now containing merged data) |
| 232 | + return originalRecords; |
| 233 | + } |
| 234 | +} |
0 commit comments