From 52b7137236da1113678f32a7023170c298a9fc7c Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Thu, 2 Apr 2026 13:36:50 +0200 Subject: [PATCH 01/17] HTM-1975 | HTM-1976: Remove OpenAPI export endpoint and define extract endpoint we still need some, now obsoleted, generated code to keep the compiler happy so that is moved to a temporary schema file --- pom.xml | 28 ++ .../api/controller/LayerExportController.java | 6 + .../openapi/obsolete-viewer-schemas.yaml | 35 +++ .../resources/openapi/status-responses.yaml | 28 ++ src/main/resources/openapi/viewer-api.yaml | 287 ++++++++++-------- .../resources/openapi/viewer-schemas.yaml | 16 +- 6 files changed, 264 insertions(+), 136 deletions(-) create mode 100644 src/main/resources/openapi/obsolete-viewer-schemas.yaml diff --git a/pom.xml b/pom.xml index c143bf8928..cba33e20e0 100644 --- a/pom.xml +++ b/pom.xml @@ -1220,6 +1220,34 @@ SPDX-License-Identifier: MIT true + + generate-obsolete-viewer-models + + generate + + + ${project.basedir}/src/main/resources/openapi/obsolete-viewer-schemas.yaml + spring + org.tailormap.api.viewer.model + spring-boot + + java8 + true + false + true + false + true + true + true + @Deprecated + + false + true + false + false + true + + generate-spec diff --git a/src/main/java/org/tailormap/api/controller/LayerExportController.java b/src/main/java/org/tailormap/api/controller/LayerExportController.java index ae6b8cbe86..4e6862497a 100644 --- a/src/main/java/org/tailormap/api/controller/LayerExportController.java +++ b/src/main/java/org/tailormap/api/controller/LayerExportController.java @@ -59,8 +59,14 @@ import org.tailormap.api.repository.FeatureSourceRepository; import org.tailormap.api.viewer.model.LayerExportCapabilities; +/** + * @deprecated This controller is deprecated and will be removed in a future release. 
Use the `/extract/` endpoint + * (TODO) instead, which provides more flexible data extraction capabilities and supports more data sources than + * just WFS. + */ @AppRestController @RequestMapping(path = "${tailormap-api.base-path}/{viewerKind}/{viewerName}/layer/{appLayerId}/export/") +@Deprecated(forRemoval = true) public class LayerExportController { private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/src/main/resources/openapi/obsolete-viewer-schemas.yaml b/src/main/resources/openapi/obsolete-viewer-schemas.yaml new file mode 100644 index 0000000000..a07b30ac60 --- /dev/null +++ b/src/main/resources/openapi/obsolete-viewer-schemas.yaml @@ -0,0 +1,35 @@ +# +# Copyright (C) 2026 B3Partners B.V. +# +# SPDX-License-Identifier: MIT +# +openapi: 3.0.4 +info: + title: 'obsolete viewer models' + description: 'no servers or paths, just obsolete models in this document that need to be generated for backwards + compatibility, but should not be used in the API anymore.' 
+ version: '1.0' + license: + name: 'MIT' + url: 'https://mit-license.org/' + contact: + name: 'B3Partners BV' + url: 'https://www.b3partners.nl/' + email: 'info@b3partners.nl' +servers: [ ] +paths: { } + +components: + schemas: + LayerExportCapabilities: + description: '**OBSOLETE**, since the export capabilities are now predefined and no longer need to be discovered per layer' + type: object + required: [exportable] + properties: + exportable: + nullable: false + type: boolean + outputFormats: + type: array + items: + type: string \ No newline at end of file diff --git a/src/main/resources/openapi/status-responses.yaml b/src/main/resources/openapi/status-responses.yaml index 151c1ced44..50710c524f 100644 --- a/src/main/resources/openapi/status-responses.yaml +++ b/src/main/resources/openapi/status-responses.yaml @@ -63,3 +63,31 @@ components: example: code: 500 message: 'Internal server error' + + ServerSentEventResponse: + description: 'Server Sent Event response. + The client can use the `eventType` property to determine the type of event, and the `details` property + to get specific event data.' + type: object + required: + - eventType + properties: + eventType: + description: 'Event type' + type: string + details: + description: 'Event data. Can be any JSON object or nothing, but should include at least a `status` property to indicate the status of the event.' 
+ type: object + nullable: true + id: + description: 'optional event identifier, can be used for event ordering and deduplication' + type: string + format: uuid + nullable: true + example: + eventType: 'extract-started' + details: + status: 'started' + message: 'Extracting data' + progress: 10 + id: '123e4567-e89b-12d3-a456-426614174000' diff --git a/src/main/resources/openapi/viewer-api.yaml b/src/main/resources/openapi/viewer-api.yaml index 382ef1fb37..037328efa3 100644 --- a/src/main/resources/openapi/viewer-api.yaml +++ b/src/main/resources/openapi/viewer-api.yaml @@ -754,126 +754,6 @@ paths: schema: $ref: './status-responses.yaml#/components/schemas/RedirectResponse' - /{viewerKind}/{name}/layer/{appLayerId}/export/capabilities: - description: Returns layer export capabilities. Retrieving this information may take some time. - get: - operationId: 'getLayerExportCapabilities' - security: - - formAuth: [ ] - parameters: - - in: path - name: viewerKind - required: true - schema: - type: string - enum: - - app - - service - - description: 'viewer name' - in: path - name: name - required: true - schema: - type: string - - in: path - name: appLayerId - required: true - schema: - type: string - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: './viewer-schemas.yaml#/components/schemas/LayerExportCapabilities' - '401': - description: Unauthorized - content: - application/json: - schema: - $ref: './status-responses.yaml#/components/schemas/RedirectResponse' - - /{viewerKind}/{name}/layer/{appLayerId}/export/download: - description: Directly stream layer data export response for download. POST request to allow a large filter in the - request body. 
- post: - operationId: 'downloadLayerExport' - security: - - formAuth: [ ] - parameters: - - in: path - name: viewerKind - required: true - schema: - type: string - enum: - - app - - service - - description: 'viewer name' - in: path - name: name - required: true - schema: - type: string - - in: path - name: appLayerId - required: true - schema: - type: string - - name: outputFormat - description: Output format from capabilities. - in: query - schema: - type: string - - name: attributes - description: 'Attributes to include in export. If omitted all configured attributes are exported. For some - geo formats the geometry is included even if unchecked for display by the admin. For textual or spreadsheet - formats geometries are not included. Attribute item order is significant.' - in: query - required: false - schema: - type: array - items: - type: string - - in: query - name: filter - description: 'ECQL filter. See description for /app/{appId}/layer/{layerId}/features.' - required: false - schema: - type: string - - description: 'Attribute to sort by. See description for /app/{appId}/layer/{layerId}/features.' - in: query - name: sortBy - required: false - schema: - type: string - - description: 'Sort order for sortBy.' - in: query - name: sortOrder - required: false - schema: - type: string - default: asc - enum: - - asc - - desc - - description: 'Projection for geometry output.' - in: query - name: crs - required: false - schema: - type: string - responses: - '200': - description: OK - '401': - description: Unauthorized - content: - application/json: - schema: - $ref: './status-responses.yaml#/components/schemas/RedirectResponse' - /{viewerKind}/{name}/layer/{appLayerId}/features: summary: 'Use this endpoint to access features.' parameters: @@ -928,14 +808,14 @@ paths: schema: type: boolean default: false - - description: ' + - name: filter + description: ' A filter to be applied, possibly in combination with any other request parameters. 
The filter is an ECQL string, see [ECQL reference](https://docs.geoserver.org/latest/en/user/filter/ecql_reference.html). Filtering is supported when requesting a page of features, not when requesting a single feature (using `__fid`) nor when using x/y coordinates. ' in: query - name: filter required: false schema: type: string @@ -1928,3 +1808,166 @@ paths: application/json: schema: $ref: './status-responses.yaml#/components/schemas/ErrorResponse' + + + /{viewerKind}/{viewerName}/layer/{appLayerId}/extract: + description: 'Export the attributes as shown in the attribute list for a layer.' + post: + operationId: 'extractLayerAttributes' + security: + - formAuth: [ ] + parameters: + - name: viewerKind + in: path + required: true + schema: + type: string + enum: + - app + - service + - name: viewerName + in: path + required: true + schema: + type: string + - name: appLayerId + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - outputFormat + properties: + outputFormat: + description: 'Output format. + The allowed formats are configured per instance using `tailormap-api.export.allowed-outputformats`. + For geographical formats the (default) geometry is included even if not requested. + For textual or spreadsheet formats geometries are not included.' + type: string + nullable: false + example: 'application/geo+json' + attributes: + description: 'Attributes to include in export. + If omitted or empty, all configured attributes are exported. Attribute item order is significant.' + type: array + items: + type: string + filter: + description: 'ECQL filter. See description for /app/{appId}/layer/{layerId}/features.' + type: string + sortBy: + description: 'Attribute to sort by. See description for /app/{appId}/layer/{layerId}/features.' + type: string + sortOrder: + description: 'Sort order for sortBy. Ignored if sortBy is not set. 
See description for /app/{appId}/layer/{layerId}/features.' + type: string + default: asc + enum: + - asc + - desc + crs: + description: 'Projection for geometry output.' + type: string + responses: + '200': + description: 'Export started/in progress/finished. The client should listen to the stream and wait for an + `extract-completed` event with `status: completed` to know when the export is finished and the file is ready + to be downloaded. If the connection is closed before that, the export will be cancelled.' + content: + text/event-stream: + schema: + $ref: './status-responses.yaml#/components/schemas/ServerSentEventResponse' + example: + eventType: 'extract-started' + id: '123e4567-e89b-12d3-a456-426614174000' + details: + status: started + message: 'Extracting data' + progress: 50 + downloadId: '123e4567-e89b-12d3-a456-426614174001' + '400': + description: 'Bad Request. May be returned for some combination of parameters that can not be processed or are incomplete.' + content: + application/json: + schema: + $ref: './status-responses.yaml#/components/schemas/ErrorResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: './status-responses.yaml#/components/schemas/RedirectResponse' + '500': + description: 'Internal server error' + content: + application/json: + schema: + $ref: './status-responses.yaml#/components/schemas/ErrorResponse' + + /{viewerKind}/{viewerName}/layer/{appLayerId}/extract/download/{downloadId}: + description: 'Download the result of an extract request. The extract should be initiated first by a POST to + `/{viewerKind}/{viewerName}/layer/{appLayerId}/extract`. + The "extract-completed" response of that request will include a `downloadId` which can then be used to download the file from this endpoint. + This two-step process is needed to allow for large extracts which can not be generated within a single request-response cycle.' 
+ get: + operationId: 'downloadExtractedLayerAttributes' + security: + - formAuth: [ ] + parameters: + - name: viewerKind + in: path + required: true + schema: + type: string + enum: + - app + - service + - name: viewerName + in: path + required: true + schema: + type: string + - name: appLayerId + in: path + required: true + schema: + type: string + - name: downloadId + in: path + description: 'The id from the extract response to identify the file to download.' + required: true + schema: + type: string + format: uuid + responses: + '200': + description: 'OK' + content: + application/octet-stream: + schema: + type: string + format: binary + '401': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: './status-responses.yaml#/components/schemas/RedirectResponse' + '404': + description: 'Extract not found' + content: + application/json: + schema: + $ref: './status-responses.yaml#/components/schemas/ErrorResponse' + '500': + description: 'Internal Server Error' + content: + application/json: + schema: + $ref: './status-responses.yaml#/components/schemas/ErrorResponse' diff --git a/src/main/resources/openapi/viewer-schemas.yaml b/src/main/resources/openapi/viewer-schemas.yaml index 774563cabc..363a282bf9 100644 --- a/src/main/resources/openapi/viewer-schemas.yaml +++ b/src/main/resources/openapi/viewer-schemas.yaml @@ -135,8 +135,8 @@ components: description: 'Tile size for XYZ layer' type: integer tileGridExtent: + # Tile grid bounds for XYZ layer $ref: './common-schemas.yaml#/components/schemas/Bounds' - description: 'Tile grid bounds for XYZ layer' hasAttributes: description: 'Whether this layer has attributes that can be accessed eg. 
for feature info or attribute list' type: boolean @@ -259,18 +259,6 @@ components: name: type: string - LayerExportCapabilities: - type: object - required: [exportable] - properties: - exportable: - nullable: false - type: boolean - outputFormats: - type: array - items: - type: string - Attribute: type: object required: [name] @@ -329,7 +317,7 @@ components: nullable: true minItems: 0 items: - $ref: './viewer-schemas.yaml#/components/schemas/AttachmentMetadata' + $ref: '#/components/schemas/AttachmentMetadata' example: __fid: 1 geometry: "POLYGON(...)" From a52ae7113528a8d60d7d410296e3ac3b46622d44 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Tue, 7 Apr 2026 18:04:19 +0200 Subject: [PATCH 02/17] HTM-1962: Add GeoTools CSV datastore --- pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pom.xml b/pom.xml index cba33e20e0..c63886bd6d 100644 --- a/pom.xml +++ b/pom.xml @@ -311,6 +311,10 @@ SPDX-License-Identifier: MIT org.geotools gt-cql + + org.geotools + gt-csv + org.geotools gt-epsg-hsql From 50fbeaccfbc0dd0046eeff414c2f6ef22c0303a8 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 10 Apr 2026 11:21:27 +0200 Subject: [PATCH 03/17] HTM-1961: Update /extract API, introduce and implement SSE /events API --- build/qa/PMD-ruleset_for_TM.xml | 2 +- .../ServerSentEventsController.java | 66 +++++++++++++ .../resources/openapi/status-responses.yaml | 5 +- src/main/resources/openapi/viewer-api.yaml | 99 ++++++++++++++++--- ...erSentEventsControllerIntegrationTest.java | 88 +++++++++++++++++ ...ControllerInvalidInputIntegrationTest.java | 38 +++++++ 6 files changed, 282 insertions(+), 16 deletions(-) create mode 100644 src/main/java/org/tailormap/api/controller/ServerSentEventsController.java create mode 100644 src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java create mode 100644 
src/test/java/org/tailormap/api/controller/ServerSentEventsControllerInvalidInputIntegrationTest.java diff --git a/build/qa/PMD-ruleset_for_TM.xml b/build/qa/PMD-ruleset_for_TM.xml index ccc1203053..4bf97d09e9 100644 --- a/build/qa/PMD-ruleset_for_TM.xml +++ b/build/qa/PMD-ruleset_for_TM.xml @@ -113,7 +113,7 @@ SPDX-License-Identifier: MIT - + diff --git a/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java b/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java new file mode 100644 index 0000000000..6723f64c85 --- /dev/null +++ b/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static ch.rasc.sse.eventbus.SseEvent.DEFAULT_EVENT; + +import ch.rasc.sse.eventbus.SseEvent; +import ch.rasc.sse.eventbus.SseEventBus; +import java.lang.invoke.MethodHandles; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; +import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; +import org.tailormap.api.viewer.model.ServerSentEventResponse; +import tools.jackson.core.JacksonException; +import tools.jackson.databind.SerializationFeature; +import tools.jackson.databind.json.JsonMapper; + +@RestController +public class ServerSentEventsController { + private static final Logger logger = + LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final SseEventBus eventBus; + + private final JsonMapper jsonMapper; + + public ServerSentEventsController(SseEventBus eventBus, JsonMapper 
jsonMapper) { + this.eventBus = eventBus; + // force unindented/single line output for SSE messages, because we may have set + // spring.jackson.serialization.indent_output=true for debugging/development/test + if (jsonMapper.isEnabled(SerializationFeature.INDENT_OUTPUT)) { + this.jsonMapper = jsonMapper + .rebuild() + .configure(SerializationFeature.INDENT_OUTPUT, false) + .build(); + } else { + this.jsonMapper = jsonMapper; + } + } + + @GetMapping(path = "${tailormap-api.base-path}/events/{clientId}") + public SseEmitter sse(@PathVariable String clientId) { + // tests input against the set allowed by Nano ID + if (!clientId.matches("[A-Za-z0-9_-]+")) { + logger.warn("Invalid clientId for SSE connection: {}", clientId); + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid clientId"); + } + logger.debug("Adding new SSE client with id: {}", clientId); + return this.eventBus.createSseEmitter(clientId, 3600_000L, DEFAULT_EVENT); + } + + @Scheduled(fixedRate = 60_000) + public void keepAlive() throws JacksonException { + this.eventBus.handleEvent(SseEvent.ofData(jsonMapper.writeValueAsString( + new ServerSentEventResponse().eventType(ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE)))); + } +} diff --git a/src/main/resources/openapi/status-responses.yaml b/src/main/resources/openapi/status-responses.yaml index 50710c524f..4882ed0d21 100644 --- a/src/main/resources/openapi/status-responses.yaml +++ b/src/main/resources/openapi/status-responses.yaml @@ -75,8 +75,9 @@ components: eventType: description: 'Event type' type: string + enum: [ 'keep-alive', 'extract-progress', 'extract-completed', 'extract-failed' ] details: - description: 'Event data. Can be any JSON object or nothing, but should include at least a `status` property to indicate the status of the event.' + description: 'Event data. Can be any JSON object or nothing, but should include at least a `message` property to indicate the status of the event.' 
type: object nullable: true id: @@ -89,5 +90,5 @@ components: details: status: 'started' message: 'Extracting data' - progress: 10 + progress: 17 id: '123e4567-e89b-12d3-a456-426614174000' diff --git a/src/main/resources/openapi/viewer-api.yaml b/src/main/resources/openapi/viewer-api.yaml index 037328efa3..556a7d23e1 100644 --- a/src/main/resources/openapi/viewer-api.yaml +++ b/src/main/resources/openapi/viewer-api.yaml @@ -1809,8 +1809,71 @@ paths: schema: $ref: './status-responses.yaml#/components/schemas/ErrorResponse' + /events/{clientId}: + description: 'Subscribe to server-sent events for a viewer. The `clientId` is a client-generated identifier.' + get: + operationId: 'subscribeToEvents' + security: + - formAuth: [ ] + parameters: + - name: clientId + in: path + required: true + description: 'A client-generated identifier; + this must be the same as the clientId used in eg. the `/extract` request to correlate the events with the extract request. + The format should use the "Nano ID" format, for example `V1StGXR8_Z5jdHi6B-myT`.' + schema: + type: string + pattern: '[A-Za-z0-9_-]+' + responses: + '200': + description: 'OK' + content: + text/event-stream: + schema: + $ref: './status-responses.yaml#/components/schemas/ServerSentEventResponse' + example: + eventType: 'keep-alive' + id: '123e4567-e89b-12d3-a456-426614174000' - /{viewerKind}/{viewerName}/layer/{appLayerId}/extract: + /{viewerKind}/{viewerName}/layer/{appLayerId}/extract/formats: + description: 'Get the configured output formats for layer attribute extraction for this viewer/layer. + Currently this is set per instance using the `tailormap-api.extract.allowed-outputformats` property, but in the future this may be configurable per layer/viewer.' 
+ get: + operationId: 'getAllowedExtractFormats' + security: + - formAuth: [ ] + parameters: + - name: viewerKind + in: path + required: true + schema: + type: string + enum: + - app + - service + - name: viewerName + in: path + required: true + schema: + type: string + - name: appLayerId + in: path + required: true + schema: + type: string + responses: + '200': + description: 'OK' + content: + application/json: + schema: + type: array + items: + type: string + example: '["csv","shape.zip"]' + + /{viewerKind}/{viewerName}/layer/{appLayerId}/extract/{clientId}: description: 'Export the attributes as shown in the attribute list for a layer.' post: operationId: 'extractLayerAttributes' @@ -1835,6 +1898,14 @@ paths: required: true schema: type: string + - name: clientId + in: path + required: true + description: 'The client-generated id to identify the extract request; this must be the same as the clientId used to initiate the SSE stream in `/events/`. + This is used to correlate the extract response and the download request. The format should use the "Nano ID" format, for example `V1StGXR8_Z5jdHi6B-myT`.' + schema: + type: string + pattern: '[A-Za-z0-9_-]+' requestBody: required: true content: @@ -1846,9 +1917,8 @@ paths: properties: outputFormat: description: 'Output format. - The allowed formats are configured per instance using `tailormap-api.export.allowed-outputformats`. - For geographical formats the (default) geometry is included even if not requested. - For textual or spreadsheet formats geometries are not included.' + The allowed formats are configured per instance using `tailormap-api.extract.allowed-outputformats`. + The (default) geometry is included even if not requested.' type: string nullable: false example: 'application/geo+json' @@ -1871,18 +1941,21 @@ paths: enum: - asc - desc - crs: - description: 'Projection for geometry output.' - type: string responses: - '200': - description: 'Export started/in progress/finished. 
The client should listen to the stream and wait for an + '202': + description: 'Export started/queued. The client should listen to the `/events/` stream and wait for an `extract-completed` event with `status: completed` to know when the export is finished and the file is ready - to be downloaded. If the connection is closed before that, the export will be cancelled.' + to be downloaded. If the connection is closed before that, the export may be cancelled.' content: - text/event-stream: + application/json: schema: - $ref: './status-responses.yaml#/components/schemas/ServerSentEventResponse' + title: 'extractRequestResponse' + type: object + properties: + message: + type: string + required: + - message example: eventType: 'extract-started' id: '123e4567-e89b-12d3-a456-426614174000' @@ -1890,7 +1963,7 @@ paths: status: started message: 'Extracting data' progress: 50 - downloadId: '123e4567-e89b-12d3-a456-426614174001' + downloadId: '123e4567-e89b-12d3-a456-426614174001.csv' '400': description: 'Bad Request. May be returned for some combination of parameters that can not be processed or are incomplete.' content: diff --git a/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java new file mode 100644 index 0000000000..a3fa19c06f --- /dev/null +++ b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2026 B3Partners B.V. 
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request; +import static org.tailormap.api.TestRequestProcessor.setServletPath; + +import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; +import org.tailormap.api.annotation.PostgresIntegrationTest; +import org.tailormap.api.viewer.model.ServerSentEventResponse; + +@PostgresIntegrationTest +@AutoConfigureMockMvc +@Execution(ExecutionMode.CONCURRENT) +class ServerSentEventsControllerIntegrationTest { + private static final Logger logger = + LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + // Unique id avoids interference with parallel/other tests. 
+ private final String sseClientId = "keepalive-test-" + System.nanoTime(); + + @Autowired + private MockMvc mockMvc; + + @Value("${tailormap-api.base-path}") + private String apiBasePath; + + private MvcResult sseResult; + + @BeforeEach + void start_sse_stream() throws Exception { + final String sseUrl = apiBasePath + "/events/" + sseClientId; + sseResult = mockMvc.perform(get(sseUrl) + .accept(MediaType.TEXT_EVENT_STREAM) + .with(setServletPath(sseUrl)) + .acceptCharset(StandardCharsets.UTF_8)) + .andExpect(request().asyncStarted()) + .andReturn(); + } + + /** Check that at least 2 keep-alive messages arrive in 130 seconds. */ + @Test + void should_send_keep_alive_messages_for_two_minutes() { + // Keep this test running for at least 2 minutes, then assert at least 2 keep-alives arrived. + Awaitility.await("waiting for keep-alive messages") + .pollDelay(45, SECONDS) + .pollInterval(15, SECONDS) + .atLeast(2, MINUTES) + .atMost(130, SECONDS) + .logging(logPrinter -> logger.debug("Checking for keep-alive messages in SSE stream... 
{}", logPrinter)) + .untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_keep_alive_messages(stream), greaterThanOrEqualTo(2)); + }); + } + + private int count_keep_alive_messages(String stream) { + int count = 0; + int index = 0; + final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE + "\""; + while ((index = stream.indexOf(marker, index)) != -1) { + count++; + index += marker.length(); + } + return count; + } +} diff --git a/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerInvalidInputIntegrationTest.java b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerInvalidInputIntegrationTest.java new file mode 100644 index 0000000000..4c06803769 --- /dev/null +++ b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerInvalidInputIntegrationTest.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; +import static org.tailormap.api.TestRequestProcessor.setServletPath; + +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.tailormap.api.annotation.PostgresIntegrationTest; + +@PostgresIntegrationTest +@AutoConfigureMockMvc +class ServerSentEventsControllerInvalidInputIntegrationTest { + + @Autowired + private MockMvc mockMvc; + + @Value("${tailormap-api.base-path}") + private String apiBasePath; + + @Test + void invalid_client_id_should_return_bad_request() throws 
Exception { + final String invalidClientId = "invalid-te$t-" + System.nanoTime(); + final String sseUrl = apiBasePath + "/events/" + invalidClientId; + + mockMvc.perform(get(sseUrl).accept(MediaType.TEXT_EVENT_STREAM).with(setServletPath(sseUrl))) + .andExpect(status().isBadRequest()); + } +} From 8cd7ea95a717fcdd2d78e6fea6d4b6c5b227eb48 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:16:59 +0200 Subject: [PATCH 04/17] HTM-1961 | HTM-1962: Initial implementation of /extract endpoint with CSV output format --- build/ci/docker-compose.yml | 2 +- .../api/configuration/AsyncConfig.java | 12 + .../api/configuration/base/WebMvcConfig.java | 5 + .../controller/LayerExtractController.java | 241 ++++++++++++ .../service/CreateLayerExtractService.java | 362 ++++++++++++++++++ .../java/org/tailormap/api/util/UUIDv7.java | 112 ++++++ src/main/resources/application.properties | 8 + ...LayerExtractControllerIntegrationTest.java | 294 ++++++++++++++ ...ollerRestrictedFormatsIntegrationTest.java | 118 ++++++ ...erSentEventsControllerIntegrationTest.java | 2 +- .../org/tailormap/api/util/UUIDv7Test.java | 59 +++ src/test/resources/application.properties | 8 + 12 files changed, 1221 insertions(+), 2 deletions(-) create mode 100644 src/main/java/org/tailormap/api/controller/LayerExtractController.java create mode 100644 src/main/java/org/tailormap/api/service/CreateLayerExtractService.java create mode 100644 src/main/java/org/tailormap/api/util/UUIDv7.java create mode 100644 src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java create mode 100644 src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java create mode 100644 src/test/java/org/tailormap/api/util/UUIDv7Test.java diff --git a/build/ci/docker-compose.yml b/build/ci/docker-compose.yml index 22759440fe..9185e0494d 100644 --- a/build/ci/docker-compose.yml +++
b/build/ci/docker-compose.yml @@ -181,4 +181,4 @@ services: start_period: 60s interval: 15s timeout: 5s - retries: 3 \ No newline at end of file + retries: 3 diff --git a/src/main/java/org/tailormap/api/configuration/AsyncConfig.java b/src/main/java/org/tailormap/api/configuration/AsyncConfig.java index 86d53db3d3..bba5eea0b1 100644 --- a/src/main/java/org/tailormap/api/configuration/AsyncConfig.java +++ b/src/main/java/org/tailormap/api/configuration/AsyncConfig.java @@ -28,4 +28,16 @@ public Executor passwordResetTaskExecutor() { executor.initialize(); return executor; } + + @Bean(name = "extractTaskExecutor") + public Executor extractTaskExecutor() { + ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); + executor.setCorePoolSize(1); + executor.setMaxPoolSize(10); + executor.setQueueCapacity(100); + executor.setThreadNamePrefix("create-extract-"); + executor.setWaitForTasksToCompleteOnShutdown(false); + executor.initialize(); + return executor; + } } diff --git a/src/main/java/org/tailormap/api/configuration/base/WebMvcConfig.java b/src/main/java/org/tailormap/api/configuration/base/WebMvcConfig.java index 2563f92c32..0e9cfe9a3b 100644 --- a/src/main/java/org/tailormap/api/configuration/base/WebMvcConfig.java +++ b/src/main/java/org/tailormap/api/configuration/base/WebMvcConfig.java @@ -16,6 +16,7 @@ import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; import org.springframework.web.servlet.resource.EncodedResourceResolver; import org.tailormap.api.configuration.CaseInsensitiveEnumConverter; +import org.tailormap.api.controller.LayerExtractController; import org.tailormap.api.persistence.json.GeoServiceProtocol; import org.tailormap.api.scheduling.TaskType; @@ -63,5 +64,9 @@ public void addFormatters(@NonNull FormatterRegistry registry) { String.class, GeoServiceProtocol.class, new CaseInsensitiveEnumConverter<>(GeoServiceProtocol.class)); registry.addConverter(String.class, TaskType.class, new 
CaseInsensitiveEnumConverter<>(TaskType.class)); + registry.addConverter( + String.class, + LayerExtractController.ExtractOutputFormat.class, + new CaseInsensitiveEnumConverter<>(LayerExtractController.ExtractOutputFormat.class)); } } diff --git a/src/main/java/org/tailormap/api/controller/LayerExtractController.java b/src/main/java/org/tailormap/api/controller/LayerExtractController.java new file mode 100644 index 0000000000..0d492fb463 --- /dev/null +++ b/src/main/java/org/tailormap/api/controller/LayerExtractController.java @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static org.tailormap.api.persistence.helper.TMFeatureTypeHelper.getConfiguredAttributes; + +import io.micrometer.core.annotation.Counted; +import io.micrometer.core.annotation.Timed; +import jakarta.validation.Valid; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; +import org.geotools.api.filter.sort.SortOrder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.Resource; +import org.springframework.core.io.UrlResource; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.ModelAttribute; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import 
org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.server.ResponseStatusException; +import org.tailormap.api.annotation.AppRestController; +import org.tailormap.api.persistence.Application; +import org.tailormap.api.persistence.GeoService; +import org.tailormap.api.persistence.TMFeatureType; +import org.tailormap.api.persistence.json.AppLayerSettings; +import org.tailormap.api.persistence.json.AppTreeLayerNode; +import org.tailormap.api.persistence.json.GeoServiceLayer; +import org.tailormap.api.repository.FeatureSourceRepository; +import org.tailormap.api.service.CreateLayerExtractService; + +@AppRestController +@RequestMapping(path = "${tailormap-api.base-path}/{viewerKind}/{viewerName}/layer/{appLayerId}/extract") +public class LayerExtractController { + private static final Logger logger = + LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final Pattern SAFE_DOWNLOAD_ID = Pattern.compile("^[A-Za-z0-9._-]+$"); + private final FeatureSourceRepository featureSourceRepository; + private final CreateLayerExtractService createLayerExtractService; + + @Value("#{'${tailormap-api.extract.allowed-outputformats}'.split(',')}") + private List allowedExtractOutputFormats; + + public LayerExtractController( + FeatureSourceRepository featureSourceRepository, CreateLayerExtractService createLayerExtractService) { + this.featureSourceRepository = featureSourceRepository; + this.createLayerExtractService = createLayerExtractService; + } + + /** + * Download the result of an extract request. The extract generation should be initiated first by a POST to + * {@code /{viewerKind}/{viewerName}/layer/{appLayerId}/extract}. 
+ */ + @GetMapping(path = "/download/{downloadId}") + @Counted(value = "tailormap_api_extract_download", description = "Count of layer extract downloads") + public ResponseEntity download( + @ModelAttribute GeoService service, + @ModelAttribute GeoServiceLayer layer, + @ModelAttribute Application application, + @ModelAttribute AppTreeLayerNode appTreeLayerNode, + @PathVariable String downloadId) + throws MalformedURLException { + + if (downloadId == null || !SAFE_DOWNLOAD_ID.matcher(downloadId).matches()) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid downloadId"); + } + Path exportRoot = Path.of(createLayerExtractService.getExportFilesLocation()) + .toAbsolutePath() + .normalize(); + Path filePath = exportRoot.resolve(downloadId).normalize(); + if (!filePath.startsWith(exportRoot)) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid downloadId"); + } + + Resource resource = new UrlResource(filePath.toUri()); + if (!resource.exists() || !resource.isReadable() || !resource.isFile()) { + throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Download file not found"); + } + + String contentType = MediaType.APPLICATION_OCTET_STREAM_VALUE; + try { + String detectedContentType = Files.probeContentType(filePath); + if (detectedContentType != null) { + contentType = detectedContentType; + } + } catch (IOException e) { + logger.debug("Could not determine content type for {}", filePath, e); + } + + return ResponseEntity.ok() + .contentType(MediaType.parseMediaType(contentType)) + .header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + filePath.getFileName() + "\"") + .body(resource); + } + + @GetMapping("/formats") + public ResponseEntity formats( + @Valid @ModelAttribute GeoServiceLayer layer, + @ModelAttribute GeoService service, + @ModelAttribute Application application, + @ModelAttribute AppTreeLayerNode appTreeLayerNode) { + return ResponseEntity.ok(allowedExtractOutputFormats); + } + + @Transactional + 
@PostMapping("/{clientId}") + @Timed(value = "tailormap_api_extract", description = "Time taken to process a layer extract request") + public ResponseEntity extract( + @Valid @ModelAttribute GeoServiceLayer layer, + @ModelAttribute GeoService service, + @ModelAttribute Application application, + @ModelAttribute AppTreeLayerNode appTreeLayerNode, + @PathVariable String clientId, + @RequestParam ExtractOutputFormat outputFormat, + @RequestParam(required = false) Set attributes, + @RequestParam(required = false) String filter, + @RequestParam(required = false) String sortBy, + @RequestParam(required = false, defaultValue = "asc") String sortOrder) { + + try { + createLayerExtractService.validateClientId(clientId); + } catch (IllegalArgumentException e) { + logger.warn("Invalid clientId for extract request: {}", clientId); + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, e.getMessage()); + } + + if (!allowedExtractOutputFormats.contains(outputFormat)) { + logger.debug("Invalid output format requested: {}", outputFormat); + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid output format"); + } + + TMFeatureType sourceFT = service.findFeatureTypeForLayer(layer, featureSourceRepository); + if (sourceFT == null) { + logger.debug("Layer export requested for layer without feature type"); + throw new ResponseStatusException(HttpStatus.NOT_FOUND); + } + if (attributes == null) { + attributes = new HashSet<>(); + } + + AppLayerSettings appLayerSettings = application.getAppLayerSettings(appTreeLayerNode); + // Get attributes in configured or original order + Set nonHiddenAttributes = + getConfiguredAttributes(sourceFT, appLayerSettings).keySet(); + + if (!attributes.isEmpty()) { + // Only export non-hidden property names + if (!nonHiddenAttributes.containsAll(attributes)) { + throw new ResponseStatusException( + HttpStatus.BAD_REQUEST, + "One or more requested attributes are not available on the feature type"); + } + } else if 
(!sourceFT.getSettings().getHideAttributes().isEmpty()) { + // Only specify specific propNames if there are hidden attributes. Having no propNames + // request parameter to request all propNames is less error-prone than specifying the ones + // we have saved in the feature type + attributes = new HashSet<>(nonHiddenAttributes); + } + + // Empty attributes means we won't specify propNames in the GetFeature request. However, if we do select only + // some property names, we need the geometry attribute which is not in the 'attributes' request param so spatial + // export formats don't have the geometry missing. + if (!attributes.isEmpty() && sourceFT.getDefaultGeometryAttribute() != null) { + attributes.add(sourceFT.getDefaultGeometryAttribute()); + } + + SortOrder sortingOrder = SortOrder.ASCENDING; + if (null != sortOrder && (sortOrder.equalsIgnoreCase("desc") || sortOrder.equalsIgnoreCase("asc"))) { + sortingOrder = SortOrder.valueOf(sortOrder.toUpperCase(Locale.ROOT)); + } + + final String outputFileName = + this.createLayerExtractService.createExtractFilename(clientId, sourceFT, outputFormat); + this.createLayerExtractService.emitProgress(clientId, outputFileName, 0, false, "Extract task received"); + + //noinspection JvmTaintAnalysis Not a Path Traversal Sink because the clientId is validated + this.createLayerExtractService.createLayerExtract( + clientId, sourceFT, attributes, filter, sortBy, sortingOrder, outputFormat, outputFileName); + + //noinspection JvmTaintAnalysis Not an XSS sink because the response is a json message + return ResponseEntity.accepted() + .body(Map.of("message", "Extract request accepted", "downloadId", outputFileName)); + } + + public enum ExtractOutputFormat { + CSV("csv", "csv"), + GEOJSON("geojson", "json"), + XLSX("xlsx", "xlsx"), + SHAPE("shape", "zip"); + + private final String value; + private final String extension; + + ExtractOutputFormat(String value, String extension) { + this.value = value; + this.extension = extension; + } 
+ + public static ExtractOutputFormat fromValue(String value) { + for (ExtractOutputFormat format : ExtractOutputFormat.values()) { + if (format.value.equalsIgnoreCase(value)) { + return format; + } + } + throw new IllegalArgumentException("Invalid output format: " + value); + } + + public String getValue() { + return this.value; + } + + public String getExtension() { + return this.extension; + } + + @Override + public String toString() { + return String.valueOf(this.value); + } + } +} diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java new file mode 100644 index 0000000000..084a6f039f --- /dev/null +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -0,0 +1,362 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.service; + +import ch.rasc.sse.eventbus.SseEvent; +import ch.rasc.sse.eventbus.SseEventBus; +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.lang.invoke.MethodHandles; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; +import org.apache.commons.lang3.StringUtils; +import org.geotools.api.data.FeatureEvent; +import org.geotools.api.data.FileDataStore; +import org.geotools.api.data.Query; +import org.geotools.api.data.SimpleFeatureSource; +import org.geotools.api.data.SimpleFeatureStore; +import org.geotools.api.data.Transaction; +import org.geotools.api.feature.simple.SimpleFeatureType; +import org.geotools.api.filter.Filter; +import org.geotools.api.filter.FilterFactory; +import org.geotools.api.filter.sort.SortOrder; +import 
org.geotools.data.DataUtilities; +import org.geotools.data.DefaultTransaction; +import org.geotools.data.csv.CSVDataStoreFactory; +import org.geotools.factory.CommonFactoryFinder; +import org.geotools.feature.SchemaException; +import org.geotools.filter.text.cql2.CQLException; +import org.geotools.filter.text.ecql.ECQL; +import org.geotools.util.factory.GeoTools; +import org.jspecify.annotations.NonNull; +import org.jspecify.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.scheduling.annotation.Async; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.tailormap.api.controller.LayerExtractController; +import org.tailormap.api.geotools.featuresources.FeatureSourceFactoryHelper; +import org.tailormap.api.persistence.TMFeatureType; +import org.tailormap.api.util.UUIDv7; +import org.tailormap.api.viewer.model.ServerSentEventResponse; +import tools.jackson.databind.SerializationFeature; +import tools.jackson.databind.json.JsonMapper; + +@Service +public class CreateLayerExtractService { + private static final Logger logger = + LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private final SseEventBus eventBus; + private final JsonMapper jsonMapper; + private final FeatureSourceFactoryHelper featureSourceFactoryHelper; + + private final FilterFactory ff = CommonFactoryFinder.getFilterFactory(GeoTools.getDefaultHints()); + + // we can safely use the tmp dir as a default here because we are running in a docker container so access is limited + @Value("${tailormap-api.extract.location:#{systemProperties['java.io.tmpdir']}}") + private String exportFilesLocation; + + @Value("${tailormap-api.extract.cleanup-minutes:120}") + private int cleanupIntervalMinutes; + + public CreateLayerExtractService( + SseEventBus 
eventBus, JsonMapper jsonMapper, FeatureSourceFactoryHelper featureSourceFactoryHelper) { + this.eventBus = eventBus; + this.featureSourceFactoryHelper = featureSourceFactoryHelper; + // force unindented/single line output for SSE messages, because we may have set + // spring.jackson.serialization.indent_output=true for debugging/development/test + if (jsonMapper.isEnabled(SerializationFeature.INDENT_OUTPUT)) { + this.jsonMapper = jsonMapper + .rebuild() + .configure(SerializationFeature.INDENT_OUTPUT, false) + .build(); + } else { + this.jsonMapper = jsonMapper; + } + } + + public String getExportFilesLocation() { + return exportFilesLocation; + } + + private void emitError(@NonNull String clientId, String details) { + eventBus.handleEvent(SseEvent.builder() + .addClientId(clientId) + .data(jsonMapper.writeValueAsString(new ServerSentEventResponse() + .eventType(ServerSentEventResponse.EventTypeEnum.EXTRACT_FAILED) + .id(UUIDv7.randomV7()) + .details(Map.of( + "message", "An error occurred during extract creation", "explanation", details)))) + .build()); + } + + public void emitProgress( + @NonNull String clientId, + @Nullable String fileId, + int progress, + boolean completed, + @Nullable String message) { + logger.debug("Emitting progress {} for layer with id {}", progress, clientId); + + message = StringUtils.isBlank(message) ? "Extract task started" : message; + fileId = StringUtils.isBlank(fileId) ? "" : fileId; + + eventBus.handleEvent(SseEvent.builder() + .addClientId(clientId) + .data(jsonMapper.writeValueAsString(new ServerSentEventResponse() + .eventType( + completed + ? ServerSentEventResponse.EventTypeEnum.EXTRACT_COMPLETED + : ServerSentEventResponse.EventTypeEnum.EXTRACT_PROGRESS) + .id(UUIDv7.randomV7()) + .details(Map.of( + "progress", + progress, + "message", + completed ? "Extract task completed" : message, + "downloadId", + fileId)))) + .build()); + } + + /** + * Check the sse client id is valid and exists. 
+ * + * @param clientId the SSE client id + * @throws IllegalArgumentException when the SSE client id is invalid or not found on the event bus + */ + public void validateClientId(@NonNull String clientId) throws IllegalArgumentException { + if (!clientId.matches("[A-Za-z0-9_-]+")) { + logger.warn("Invalid clientId for SSE connection: {}", clientId); + throw new IllegalArgumentException("Invalid clientId"); + } + + // validate the given clientId is known on the event bus + this.eventBus.getAllClientIds().stream() + .filter(id -> Objects.equals(id, clientId)) + .findFirst() + .ifPresentOrElse(id -> logger.debug("Validated clientId {}", id), () -> { + throw new IllegalArgumentException("No active subscription found for clientId " + clientId); + }); + } + + /** + * Create a validated filename for an extract. The naming follows the pattern + * {@code "%s_%s_%s.%s".formatted(sourceFT.getName(), clientId, UUIDv7.randomV7(), outputFormat.getExtension()) } + * where the first part is the source feature type name, the second part is the SSE client id, the third part is a + * random UUIDv7 and the fourth part is the file extension based on the requested output format. 
+ * + * @param clientId the SSE client id + * @param sourceFT the source featuretype for the extract + * @param outputFormat the required format of the extract + * @return the filename used to create an extract + * @throws IllegalArgumentException when the SSE clientId is invalid or not found on the event bus + */ + public String createExtractFilename( + @NonNull String clientId, + @NonNull TMFeatureType sourceFT, + LayerExtractController.@NonNull ExtractOutputFormat outputFormat) + throws IllegalArgumentException { + + this.validateClientId(clientId); + + String cleanFTName = sourceFT.getName(); + if (cleanFTName.contains(":")) { + // clip off the WFS namespace part + cleanFTName = cleanFTName.substring(cleanFTName.lastIndexOf(":") + 1); + } + return "%s_%s_%s.%s".formatted(cleanFTName, clientId, UUIDv7.randomV7(), outputFormat.getExtension()); + } + + @Async("extractTaskExecutor") + @Transactional + public void createLayerExtract( + @NonNull String clientId, + @NonNull TMFeatureType inputTmFeatureType, + @NonNull Set attributes, + String filterCQL, + String sortBy, + SortOrder sortOrder, + LayerExtractController.@NonNull ExtractOutputFormat extractOutputFormat, + @NonNull String outputFileName) { + SimpleFeatureSource inputFeatureSource = null; + + this.emitProgress(clientId, outputFileName, 0, false, null); + + try (Transaction outputTransaction = new DefaultTransaction("tailormap-extract-output")) { + inputFeatureSource = featureSourceFactoryHelper.openGeoToolsFeatureSource(inputTmFeatureType); + + Query q = new Query(inputFeatureSource.getName().toString()); + q.setPropertyNames(attributes.toArray(new String[0])); + if (!StringUtils.isBlank(filterCQL)) { + Filter filter = ECQL.toFilter(filterCQL); + q.setFilter(filter); + } + if (!StringUtils.isBlank(sortBy)) { + q.setSortBy(ff.sort(sortBy, Objects.requireNonNullElse(sortOrder, SortOrder.ASCENDING))); + } + + final int featCount = inputFeatureSource.getCount(q); + AtomicInteger featsAdded = new 
AtomicInteger(); + logger.debug("Filtered source counts {}", featCount); + + FileDataStore outputDataStore = getExtractDataStore(extractOutputFormat, outputFileName, clientId); + SimpleFeatureType fType = + DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0])); + outputDataStore.createSchema(fType); + + if (outputDataStore.getFeatureSource() instanceof SimpleFeatureStore featureStore) { + featureStore.setTransaction(outputTransaction); + featureStore.addFeatureListener(event -> { + if (event.getType().equals(FeatureEvent.Type.ADDED)) { + featsAdded.getAndIncrement(); + } + if (featCount > 0) { + if (featsAdded.get() % 50 == 0) { + this.emitProgress( + clientId, + outputFileName, + (int) ((featsAdded.doubleValue() / featCount) * 100), + false, + null); + } + } + }); + featureStore.addFeatures(inputFeatureSource.getFeatures(q)); + outputTransaction.commit(); + } else { + this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features"); + logger.error("Output datastore is not a SimpleFeatureStore, cannot write features"); + } + outputDataStore.dispose(); + this.emitProgress(clientId, outputFileName, 100, true, null); + } catch (IOException | CQLException | SchemaException e) { + emitError(clientId, e.getMessage()); + logger.error("Creating extract failed", e); + } finally { + if (inputFeatureSource != null) { + try { + inputFeatureSource.getDataStore().dispose(); + } catch (Exception e) { + logger.warn("Error disposing datastore for feature source {}", inputFeatureSource.getName(), e); + } + } + } + } + + private FileDataStore getExtractDataStore( + LayerExtractController.ExtractOutputFormat extractOutputFormat, String outputFileName, String clientId) + throws IOException { + + final File outputFile = Files.createFile(Path.of(exportFilesLocation, outputFileName)) + .toFile() + .getCanonicalFile(); + if (!outputFile + .getPath() + 
.startsWith(Path.of(exportFilesLocation).toFile().getCanonicalPath())) { + throw new IOException("Invalid file path"); + } + + if (!logger.isDebugEnabled()) { + // delete in production after JVM exit because the event bus will be reset when the JVM exits, and then we + // are unlikely to have a reference to the file anymore. + // In debug/development mode we want to keep the file for inspection. + outputFile.deleteOnExit(); + } + + switch (extractOutputFormat) { + case CSV -> { + Map params = Map.of( + CSVDataStoreFactory.FILE_PARAM.key, + outputFile, + CSVDataStoreFactory.STRATEGYP.key, + CSVDataStoreFactory.WKT_STRATEGY, + CSVDataStoreFactory.WKTP.key, + "the_geom_wkt", + CSVDataStoreFactory.WRITEPRJ.key, + false, + CSVDataStoreFactory.QUOTEALL.key, + true); + return (FileDataStore) new CSVDataStoreFactory().createNewDataStore(params); + } + // TODO implement + case GEOJSON, XLSX, SHAPE -> { + emitError(clientId, "Output format " + extractOutputFormat + " is not yet supported"); + logger.error("Output format {} is not yet supported", extractOutputFormat); + throw new IOException("Unsupported output format: " + extractOutputFormat); + } + default -> { + // should never happen + emitError(clientId, "Unknown output format: " + extractOutputFormat); + logger.error("Unknown output format: {}", extractOutputFormat); + throw new IllegalArgumentException("Unknown output format: " + extractOutputFormat); + } + } + } + + /** + * Cleanup expired extract files. 
Filenames are created in {@link CreateLayerExtractService#createExtractFilename } + * and follow the pattern {@code "%s_%s_%s.%s".formatted(sourceFT.getName(), clientId, UUIDv7.randomV7(), + * outputFormat.getExtension()) } + */ + @Scheduled(fixedDelay = 5, timeUnit = TimeUnit.MINUTES, initialDelay = 15) + public void cleanupExpiredExtracts() { + logger.debug("Running expired extracts cleanup..."); + List clientFilesOnDisk = new ArrayList<>(); + Set validClientIds = eventBus.getAllClientIds(); + + // list download files in export location and delete those that are not bound to an active sse stream client + try (Stream stream = Files.walk(Path.of(exportFilesLocation))) { + stream.filter(Files::isRegularFile).forEach(path -> { + File file = path.toFile(); + String filename = file.getName(); + String[] parts = filename.split("[_.]", -1); + if (parts.length < 4) { + logger.warn("Unexpected file in extract location: {}", filename); + return; + } + String clientId = parts[1]; + if (!validClientIds.contains(clientId)) { + if (!file.delete()) { + logger.error("Failed to delete unattached extract file {}", filename); + } + } else { + Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2])); + clientFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId)); + } + }); + + // delete any files are older than the cutoff + clientFilesOnDisk.stream() + .filter(f -> f.timestamp() + .isBefore(Instant.now().minusSeconds(TimeUnit.MINUTES.toSeconds(cleanupIntervalMinutes)))) + .forEach(f -> { + if (!f.file().delete()) { + logger.error( + "Failed to delete expired extract file {}", + f.file().getName()); + } + }); + } catch (IOException e) { + logger.error("Error while cleaning up expired extracts", e); + } + } + + private record FileWithAttributes(File file, Instant timestamp, String clientId) {} +} diff --git a/src/main/java/org/tailormap/api/util/UUIDv7.java b/src/main/java/org/tailormap/api/util/UUIDv7.java new file mode 100644 index 
/**
 * A utility class to create and read version 7 UUIDs, which start with a Unix epoch millisecond timestamp and are
 * therefore time-sortable. See RFC 9562 for the layout of a version 7 UUID.
 */
public final class UUIDv7 {
  private static final SecureRandom random = new SecureRandom();

  private UUIDv7() {
    // private constructor for utility class
  }

  /**
   * Create a random version 7 UUID.
   *
   * @return a random version 7 UUID
   */
  public static UUID randomV7() {
    byte[] value = randomBytes();
    ByteBuffer buf = ByteBuffer.wrap(value);
    long high = buf.getLong();
    long low = buf.getLong();
    return new UUID(high, low);
  }

  /**
   * Create the 16 bytes of a version 7 UUID: the current time in the first 6 bytes, random data in the remainder,
   * with the version and variant bits set.
   */
  private static byte[] randomBytes() {
    byte[] value = new byte[16];
    random.nextBytes(value);
    // the long is written big-endian, so bytes 2..7 are the lower 48 bits of the timestamp
    ByteBuffer timestamp = ByteBuffer.allocate(Long.BYTES).putLong(System.currentTimeMillis());
    System.arraycopy(timestamp.array(), 2, value, 0, 6);
    // set version 7
    value[6] = (byte) ((value[6] & 0x0F) | 0x70);
    // set RFC 4122 variant
    value[8] = (byte) ((value[8] & 0x3F) | 0x80);
    return value;
  }

  /**
   * Extract the Unix epoch timestamp (milliseconds) from a UUIDv7.
   *
   * @param uuidv7 the UUIDv7 value
   * @return the embedded Unix epoch timestamp in milliseconds
   * @throws IllegalArgumentException if the given UUID is {@code null} or not version 7
   */
  public static long timestamp(UUID uuidv7) throws IllegalArgumentException {
    if (uuidv7 == null) {
      throw new IllegalArgumentException("UUID cannot be null");
    }
    if (uuidv7.version() != 7) {
      throw new IllegalArgumentException("UUID is not version 7");
    }
    // UUIDv7 stores a 48-bit Unix epoch millisecond timestamp in the first 6 bytes.
    return (uuidv7.getMostSignificantBits() >>> 16) & 0x0000FFFFFFFFFFFFL;
  }

  /**
   * Extract the Unix epoch timestamp as an Instant from a UUIDv7.
   *
   * @param uuidv7 the UUIDv7 value
   * @return the embedded timestamp as an {@link Instant} with millisecond precision
   * @throws IllegalArgumentException if the given UUID is {@code null} or not version 7
   * @see #timestamp(UUID)
   */
  public static Instant timestampAsInstant(UUID uuidv7) throws IllegalArgumentException {
    return Instant.ofEpochMilli(timestamp(uuidv7));
  }

  /**
   * Parse a string representation of a UUIDv7.
   *
   * <p>Accepts the standard {@code xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} format; hyphens are ignored, so the 32
   * hexadecimal digits without hyphens are accepted as well. Only the ASCII characters {@code 0-9}, {@code a-f}
   * and {@code A-F} count as hexadecimal digits.
   *
   * @param value the string to parse
   * @return the parsed UUIDv7
   * @throws IllegalArgumentException if the string is not a valid UUID or not version 7
   */
  public static UUID fromString(String value) throws IllegalArgumentException {
    if (value == null || value.isBlank()) {
      throw new IllegalArgumentException("UUID string cannot be null or blank");
    }
    // Remove hyphens and parse the 32 hex characters directly
    String hex = value.replace("-", "");
    if (hex.length() != 32) {
      throw new IllegalArgumentException("Invalid UUID string: " + value);
    }
    // Long.parseUnsignedLong tolerates a leading '+' and non-ASCII digits, neither of which belongs in a UUID
    for (int i = 0; i < hex.length(); i++) {
      if (!isHexDigit(hex.charAt(i))) {
        throw new IllegalArgumentException("Invalid UUID string: " + value);
      }
    }
    try {
      long high = Long.parseUnsignedLong(hex, 0, 16, 16);
      long low = Long.parseUnsignedLong(hex, 16, 32, 16);
      UUID uuid = new UUID(high, low);
      if (uuid.version() != 7) {
        throw new IllegalArgumentException("UUID is not version 7: " + value);
      }
      return uuid;
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException("Invalid UUID string: " + value, e);
    }
  }

  /** Whether the character is an ASCII hexadecimal digit. */
  private static boolean isHexDigit(char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  }
}
minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk +# tailormap-api.extract.cleanup-minutes=120 +# the directory where the extract output files are stored, should be writable by the application +# tailormap-api.extract.location=/tmp + # proxy passthrough regex patterns for layer names, when empty no additional layers are allowed to be proxied # eg. use vw_t_gi_%s_[a-fA-F0-9]{32} to match `vw_t_gi_layername_70cae9814c6144808f1c9bb921099794` as a sub-layer of layername # %s is replaced with the layer name from the configuration (this uses String.format() syntax, so be aware of the escaping rules for % and \) diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java new file mode 100644 index 0000000000..bf94c24ef2 --- /dev/null +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2026 B3Partners B.V. 
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.startsWith; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; +import static org.tailormap.api.TestRequestProcessor.setServletPath; +import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; +import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junitpioneer.jupiter.Stopwatch; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; +import org.springframework.http.MediaType; +import org.springframework.security.test.context.support.WithMockUser; +import 
org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; +import org.tailormap.api.StaticTestData; +import org.tailormap.api.annotation.PostgresIntegrationTest; +import org.tailormap.api.viewer.model.ServerSentEventResponse; +import tools.jackson.databind.ObjectMapper; + +@PostgresIntegrationTest +@AutoConfigureMockMvc +@Execution(ExecutionMode.CONCURRENT) +@Stopwatch +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class LayerExtractControllerIntegrationTest { + private static final String extractPath = "/extract/"; + private static final String downloadPath = "/extract/download/"; + // Use a unique clientId per test instance to avoid cross-test interference + // when running concurrently. + private final String sseClientId = "testcase-" + System.nanoTime(); + + @Autowired + private MockMvc mockMvc; + + @Value("${tailormap-api.base-path}") + private String apiBasePath; + + /** SSE connection result; its response buffer accumulates server-sent events. 
*/ + private MvcResult sseResult; + + @BeforeEach + void start_sse_stream() throws Exception { + final String sseUrl = apiBasePath + "/events/" + sseClientId; + sseResult = mockMvc.perform(get(sseUrl) + .accept(MediaType.TEXT_EVENT_STREAM) + .with(setServletPath(sseUrl)) + .acceptCharset(StandardCharsets.UTF_8)) + .andExpect(request().asyncStarted()) + .andReturn(); + } + + @Test + void should_export_large_filter_to_csv() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", "csv") + .param("filter", StaticTestData.get("large_cql_filter")) + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. 
+ Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(30, SECONDS).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, containsString(".csv")); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat(contentType, containsString("text/csv")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andReturn(); + + final String csvContent = download.getResponse().getContentAsString(); + assertEquals( + 19, + csvContent.lines().count(), + "Expected 19 lines in the CSV output, including header and 18 data rows"); + } + + @Test + void should_export_large_output_to_csv() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "identificatie, class") + .param("outputFormat", "csv") + .acceptCharset(StandardCharsets.UTF_8) + 
.characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. + Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(5, MINUTES).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, containsString(".csv")); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat(contentType, containsString("text/csv")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andReturn(); + + final String csvContent = download.getResponse().getContentAsString(); + assertEquals( + 3663, + csvContent.lines().count(), + "Expected 3663 lines in the CSV output, including header and 3662 data rows"); + csvContent.lines().findFirst().ifPresent(header -> { + assertThat(header, containsString("identificatie")); + 
assertThat(header, containsString("class")); + // geometry is always included and the name is fixed + assertThat(header, containsString("the_geom_wkt")); + // these - among others - should not be exported + assertThat(header, not(containsString("bronhouder"))); + assertThat(header, not(containsString("lv_publicatiedatum"))); + }); + } + + @WithMockUser( + username = "tm-admin", + authorities = {"admin"}) + @Test + void should_export_wfs_to_csv_with_authentication() throws Exception { + final String extractUrl = apiBasePath + layerProxiedWithAuthInPublicApp + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "geom,naam,code") + .param("outputFormat", "csv") + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. 
+ Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(5, MINUTES).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, containsString(".csv")); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat(contentType, containsString("text/csv")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andReturn(); + + final String csvContent = download.getResponse().getContentAsString(); + assertEquals( + 13, + csvContent.lines().count(), + "Expected 13 lines in the CSV output, including header and 12 data rows"); + csvContent.lines().findFirst().ifPresent(header -> { + // geometry is always included and the name is fixed/hardcoded + assertThat(header, containsString("the_geom_wkt")); + assertThat(header, containsString("naam")); + assertThat(header, containsString("code")); + assertThat(header, startsWith("\"the_geom_wkt\",\"naam\",\"code\"")); + assertThat(header, not(containsString("ligtInLandNaam"))); + }); + } + + /** + * Parse 
the last non-empty line from the SSE stream that looks something like: + * {@code data:{"details":{"message":"Extract task + * completed","progress":100,"file":"begroeidterreindeel15061479295163305053.csv"},"eventType":"extract-completed","id":"019d6838-7f48-7053-9256-dd4b57c14264"} + * } as JSON and extract the file from the details. + */ + private String getLastCompletedEventJson(String sseMessages) throws IOException { + return java.util.Arrays.stream(sseMessages.split("\\R")) + .map(String::trim) + .filter(line -> !line.isEmpty()) + .filter(line -> line.startsWith("data:")) + .filter(line -> line.contains("\"eventType\":\"extract-completed\"")) + .reduce((first, second) -> second) + .orElseThrow() + .substring("data:".length()); + } + + private String getDownloadId(String eventJson) { + return new ObjectMapper() + .readTree(eventJson) + .path("details") + .path("downloadId") + .asString(); + } + + private int count_completed_messages(String s) { + int count = 0; + int index = 0; + final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.EXTRACT_COMPLETED + "\""; + while ((index = s.indexOf(marker, index)) != -1) { + count++; + index += marker.length(); + } + return count; + } +} diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java new file mode 100644 index 0000000000..711dd5ea0c --- /dev/null +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2026 B3Partners B.V. 
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; +import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.result.MockMvcResultHandlers.print; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; +import static org.tailormap.api.TestRequestProcessor.setServletPath; +import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; +import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; + +import java.nio.charset.StandardCharsets; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; +import org.springframework.http.MediaType; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.web.servlet.MockMvc; +import org.tailormap.api.annotation.PostgresIntegrationTest; + +/** These test cases run with a subset of the available output formats.
*/ +@PostgresIntegrationTest +@AutoConfigureMockMvc +@TestPropertySource(properties = {"tailormap-api.extract.allowed-outputformats=csv,shape"}) +class LayerExtractControllerRestrictedFormatsIntegrationTest { + private static final String formatsPath = "/extract/formats"; + private static final String extractPath = "/extract/"; + private static final String downloadPath = "/extract/download/"; + + @Autowired + private MockMvc mockMvc; + + @Value("${tailormap-api.base-path}") + private String apiBasePath; + + @Test + void list_supported_formats() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + formatsPath; + mockMvc.perform(get(extractUrl).accept(MediaType.APPLICATION_JSON).with(setServletPath(extractUrl))) + .andExpect(status().isOk()) + .andExpect(result -> assertThat(result.getResponse().getContentAsString(), is("[\"csv\",\"shape\"]"))); + } + + @Test + void invalid_output_format_should_return_bad_request_on_extract() throws Exception { + final String validClientId = "format-test-" + System.nanoTime(); + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + validClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + // disallowed through properties + .param("outputFormat", "geojson") + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isBadRequest()) + .andExpect(result -> + assertThat(result.getResponse().getContentAsString(), containsString("Invalid output format"))); + } + + @Test + void invalid_client_id_should_return_bad_request_on_extract() throws Exception { + final String invalidClientId = "invalid-te$t-" + System.nanoTime(); + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + invalidClientId; + 
mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", "csv") + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isBadRequest()) + .andExpect(result -> + assertThat(result.getResponse().getContentAsString(), containsString("Invalid clientId"))); + } + + @Test + void invalid_download_id_should_return_bad_request_on_download() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + "invalidDownloadId"; + mockMvc.perform(get(extractUrl) + .accept(MediaType.APPLICATION_OCTET_STREAM) + .with(setServletPath(extractUrl))) + .andExpect(status().isNotFound()) + .andExpect(result -> assertThat( + result.getResponse().getContentAsString(), containsString("Download file not found"))); + } + + @Test + void wms_secured_proxy_not_in_public_app_should_be_forbidden() throws Exception { + final String validClientId = "format-test-" + System.nanoTime(); + final String extractUrl = apiBasePath + layerProxiedWithAuthInPublicApp + extractPath + validClientId; + + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", "csv") + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andDo(print()) + .andExpect(status().isForbidden()); + } +} diff --git a/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java index a3fa19c06f..574bc19640 100644 --- a/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java +++ 
b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java @@ -66,7 +66,7 @@ void should_send_keep_alive_messages_for_two_minutes() { Awaitility.await("waiting for keep-alive messages") .pollDelay(45, SECONDS) .pollInterval(15, SECONDS) - .atLeast(2, MINUTES) + .atLeast(1, MINUTES) .atMost(130, SECONDS) .logging(logPrinter -> logger.debug("Checking for keep-alive messages in SSE stream... {}", logPrinter)) .untilAsserted(() -> { diff --git a/src/test/java/org/tailormap/api/util/UUIDv7Test.java b/src/test/java/org/tailormap/api/util/UUIDv7Test.java new file mode 100644 index 0000000000..c066b630e9 --- /dev/null +++ b/src/test/java/org/tailormap/api/util/UUIDv7Test.java @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.util; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.closeTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.UUID; +import org.junit.jupiter.api.Test; + +class UUIDv7Test { + @Test + void testExtractUuid() throws InterruptedException { + ArrayList<UUID> uuids = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + Thread.sleep(10); // Ensure different timestamps for each UUID + UUID uuid = UUIDv7.randomV7(); + assertNotNull(uuid); + uuids.add(uuid); + assertEquals(7, uuid.version(), () -> "Expected version 7, got " + uuid.version()); + assertEquals(2, uuid.variant(), () -> "Expected RFC 4122 variant, got " + uuid.variant()); + assertThat((double) (Instant.now().toEpochMilli() - UUIDv7.timestamp(uuid)), closeTo(0d, 100d)); + } + + UUID[] cloned = uuids.toArray(new UUID[0]).clone(); + uuids.sort(UUID::compareTo); + for (int i = 0; i < uuids.size(); i++) { + assertEquals(cloned[i], uuids.get(i),
"Expected UUIDs to be in the same order after sorting " + i); + if (i > 0) { + assertTrue( + UUIDv7.timestampAsInstant(uuids.get(i)).isAfter(UUIDv7.timestampAsInstant(uuids.get(i - 1))), + "Expected timestamps to be in ascending order " + i); + } + } + } + + @Test + void testRoundTrip() { + UUID uuid = UUIDv7.randomV7(); + Instant timestamp = UUIDv7.timestampAsInstant(uuid); + assertNotNull(timestamp); + // the string form must also be parseable by java.util.UUID + assertNotNull(UUID.fromString(uuid.toString())); + + UUID parsed = UUIDv7.fromString(uuid.toString()); + assertNotNull(parsed); + assertEquals(7, parsed.version(), () -> "Expected version 7, got " + parsed.version()); + assertEquals(timestamp.toEpochMilli(), UUIDv7.timestamp(parsed)); + assertEquals(uuid, parsed); + } +} diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index cf62d01aeb..66dbb57bcd 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -2,7 +2,15 @@ tailormap-api.base-path=/api tailormap-api.admin.base-path=/api/admin management.endpoints.web.base-path=/api/actuator tailormap-api.new-admin-username=tm-admin +# deprecated tailormap-api.export.allowed-outputformats=application/geopackage+sqlite3,application/json +# see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormattFormat for valid values +tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape +# any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk +# tailormap-api.extract.cleanup-minutes=120 +# the directory where the extract output files are stored, should be writable by the application +# tailormap-api.extract.location=/tmp + tailormap-api.timeout=5000 tailormap-api.management.hashed-password=#{null} # A list of allowed metrics for the ingest metrics endpoint; do use lowercase and underscores to separate words.
From 829378381f64e54774560148185330f490c12b12 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 10 Apr 2026 18:54:08 +0200 Subject: [PATCH 05/17] HTM-1961 | HTM-1962: apply code review comments --- .../controller/LayerExtractController.java | 6 ++++-- .../service/CreateLayerExtractService.java | 19 +++++++++++++------ src/main/resources/application.properties | 2 +- .../resources/openapi/status-responses.yaml | 6 +++--- src/main/resources/openapi/viewer-api.yaml | 15 +++++---------- ...LayerExtractControllerIntegrationTest.java | 2 +- ...ollerRestrictedFormatsIntegrationTest.java | 11 ++++++++++- src/test/resources/application.properties | 2 +- 8 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/tailormap/api/controller/LayerExtractController.java b/src/main/java/org/tailormap/api/controller/LayerExtractController.java index 0d492fb463..64894f9d80 100644 --- a/src/main/java/org/tailormap/api/controller/LayerExtractController.java +++ b/src/main/java/org/tailormap/api/controller/LayerExtractController.java @@ -69,7 +69,7 @@ public LayerExtractController( /** * Download the result of an extract request. The extract generation should be initiated first by a POST to - * {@code /{viewerKind}/{viewerName}/layer/{appLayerId}/extract}. + * {@code /{viewerKind}/{viewerName}/layer/{appLayerId}/extract/{clientId}}. 
*/ @GetMapping(path = "/download/{downloadId}") @Counted(value = "tailormap_api_extract_download", description = "Count of layer extract downloads") @@ -119,7 +119,9 @@ public ResponseEntity formats( @ModelAttribute GeoService service, @ModelAttribute Application application, @ModelAttribute AppTreeLayerNode appTreeLayerNode) { - return ResponseEntity.ok(allowedExtractOutputFormats); + return ResponseEntity.ok(allowedExtractOutputFormats.stream() + .map(ExtractOutputFormat::getValue) + .toList()); } @Transactional diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index 084a6f039f..e8d3353e61 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -159,8 +159,9 @@ public void validateClientId(@NonNull String clientId) throws IllegalArgumentExc /** * Create a validated filename for an extract. The naming follows the pattern * {@code "%s_%s_%s.%s".formatted(sourceFT.getName(), clientId, UUIDv7.randomV7(), outputFormat.getExtension()) } - * where the first part is the source feature type name, the second part is the SSE client id, the third part is a - * random UUIDv7 and the fourth part is the file extension based on the requested output format. + * where the first part is the source feature type name (this is cleaned from some characters), the second part is + * the SSE client id, the third part is a random UUIDv7 and the fourth part is the file extension based on the + * requested output format. * * @param clientId the SSE client id * @param sourceFT the source featuretype for the extract @@ -180,6 +181,9 @@ public String createExtractFilename( if (cleanFTName.contains(":")) { // clip off the WFS namespace part cleanFTName = cleanFTName.substring(cleanFTName.lastIndexOf(":") + 1); + // remove: . 
_ which are used as separators in the filename and could cause issues when parsing the filename + // later on + cleanFTName = cleanFTName.replaceAll("[._]", ""); } return "%s_%s_%s.%s".formatted(cleanFTName, clientId, UUIDv7.randomV7(), outputFormat.getExtension()); } @@ -197,13 +201,16 @@ public void createLayerExtract( @NonNull String outputFileName) { SimpleFeatureSource inputFeatureSource = null; - this.emitProgress(clientId, outputFileName, 0, false, null); + this.emitProgress(clientId, outputFileName, 0, false, "Starting extract"); try (Transaction outputTransaction = new DefaultTransaction("tailormap-extract-output")) { inputFeatureSource = featureSourceFactoryHelper.openGeoToolsFeatureSource(inputTmFeatureType); Query q = new Query(inputFeatureSource.getName().toString()); - q.setPropertyNames(attributes.toArray(new String[0])); + if (!attributes.isEmpty()) { + q.setPropertyNames(attributes.toArray(new String[0])); + } + if (!StringUtils.isBlank(filterCQL)) { Filter filter = ECQL.toFilter(filterCQL); q.setFilter(filter); @@ -213,8 +220,8 @@ public void createLayerExtract( } final int featCount = inputFeatureSource.getCount(q); - AtomicInteger featsAdded = new AtomicInteger(); logger.debug("Filtered source counts {}", featCount); + final AtomicInteger featsAdded = new AtomicInteger(); FileDataStore outputDataStore = getExtractDataStore(extractOutputFormat, outputFileName, clientId); SimpleFeatureType fType = @@ -245,7 +252,7 @@ public void createLayerExtract( logger.error("Output datastore is not a SimpleFeatureStore, cannot write features"); } outputDataStore.dispose(); - this.emitProgress(clientId, outputFileName, 100, true, null); + this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); } catch (IOException | CQLException | SchemaException e) { emitError(clientId, e.getMessage()); logger.error("Creating extract failed", e); diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties 
index ec8813cb6d..428fce1955 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -32,7 +32,7 @@ tailormap-api.feature.info.maxitems=30 # deprecated tailormap-api.export.allowed-outputformats=csv,text/csv,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,excel2007,application/vnd.shp,application/x-zipped-shp,SHAPE-ZIP,application/geopackage+sqlite3,application/x-gpkg,geopackage,geopkg,gpkg,application/geo+json,application/geojson,application/json,json,DXF-ZIP -# see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormattFormat for valid values +# see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk # tailormap-api.extract.cleanup-minutes=120 diff --git a/src/main/resources/openapi/status-responses.yaml b/src/main/resources/openapi/status-responses.yaml index 4882ed0d21..1ddf35f6ab 100644 --- a/src/main/resources/openapi/status-responses.yaml +++ b/src/main/resources/openapi/status-responses.yaml @@ -81,14 +81,14 @@ components: type: object nullable: true id: - description: 'optional event identifier, can be used for event ordering and deduplication' + description: 'optional event identifier (UUIDv7), can be used for event ordering and deduplication' type: string format: uuid nullable: true example: - eventType: 'extract-started' + eventType: 'extract-progress' details: - status: 'started' message: 'Extracting data' progress: 17 + downloadId: '123e4567-e89b-12d3-a456-426614174001.csv' id: '123e4567-e89b-12d3-a456-426614174000' diff --git a/src/main/resources/openapi/viewer-api.yaml b/src/main/resources/openapi/viewer-api.yaml index 556a7d23e1..10c39bc4f1 100644 --- a/src/main/resources/openapi/viewer-api.yaml +++ 
b/src/main/resources/openapi/viewer-api.yaml @@ -1820,7 +1820,7 @@ paths: in: path required: true description: 'A client-generated identifier; - this must be the same as the clientId used in eg. the `/extract` request to correlate the events with the extract request. + this must be the same as the clientId used in eg. the `/{viewerKind}/{viewerName}/layer/{appLayerId}/extract/{clientId}` request to correlate the events with the extract request. The format should use the "Nano ID" format, for example `V1StGXR8_Z5jdHi6B-myT`.' schema: type: string @@ -1871,7 +1871,7 @@ paths: type: array items: type: string - example: '["csv","shape.zip"]' + example: '["csv","shape"]' /{viewerKind}/{viewerName}/layer/{appLayerId}/extract/{clientId}: description: 'Export the attributes as shown in the attribute list for a layer.' @@ -1944,8 +1944,8 @@ paths: responses: '202': description: 'Export started/queued. The client should listen to the `/events/` stream and wait for an - `extract-completed` event with `status: completed` to know when the export is finished and the file is ready - to be downloaded. If the connection is closed before that, the export may be cancelled.' + `extract-completed` event with `message: Extract task completed` to know when the export is finished and the + file is ready to be downloaded. If the connection is closed before that, the export may be cancelled.' content: application/json: schema: @@ -1957,12 +1957,7 @@ paths: required: - message example: - eventType: 'extract-started' - id: '123e4567-e89b-12d3-a456-426614174000' - details: - status: started - message: 'Extracting data' - progress: 50 + message: 'Extract request accepted' downloadId: '123e4567-e89b-12d3-a456-426614174001.csv' '400': description: 'Bad Request. May be returned for some combination of parameters that can not be processed or are incomplete.' 
diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index bf94c24ef2..5871b6aaa8 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -228,7 +228,7 @@ void should_export_wfs_to_csv_with_authentication() throws Exception { final String extractedDownloadId = getDownloadId(lastCompletedEventJson); assertThat(extractedDownloadId, containsString(".csv")); - final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + final String downloadUrl = apiBasePath + layerProxiedWithAuthInPublicApp + downloadPath + extractedDownloadId; MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) .andExpect(status().isOk()) .andExpect(result -> { diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java index 711dd5ea0c..774687d242 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java @@ -12,6 +12,7 @@ import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; import static org.springframework.test.web.servlet.result.MockMvcResultHandlers.print; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import static org.tailormap.api.TestRequestProcessor.setServletPath; 
import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; @@ -52,7 +53,15 @@ void list_supported_formats() throws Exception { @Test void invalid_output_format_should_return_bad_request_on_extract() throws Exception { - final String validClientId = "format-test-" + System.nanoTime(); + final String validClientId = "invalid_output_format-" + System.nanoTime(); + final String sseUrl = apiBasePath + "/events/" + validClientId; + mockMvc.perform(get(sseUrl) + .accept(MediaType.TEXT_EVENT_STREAM) + .with(setServletPath(sseUrl)) + .acceptCharset(StandardCharsets.UTF_8)) + .andExpect(request().asyncStarted()) + .andReturn(); + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + validClientId; mockMvc.perform(post(extractUrl) .accept(MediaType.APPLICATION_JSON) diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index 66dbb57bcd..7d5c7717e3 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -4,7 +4,7 @@ management.endpoints.web.base-path=/api/actuator tailormap-api.new-admin-username=tm-admin # deprecated tailormap-api.export.allowed-outputformats=application/geopackage+sqlite3,application/json -# see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormattFormat for valid values +# see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk # tailormap-api.extract.cleanup-minutes=120 From d833e9811b89c7e85dd792a82a0917eec8cbafb9 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:18:10 +0200 Subject: [PATCH 06/17] HTM-1963: Introduce org.geotools:gt-excel-writer --- pom.xml | 13 +++++++++++++ 1 file 
changed, 13 insertions(+) diff --git a/pom.xml b/pom.xml index c63886bd6d..db64b6e294 100644 --- a/pom.xml +++ b/pom.xml @@ -319,6 +319,11 @@ SPDX-License-Identifier: MIT org.geotools gt-epsg-hsql + + org.geotools + gt-excel-writer + [35-SNAPSHOT,) + org.geotools gt-http @@ -683,6 +688,14 @@ SPDX-License-Identifier: MIT Releases hosted by OSGeo https://repo.osgeo.org/repository/release/ + + + true + + repo.b3p.nl + B3Partners public repository + https://repo.b3p.nl/nexus/repository/public/ + From 1b5ab8eb73da3e88170db7ebe723b4a70d19963c Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:29:18 +0200 Subject: [PATCH 07/17] HTM-1963: Add Excel extract output and integration tests --- .../service/CreateLayerExtractService.java | 21 ++++- ...LayerExtractControllerIntegrationTest.java | 91 +++++++++++++++++++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index e8d3353e61..31381191ac 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -51,6 +51,7 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.tailormap.api.controller.LayerExtractController; +import org.tailormap.api.geotools.data.excel.ExcelDataStoreFactory; import org.tailormap.api.geotools.featuresources.FeatureSourceFactoryHelper; import org.tailormap.api.persistence.TMFeatureType; import org.tailormap.api.util.UUIDv7; @@ -65,7 +66,6 @@ public class CreateLayerExtractService { private final SseEventBus eventBus; private final JsonMapper jsonMapper; private final FeatureSourceFactoryHelper featureSourceFactoryHelper; - private final FilterFactory ff = CommonFactoryFinder.getFilterFactory(GeoTools.getDefaultHints()); 
// we can safely use the tmp dir as a default here because we are running in a docker container so access is limited @@ -223,7 +223,8 @@ public void createLayerExtract( logger.debug("Filtered source counts {}", featCount); final AtomicInteger featsAdded = new AtomicInteger(); - FileDataStore outputDataStore = getExtractDataStore(extractOutputFormat, outputFileName, clientId); + FileDataStore outputDataStore = + getExtractDataStore(extractOutputFormat, outputFileName, clientId, inputTmFeatureType.getName()); SimpleFeatureType fType = DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0])); outputDataStore.createSchema(fType); @@ -268,7 +269,10 @@ public void createLayerExtract( } private FileDataStore getExtractDataStore( - LayerExtractController.ExtractOutputFormat extractOutputFormat, String outputFileName, String clientId) + LayerExtractController.ExtractOutputFormat extractOutputFormat, + String outputFileName, + String clientId, + String typeName) throws IOException { final File outputFile = Files.createFile(Path.of(exportFilesLocation, outputFileName)) @@ -302,8 +306,17 @@ private FileDataStore getExtractDataStore( true); return (FileDataStore) new CSVDataStoreFactory().createNewDataStore(params); } + case XLSX -> { + Map params = Map.of( + ExcelDataStoreFactory.FILE_PARAM.key, + outputFile, + ExcelDataStoreFactory.SHEET_PARAM.key, + // typeName could hve a prefix; for Excel sheet names ':' is disallowed, max length is 31 + typeName.substring(typeName.lastIndexOf(":") + 1, Math.min(typeName.length(), 31))); + return (FileDataStore) new ExcelDataStoreFactory().createNewDataStore(params); + } // TODO implement - case GEOJSON, XLSX, SHAPE -> { + case GEOJSON, SHAPE -> { emitError(clientId, "Output format " + extractOutputFormat + " is not yet supported"); logger.error("Output format {} is not yet supported", extractOutputFormat); throw new IOException("Unsupported output format: " + extractOutputFormat); diff --git 
a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index 5871b6aaa8..919415365b 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -12,6 +12,7 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.startsWith; +import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; @@ -22,8 +23,14 @@ import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; import org.awaitility.Awaitility; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.MethodOrderer; @@ -256,6 +263,90 @@ void should_export_wfs_to_csv_with_authentication() throws Exception { }); } + @Test + void should_export_large_filter_to_excel() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", "xlsx") + .param("filter", 
StaticTestData.get("large_cql_filter")) + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. + Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(30, SECONDS).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, containsString(".xlsx")); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat( + contentType, + containsString("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andReturn(); + + // open the Excel file and check that we have the expected content + try (InputStream inp = new ByteArrayInputStream(download.getResponse().getContentAsByteArray()); + Workbook wb = 
WorkbookFactory.create(inp)) { + Sheet sheet = wb.getSheetAt(0); + + assertEquals( + 18 + /*header row*/ 1, + sheet.getPhysicalNumberOfRows(), + () -> "Expected " + 18 + /*header row*/ 1 + + " rows in the Excel sheet, including header and 18 data rows"); + + assertAll( + "Check header and first data row", + () -> assertEquals( + "begroeidterreindeel", + sheet.getSheetName(), + "Expected sheet name to be begroeidterreindeel"), + () -> assertEquals( + 14, sheet.getRow(0).getPhysicalNumberOfCells(), "Expected 14 columns in the header row")); + + assertAll( + "Check first data row", + () -> assertEquals( + CellType.NUMERIC, + sheet.getRow(1).getCell(0).getCellType(), + "Expected first cell in header to be numeric (with date format)"), + () -> assertEquals( + CellType.STRING, + sheet.getRow(1).getCell(1).getCellType(), + "Expected second cell in header to be a string"), + () -> assertEquals("geenWaarde", sheet.getRow(1).getCell(1).getStringCellValue()), + () -> assertEquals("G0344", sheet.getRow(1).getCell(2).getStringCellValue())); + } + } + /** * Parse the last non-empty line from the SSE stream that looks something like: * {@code data:{"details":{"message":"Extract task From 46c2c82eb46d05b969372f9684dbf9bb9472818d Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Mon, 20 Apr 2026 17:22:53 +0200 Subject: [PATCH 08/17] HTM-1963: As defined in requirements: block export when Excel limits are exceeded --- .../controller/LayerExtractController.java | 64 ++++++++++++++++++- .../service/CreateLayerExtractService.java | 13 ++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/tailormap/api/controller/LayerExtractController.java b/src/main/java/org/tailormap/api/controller/LayerExtractController.java index 64894f9d80..76077ff812 100644 --- a/src/main/java/org/tailormap/api/controller/LayerExtractController.java +++ b/src/main/java/org/tailormap/api/controller/LayerExtractController.java @@ -21,7 +21,13 @@ 
import java.util.Map; import java.util.Set; import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import org.geotools.api.data.Query; +import org.geotools.api.data.SimpleFeatureSource; +import org.geotools.api.filter.Filter; import org.geotools.api.filter.sort.SortOrder; +import org.geotools.filter.text.cql2.CQLException; +import org.geotools.filter.text.ecql.ECQL; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; @@ -40,6 +46,8 @@ import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.server.ResponseStatusException; import org.tailormap.api.annotation.AppRestController; +import org.tailormap.api.geotools.data.excel.ExcelDataStore; +import org.tailormap.api.geotools.featuresources.FeatureSourceFactoryHelper; import org.tailormap.api.persistence.Application; import org.tailormap.api.persistence.GeoService; import org.tailormap.api.persistence.TMFeatureType; @@ -57,14 +65,18 @@ public class LayerExtractController { private static final Pattern SAFE_DOWNLOAD_ID = Pattern.compile("^[A-Za-z0-9._-]+$"); private final FeatureSourceRepository featureSourceRepository; private final CreateLayerExtractService createLayerExtractService; + private final FeatureSourceFactoryHelper featureSourceFactoryHelper; @Value("#{'${tailormap-api.extract.allowed-outputformats}'.split(',')}") private List allowedExtractOutputFormats; public LayerExtractController( - FeatureSourceRepository featureSourceRepository, CreateLayerExtractService createLayerExtractService) { + FeatureSourceRepository featureSourceRepository, + CreateLayerExtractService createLayerExtractService, + FeatureSourceFactoryHelper featureSourceFactoryHelper) { this.featureSourceRepository = featureSourceRepository; this.createLayerExtractService = createLayerExtractService; + this.featureSourceFactoryHelper = featureSourceFactoryHelper; } /** @@ -186,6 +198,10 @@ public ResponseEntity extract( 
attributes.add(sourceFT.getDefaultGeometryAttribute()); } + if (outputFormat == ExtractOutputFormat.XLSX) { + validateExcelLimits(sourceFT, attributes, filter); + } + SortOrder sortingOrder = SortOrder.ASCENDING; if (null != sortOrder && (sortOrder.equalsIgnoreCase("desc") || sortOrder.equalsIgnoreCase("asc"))) { sortingOrder = SortOrder.valueOf(sortOrder.toUpperCase(Locale.ROOT)); @@ -204,6 +220,52 @@ public ResponseEntity extract( .body(Map.of("message", "Extract request accepted", "downloadId", outputFileName)); } + /** + * Check that neither the number of columns nor the number of rows requested for the extract exceed the limits of + * Excel format. This is required to block extract requests that would fail later on in the ExcelFeatureWriter when + * the limits are exceeded. NOTE: cell size limits are handled in the ExcelFeatureWriter. + * + * @param featureType requested FT + * @param attributes requested attributes + * @param filterCQL requested filter + */ + private void validateExcelLimits(TMFeatureType featureType, Set attributes, String filterCQL) { + if (attributes.size() > ExcelDataStore.getMaxColumns()) { + throw new ResponseStatusException( + HttpStatus.BAD_REQUEST, + "Excel format does not support more than " + ExcelDataStore.getMaxColumns() + " columns"); + } + SimpleFeatureSource inputFeatureSource = null; + try { + // count all the features; this is expensive but required to block extract when the Excel limits for + // row/columns are exceeded + inputFeatureSource = featureSourceFactoryHelper.openGeoToolsFeatureSource(featureType); + Query q = new Query(inputFeatureSource.getName().toString()); + if (!attributes.isEmpty()) { + q.setPropertyNames(attributes.toArray(new String[0])); + } + + if (!StringUtils.isBlank(filterCQL)) { + Filter filter = ECQL.toFilter(filterCQL); + q.setFilter(filter); + } + final int featCount = inputFeatureSource.getCount(q); + if (featCount >= ExcelDataStore.getMaxRows()) { + throw new ResponseStatusException( + 
HttpStatus.BAD_REQUEST, + "Excel format does not support more than " + ExcelDataStore.getMaxRows() + " rows"); + } + } catch (CQLException | IOException e) { + throw new ResponseStatusException( + HttpStatus.INTERNAL_SERVER_ERROR, + "Failed to count all features for Excel extract: " + e.getMessage()); + } finally { + if (inputFeatureSource != null) { + inputFeatureSource.getDataStore().dispose(); + } + } + } + public enum ExtractOutputFormat { CSV("csv", "csv"), GEOJSON("geojson", "json"), diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index 31381191ac..9153b657ca 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -50,7 +50,9 @@ import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.server.ResponseStatusException; import org.tailormap.api.controller.LayerExtractController; +import org.tailormap.api.geotools.data.excel.ExcelDataStore; import org.tailormap.api.geotools.data.excel.ExcelDataStoreFactory; import org.tailormap.api.geotools.featuresources.FeatureSourceFactoryHelper; import org.tailormap.api.persistence.TMFeatureType; @@ -221,6 +223,17 @@ public void createLayerExtract( final int featCount = inputFeatureSource.getCount(q); logger.debug("Filtered source counts {}", featCount); + if (featCount >= ExcelDataStore.getMaxRows()) { + this.emitError( + clientId, + "Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format." 
+ .formatted(featCount, ExcelDataStore.getMaxRows())); + throw new ResponseStatusException( + org.springframework.http.HttpStatus.BAD_REQUEST, + "Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format." + .formatted(featCount, ExcelDataStore.getMaxRows())); + } + final AtomicInteger featsAdded = new AtomicInteger(); FileDataStore outputDataStore = From 5b148c51b3e6039e3974f1630c0bd26fed94a86b Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:07:39 +0200 Subject: [PATCH 09/17] HTM-1964: Add GeoJSON extract format (#1702) For now use GeoTools 35-SNAPSHOT to make geojson export work, see https://osgeo-org.atlassian.net/browse/GEOT-7894 --- pom.xml | 12 +++ .../controller/LayerExtractController.java | 8 +- .../service/CreateLayerExtractService.java | 25 +++++-- ...LayerExtractControllerIntegrationTest.java | 73 +++++++++++++++++-- 4 files changed, 100 insertions(+), 18 deletions(-) diff --git a/pom.xml b/pom.xml index db64b6e294..e5fc76965d 100644 --- a/pom.xml +++ b/pom.xml @@ -324,6 +324,10 @@ SPDX-License-Identifier: MIT gt-excel-writer [35-SNAPSHOT,) + + org.geotools + gt-geojson-store + org.geotools gt-http @@ -696,6 +700,14 @@ SPDX-License-Identifier: MIT B3Partners public repository https://repo.b3p.nl/nexus/repository/public/ + + + true + + OSGeo-snapshots + Snapshots hosted by OSGeo + https://repo.osgeo.org/repository/snapshot/ + diff --git a/src/main/java/org/tailormap/api/controller/LayerExtractController.java b/src/main/java/org/tailormap/api/controller/LayerExtractController.java index 76077ff812..1542c5badb 100644 --- a/src/main/java/org/tailormap/api/controller/LayerExtractController.java +++ b/src/main/java/org/tailormap/api/controller/LayerExtractController.java @@ -267,10 +267,10 @@ private void validateExcelLimits(TMFeatureType featureType, Set attribut } public enum ExtractOutputFormat { - 
CSV("csv", "csv"), - GEOJSON("geojson", "json"), - XLSX("xlsx", "xlsx"), - SHAPE("shape", "zip"); + CSV("csv", ".csv"), + GEOJSON("geojson", ".geojson"), + XLSX("xlsx", ".xlsx"), + SHAPE("shape", ".zip"); private final String value; private final String extension; diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index 9153b657ca..d100e1a29a 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -36,6 +36,7 @@ import org.geotools.data.DataUtilities; import org.geotools.data.DefaultTransaction; import org.geotools.data.csv.CSVDataStoreFactory; +import org.geotools.data.geojson.store.GeoJSONDataStoreFactory; import org.geotools.factory.CommonFactoryFinder; import org.geotools.feature.SchemaException; import org.geotools.filter.text.cql2.CQLException; @@ -183,11 +184,11 @@ public String createExtractFilename( if (cleanFTName.contains(":")) { // clip off the WFS namespace part cleanFTName = cleanFTName.substring(cleanFTName.lastIndexOf(":") + 1); - // remove: . _ which are used as separators in the filename and could cause issues when parsing the filename - // later on + // remove: '.' 
and '_' which are used as separators in the filename and could cause issues when parsing the + // filename later on cleanFTName = cleanFTName.replaceAll("[._]", ""); } - return "%s_%s_%s.%s".formatted(cleanFTName, clientId, UUIDv7.randomV7(), outputFormat.getExtension()); + return "%s_%s_%s%s".formatted(cleanFTName, clientId, UUIDv7.randomV7(), outputFormat.getExtension()); } @Async("extractTaskExecutor") @@ -241,7 +242,10 @@ public void createLayerExtract( SimpleFeatureType fType = DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0])); outputDataStore.createSchema(fType); - + // as a workaround for https://osgeo-org.atlassian.net/browse/GEOT-7894 we could instead call + // if (outputDataStore.getFeatureSource(fType.getName()) instanceof SimpleFeatureStore featureStore) { + // but I'd rather wait for a release of geotools with a fix for that issue, because it does not work with + // the CSV store if (outputDataStore.getFeatureSource() instanceof SimpleFeatureStore featureStore) { featureStore.setTransaction(outputTransaction); featureStore.addFeatureListener(event -> { @@ -261,12 +265,13 @@ public void createLayerExtract( }); featureStore.addFeatures(inputFeatureSource.getFeatures(q)); outputTransaction.commit(); + this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); + outputDataStore.dispose(); } else { + outputDataStore.dispose(); this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features"); logger.error("Output datastore is not a SimpleFeatureStore, cannot write features"); } - outputDataStore.dispose(); - this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); } catch (IOException | CQLException | SchemaException e) { emitError(clientId, e.getMessage()); logger.error("Creating extract failed", e); @@ -324,12 +329,16 @@ private FileDataStore getExtractDataStore( ExcelDataStoreFactory.FILE_PARAM.key, outputFile, 
ExcelDataStoreFactory.SHEET_PARAM.key, - // typeName could hve a prefix; for Excel sheet names ':' is disallowed, max length is 31 + // typeName could have a prefix; for Excel sheet names ':' is disallowed, max length is 31 typeName.substring(typeName.lastIndexOf(":") + 1, Math.min(typeName.length(), 31))); return (FileDataStore) new ExcelDataStoreFactory().createNewDataStore(params); } + case GEOJSON -> { + Map params = Map.of(GeoJSONDataStoreFactory.FILE_PARAM.key, outputFile); + return (FileDataStore) new GeoJSONDataStoreFactory().createNewDataStore(params); + } // TODO implement - case GEOJSON, SHAPE -> { + case SHAPE -> { emitError(clientId, "Output format " + extractOutputFormat + " is not yet supported"); logger.error("Output format {} is not yet supported", extractOutputFormat); throw new IOException("Unsupported output format: " + extractOutputFormat); diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index 919415365b..cff7d48233 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -9,6 +9,7 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.startsWith; @@ -17,9 +18,12 @@ import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static 
org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import static org.tailormap.api.TestRequestProcessor.setServletPath; +import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.CSV; +import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.GEOJSON; import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; @@ -91,7 +95,7 @@ void should_export_large_filter_to_csv() throws Exception { .with(setServletPath(extractUrl)) .with(csrf()) .param("attributes", "") - .param("outputFormat", "csv") + .param("outputFormat", CSV.getValue()) .param("filter", StaticTestData.get("large_cql_filter")) .acceptCharset(StandardCharsets.UTF_8) .characterEncoding(StandardCharsets.UTF_8) @@ -115,7 +119,7 @@ void should_export_large_filter_to_csv() throws Exception { assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); final String extractedDownloadId = getDownloadId(lastCompletedEventJson); - assertThat(extractedDownloadId, containsString(".csv")); + assertThat(extractedDownloadId, endsWith(CSV.getExtension())); final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) @@ -145,7 +149,7 @@ void should_export_large_output_to_csv() throws Exception { .with(setServletPath(extractUrl)) .with(csrf()) .param("attributes", "identificatie, class") - .param("outputFormat", "csv") + .param("outputFormat", CSV.getValue()) .acceptCharset(StandardCharsets.UTF_8) .characterEncoding(StandardCharsets.UTF_8) .contentType(MediaType.APPLICATION_FORM_URLENCODED)) @@ -168,7 +172,7 @@ void 
should_export_large_output_to_csv() throws Exception { assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); final String extractedDownloadId = getDownloadId(lastCompletedEventJson); - assertThat(extractedDownloadId, containsString(".csv")); + assertThat(extractedDownloadId, endsWith(CSV.getExtension())); final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) @@ -210,7 +214,7 @@ void should_export_wfs_to_csv_with_authentication() throws Exception { .with(setServletPath(extractUrl)) .with(csrf()) .param("attributes", "geom,naam,code") - .param("outputFormat", "csv") + .param("outputFormat", CSV.getValue()) .acceptCharset(StandardCharsets.UTF_8) .characterEncoding(StandardCharsets.UTF_8) .contentType(MediaType.APPLICATION_FORM_URLENCODED)) @@ -233,7 +237,7 @@ void should_export_wfs_to_csv_with_authentication() throws Exception { assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); final String extractedDownloadId = getDownloadId(lastCompletedEventJson); - assertThat(extractedDownloadId, containsString(".csv")); + assertThat(extractedDownloadId, endsWith(CSV.getExtension())); final String downloadUrl = apiBasePath + layerProxiedWithAuthInPublicApp + downloadPath + extractedDownloadId; MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) @@ -347,6 +351,63 @@ void should_export_large_filter_to_excel() throws Exception { } } + @Test + void should_export_large_filter_to_geojson() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", GEOJSON.getValue()) + .param("filter", StaticTestData.get("large_cql_filter")) + 
.acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. + Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(30, SECONDS).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, endsWith(GEOJSON.getExtension())); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat(contentType, containsString("application/geo+json")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andExpect(jsonPath("$.type").value("FeatureCollection")) + .andExpect(jsonPath("$.features.length()").value(18)) + .andExpect(jsonPath("$.features[0].type").value("Feature")) + .andExpect(jsonPath("$.features[0].geometry").isNotEmpty()) + .andExpect(jsonPath("$.features[0].properties.length()").value(13)) 
+ .andExpect(jsonPath("$.features[0].properties.bronhouder").value("G0344")) + .andExpect(jsonPath("$.features[0].geometry.type").value("Polygon")) + // no CRS members + .andExpect(jsonPath("$.crs").doesNotHaveJsonPath()) + .andExpect(jsonPath("$.features[0].crs").doesNotHaveJsonPath()); + } + /** * Parse the last non-empty line from the SSE stream that looks something like: * {@code data:{"details":{"message":"Extract task From dafc2867840b08316299f8fea85a4937867bd87a Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:50:57 +0200 Subject: [PATCH 10/17] HTM-1965: Implement zipped shapefile extract --- pom.xml | 4 + .../controller/LayerExtractController.java | 28 +- .../ProgressReportingFeatureCollection.java | 45 +++ .../ProgressReportingFeatureIterator.java | 63 ++++ .../service/CreateLayerExtractService.java | 340 ++++++++++++++---- src/main/resources/application.properties | 2 + ...LayerExtractControllerIntegrationTest.java | 126 +++++++ ...ollerRestrictedFormatsIntegrationTest.java | 12 +- ...rogressReportingFeatureCollectionTest.java | 103 ++++++ .../ProgressReportingFeatureIteratorTest.java | 22 ++ src/test/resources/application.properties | 4 +- 11 files changed, 677 insertions(+), 72 deletions(-) create mode 100644 src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollection.java create mode 100644 src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIterator.java create mode 100644 src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollectionTest.java create mode 100644 src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIteratorTest.java diff --git a/pom.xml b/pom.xml index e5fc76965d..225df88d57 100644 --- a/pom.xml +++ b/pom.xml @@ -348,6 +348,10 @@ SPDX-License-Identifier: MIT org.geotools gt-referencing + + org.geotools + gt-shapefile + org.geotools gt-wfs-ng diff --git 
a/src/main/java/org/tailormap/api/controller/LayerExtractController.java b/src/main/java/org/tailormap/api/controller/LayerExtractController.java index 1542c5badb..b9eb2acfc0 100644 --- a/src/main/java/org/tailormap/api/controller/LayerExtractController.java +++ b/src/main/java/org/tailormap/api/controller/LayerExtractController.java @@ -28,6 +28,7 @@ import org.geotools.api.filter.sort.SortOrder; import org.geotools.filter.text.cql2.CQLException; import org.geotools.filter.text.ecql.ECQL; +import org.jspecify.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; @@ -198,8 +199,18 @@ public ResponseEntity extract( attributes.add(sourceFT.getDefaultGeometryAttribute()); } - if (outputFormat == ExtractOutputFormat.XLSX) { - validateExcelLimits(sourceFT, attributes, filter); + // check if filter has valid syntax (it could still be invalid wrt feature type) + Filter parsedCQL = null; + try { + if (!StringUtils.isBlank(filter)) { + parsedCQL = ECQL.toFilter(filter); + } + } catch (CQLException e) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid filter"); + } + + if (ExtractOutputFormat.XLSX.equals(outputFormat)) { + validateExcelLimits(sourceFT, attributes, parsedCQL); } SortOrder sortingOrder = SortOrder.ASCENDING; @@ -213,7 +224,7 @@ public ResponseEntity extract( //noinspection JvmTaintAnalysis Not a Path Traversal Sink because the clientId is validated this.createLayerExtractService.createLayerExtract( - clientId, sourceFT, attributes, filter, sortBy, sortingOrder, outputFormat, outputFileName); + clientId, sourceFT, attributes, parsedCQL, sortBy, sortingOrder, outputFormat, outputFileName); //noinspection JvmTaintAnalysis Not an XSS sink because the response is a json message return ResponseEntity.accepted() @@ -227,9 +238,9 @@ public ResponseEntity extract( * * @param featureType requested FT * @param attributes requested attributes - * @param filterCQL 
requested filter + * @param filter requested filter */ - private void validateExcelLimits(TMFeatureType featureType, Set attributes, String filterCQL) { + private void validateExcelLimits(TMFeatureType featureType, Set attributes, @Nullable Filter filter) { if (attributes.size() > ExcelDataStore.getMaxColumns()) { throw new ResponseStatusException( HttpStatus.BAD_REQUEST, @@ -245,8 +256,7 @@ private void validateExcelLimits(TMFeatureType featureType, Set attribut q.setPropertyNames(attributes.toArray(new String[0])); } - if (!StringUtils.isBlank(filterCQL)) { - Filter filter = ECQL.toFilter(filterCQL); + if (filter != null) { q.setFilter(filter); } final int featCount = inputFeatureSource.getCount(q); @@ -255,10 +265,12 @@ private void validateExcelLimits(TMFeatureType featureType, Set attribut HttpStatus.BAD_REQUEST, "Excel format does not support more than " + ExcelDataStore.getMaxRows() + " rows"); } - } catch (CQLException | IOException e) { + } catch (IOException e) { throw new ResponseStatusException( HttpStatus.INTERNAL_SERVER_ERROR, "Failed to count all features for Excel extract: " + e.getMessage()); + } catch (IllegalArgumentException e) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid filter"); } finally { if (inputFeatureSource != null) { inputFeatureSource.getDataStore().dispose(); diff --git a/src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollection.java b/src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollection.java new file mode 100644 index 0000000000..ed1ec18062 --- /dev/null +++ b/src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollection.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2026 B3Partners B.V. 
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.geotools.collection; + +import java.util.function.IntConsumer; +import org.geotools.data.simple.SimpleFeatureCollection; +import org.geotools.data.simple.SimpleFeatureIterator; +import org.geotools.feature.collection.DecoratingSimpleFeatureCollection; +import org.jspecify.annotations.Nullable; + +/** + * A decorating feature collection that will pass a callback to the iterator to report the number of features provided. + */ +public class ProgressReportingFeatureCollection extends DecoratingSimpleFeatureCollection { + private final int progressInterval; + private final IntConsumer progressCallback; + + /** + * Creates a new {@code ProgressReportingFeatureCollection} that wraps the given delegate and reports progress at + * the specified interval. + * + * @param delegate the underlying {@link SimpleFeatureCollection} to decorate + * @param progressInterval the number of features between each progress callback invocation; must be greater than + * {@code 0} + * @param progressCallback a callback that receives the current feature count at each interval; may be {@code null} + */ + public ProgressReportingFeatureCollection( + SimpleFeatureCollection delegate, int progressInterval, @Nullable IntConsumer progressCallback) { + super(delegate); + if (progressInterval <= 0) { + throw new IllegalArgumentException("progressInterval must be greater than 0"); + } + this.delegate = delegate; + this.progressInterval = progressInterval; + this.progressCallback = progressCallback; + } + + @Override + public SimpleFeatureIterator features() { + return new ProgressReportingFeatureIterator(delegate.features(), progressInterval, progressCallback); + } +} diff --git a/src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIterator.java b/src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIterator.java new file mode 100644 index 0000000000..2e1b26933a --- /dev/null +++ 
b/src/main/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIterator.java @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.geotools.collection; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.IntConsumer; +import org.geotools.api.feature.simple.SimpleFeature; +import org.geotools.data.simple.SimpleFeatureIterator; +import org.geotools.feature.collection.DecoratingSimpleFeatureIterator; +import org.jspecify.annotations.Nullable; + +/** A decorating feature iterator that will call a callback after a specified number of features are handled. */ +public class ProgressReportingFeatureIterator extends DecoratingSimpleFeatureIterator { + + private final AtomicInteger count = new AtomicInteger(0); + private final int progressInterval; + private final IntConsumer progressCallback; + private SimpleFeatureIterator iterator; + + /** + * Creates an iterator that reports progress after every configured number of processed features. 
+ * + * @param iterator the wrapped feature iterator, must not be {@code null} + * @param progressInterval the number of processed features between progress updates, must be greater than {@code 0} + * @param progressCallback the callback that receives the current processed feature count; may be {@code null} + * @throws IllegalArgumentException if {@code progressInterval <= 0} + */ + public ProgressReportingFeatureIterator( + SimpleFeatureIterator iterator, int progressInterval, @Nullable IntConsumer progressCallback) { + super(iterator); + if (progressInterval <= 0) { + throw new IllegalArgumentException("progressInterval must be greater than 0"); + } + this.iterator = iterator; + this.progressInterval = progressInterval; + this.progressCallback = progressCallback; + } + + @Override + public SimpleFeature next() { + if (count.incrementAndGet() % progressInterval == 0) { + if (progressCallback != null) { + progressCallback.accept(count.get()); + } + } + return iterator.next(); + } + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @Override + public void close() { + iterator.close(); + iterator = null; + count.set(0); + } +} diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index d100e1a29a..9ef20634df 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -11,10 +11,12 @@ import java.io.IOException; import java.io.Serializable; import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.time.Instant; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -22,6 +24,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import 
java.util.stream.Stream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import org.apache.commons.lang3.StringUtils; import org.geotools.api.data.FeatureEvent; import org.geotools.api.data.FileDataStore; @@ -37,10 +41,9 @@ import org.geotools.data.DefaultTransaction; import org.geotools.data.csv.CSVDataStoreFactory; import org.geotools.data.geojson.store.GeoJSONDataStoreFactory; +import org.geotools.data.shapefile.ShapefileDumper; import org.geotools.factory.CommonFactoryFinder; import org.geotools.feature.SchemaException; -import org.geotools.filter.text.cql2.CQLException; -import org.geotools.filter.text.ecql.ECQL; import org.geotools.util.factory.GeoTools; import org.jspecify.annotations.NonNull; import org.jspecify.annotations.Nullable; @@ -53,6 +56,7 @@ import org.springframework.transaction.annotation.Transactional; import org.springframework.web.server.ResponseStatusException; import org.tailormap.api.controller.LayerExtractController; +import org.tailormap.api.geotools.collection.ProgressReportingFeatureCollection; import org.tailormap.api.geotools.data.excel.ExcelDataStore; import org.tailormap.api.geotools.data.excel.ExcelDataStoreFactory; import org.tailormap.api.geotools.featuresources.FeatureSourceFactoryHelper; @@ -71,13 +75,20 @@ public class CreateLayerExtractService { private final FeatureSourceFactoryHelper featureSourceFactoryHelper; private final FilterFactory ff = CommonFactoryFinder.getFilterFactory(GeoTools.getDefaultHints()); - // we can safely use the tmp dir as a default here because we are running in a docker container so access is limited + // we can safely use the tmp dir as a default here because we are running in a docker container without a shell so + // access is limited @Value("${tailormap-api.extract.location:#{systemProperties['java.io.tmpdir']}}") private String exportFilesLocation; @Value("${tailormap-api.extract.cleanup-minutes:120}") private int cleanupIntervalMinutes; + 
@Value("#{T(java.lang.Math).max(1, ${tailormap-api.extract.progress-report-interval:100})}") + private int progressReportInterval; + + @Value("${tailormap-api.features.wfs_count_exact:false}") + private boolean exactWfsCounts; + public CreateLayerExtractService( SseEventBus eventBus, JsonMapper jsonMapper, FeatureSourceFactoryHelper featureSourceFactoryHelper) { this.eventBus = eventBus; @@ -115,10 +126,9 @@ public void emitProgress( int progress, boolean completed, @Nullable String message) { - logger.debug("Emitting progress {} for layer with id {}", progress, clientId); - message = StringUtils.isBlank(message) ? "Extract task started" : message; fileId = StringUtils.isBlank(fileId) ? "" : fileId; + logger.debug("Emitting progress {}% for client [{}], message: '{}'", progress, clientId, message); eventBus.handleEvent(SseEvent.builder() .addClientId(clientId) @@ -139,7 +149,7 @@ public void emitProgress( } /** - * Check the sse client id is valid and exists. + * Check that the sse client id is valid and exists. * * @param clientId the SSE client id * @throws IllegalArgumentException when the SSE client id is invalid or not found on the event bus @@ -161,7 +171,7 @@ public void validateClientId(@NonNull String clientId) throws IllegalArgumentExc /** * Create a validated filename for an extract. The naming follows the pattern - * {@code "%s_%s_%s.%s".formatted(sourceFT.getName(), clientId, UUIDv7.randomV7(), outputFormat.getExtension()) } + * {@code "%s_%s_%s%s".formatted(sourceFT.getName(), clientId, UUIDv7.randomV7(), outputFormat.getExtension()) } * where the first part is the source feature type name (this is cleaned from some characters), the second part is * the SSE client id, the third part is a random UUIDv7 and the fourth part is the file extension based on the * requested output format. 
@@ -197,34 +207,52 @@ public void createLayerExtract( @NonNull String clientId, @NonNull TMFeatureType inputTmFeatureType, @NonNull Set attributes, - String filterCQL, + @Nullable Filter filter, String sortBy, SortOrder sortOrder, LayerExtractController.@NonNull ExtractOutputFormat extractOutputFormat, @NonNull String outputFileName) { - SimpleFeatureSource inputFeatureSource = null; this.emitProgress(clientId, outputFileName, 0, false, "Starting extract"); + switch (extractOutputFormat) { + case SHAPE -> + this.handleWithShapeDumper( + clientId, inputTmFeatureType, attributes, filter, sortBy, sortOrder, outputFileName); + case CSV, GEOJSON, XLSX -> + this.handleSingleFileFormats( + clientId, + inputTmFeatureType, + attributes, + filter, + sortBy, + sortOrder, + extractOutputFormat, + outputFileName); + } + } + + private void handleSingleFileFormats( + @NonNull String clientId, + @NonNull TMFeatureType inputTmFeatureType, + @NonNull Set attributes, + Filter filter, + String sortBy, + SortOrder sortOrder, + LayerExtractController.@NonNull ExtractOutputFormat extractOutputFormat, + @NonNull String outputFileName) { + + SimpleFeatureSource inputFeatureSource = null; + FileDataStore outputDataStore = null; try (Transaction outputTransaction = new DefaultTransaction("tailormap-extract-output")) { inputFeatureSource = featureSourceFactoryHelper.openGeoToolsFeatureSource(inputTmFeatureType); - Query q = new Query(inputFeatureSource.getName().toString()); - if (!attributes.isEmpty()) { - q.setPropertyNames(attributes.toArray(new String[0])); - } + Query q = createQuery(inputFeatureSource, attributes, filter, sortBy, sortOrder); - if (!StringUtils.isBlank(filterCQL)) { - Filter filter = ECQL.toFilter(filterCQL); - q.setFilter(filter); - } - if (!StringUtils.isBlank(sortBy)) { - q.setSortBy(ff.sort(sortBy, Objects.requireNonNullElse(sortOrder, SortOrder.ASCENDING))); - } + int featCount = getFeatureCount(inputFeatureSource, q); - final int featCount = 
inputFeatureSource.getCount(q); - logger.debug("Filtered source counts {}", featCount); - if (featCount >= ExcelDataStore.getMaxRows()) { + if (extractOutputFormat == LayerExtractController.ExtractOutputFormat.XLSX + && featCount >= ExcelDataStore.getMaxRows()) { this.emitError( clientId, "Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format." @@ -235,17 +263,13 @@ public void createLayerExtract( .formatted(featCount, ExcelDataStore.getMaxRows())); } - final AtomicInteger featsAdded = new AtomicInteger(); - - FileDataStore outputDataStore = - getExtractDataStore(extractOutputFormat, outputFileName, clientId, inputTmFeatureType.getName()); + outputDataStore = this.getExtractDataStore( + extractOutputFormat, outputFileName, clientId, inputTmFeatureType.getName()); SimpleFeatureType fType = DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0])); outputDataStore.createSchema(fType); - // as a workaround for https://osgeo-org.atlassian.net/browse/GEOT-7894 we could instead call - // if (outputDataStore.getFeatureSource(fType.getName()) instanceof SimpleFeatureStore featureStore) { - // but I'd rather wait for a release of geotools with a fix for that issue, because it does not work with - // the CSV store + + final AtomicInteger featsAdded = new AtomicInteger(); if (outputDataStore.getFeatureSource() instanceof SimpleFeatureStore featureStore) { featureStore.setTransaction(outputTransaction); featureStore.addFeatureListener(event -> { @@ -253,7 +277,7 @@ public void createLayerExtract( featsAdded.getAndIncrement(); } if (featCount > 0) { - if (featsAdded.get() % 50 == 0) { + if (featsAdded.get() % progressReportInterval == 0) { this.emitProgress( clientId, outputFileName, @@ -272,10 +296,13 @@ public void createLayerExtract( this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features"); 
logger.error("Output datastore is not a SimpleFeatureStore, cannot write features"); } - } catch (IOException | CQLException | SchemaException e) { + } catch (IOException | SchemaException | IllegalArgumentException e) { emitError(clientId, e.getMessage()); logger.error("Creating extract failed", e); } finally { + if (outputDataStore != null) { + outputDataStore.dispose(); + } if (inputFeatureSource != null) { try { inputFeatureSource.getDataStore().dispose(); @@ -286,6 +313,34 @@ public void createLayerExtract( } } + private File getValidatedOutputFile(String outputFileName) throws IOException { + Path exportRoot = Path.of(exportFilesLocation).toRealPath(); + Path outputPath = exportRoot.resolve(outputFileName).normalize(); + if (!outputPath.startsWith(exportRoot)) { + throw new IOException("Invalid file path"); + } + Path createdFilePath = Files.createFile(outputPath).toRealPath(); + if (!createdFilePath.startsWith(exportRoot)) { + throw new IOException("Invalid file path"); + } + return createdFilePath.toFile(); + } + + /** + * Create a writable GeoTools {@link FileDataStore} for the requested extract format. The format must be + * supported by a {@link FileDataStore} implementation, for example CSV, Excel or GeoJSON. For unsupported formats + * (for example Shapefile) a custom handling is used in the calling method. + * + *

The output file is validated to ensure it is created under the configured extract location. + * + * @param extractOutputFormat the requested extract output format + * @param outputFileName the target output filename + * @param clientId the SSE client id, used for error reporting + * @param typeName the source feature type name, used to derive format-specific metadata (for example Excel sheet + * name) + * @return a newly created {@link FileDataStore} configured for the requested format + * @throws IOException when the output file path is invalid or the datastore cannot be created + */ private FileDataStore getExtractDataStore( LayerExtractController.ExtractOutputFormat extractOutputFormat, String outputFileName, @@ -293,15 +348,7 @@ private FileDataStore getExtractDataStore( String typeName) throws IOException { - final File outputFile = Files.createFile(Path.of(exportFilesLocation, outputFileName)) - .toFile() - .getCanonicalFile(); - if (!outputFile - .getPath() - .startsWith(Path.of(exportFilesLocation).toFile().getCanonicalPath())) { - throw new IOException("Invalid file path"); - } - + final File outputFile = getValidatedOutputFile(outputFileName); if (!logger.isDebugEnabled()) { // delete in production after JVM exit because the event bus will be reset when the JVM exits, and then we // are unlikely to have a reference to the file anymore. 
@@ -311,18 +358,18 @@ private FileDataStore getExtractDataStore( switch (extractOutputFormat) { case CSV -> { - Map params = Map.of( - CSVDataStoreFactory.FILE_PARAM.key, - outputFile, - CSVDataStoreFactory.STRATEGYP.key, - CSVDataStoreFactory.WKT_STRATEGY, - CSVDataStoreFactory.WKTP.key, - "the_geom_wkt", - CSVDataStoreFactory.WRITEPRJ.key, - false, - CSVDataStoreFactory.QUOTEALL.key, - true); - return (FileDataStore) new CSVDataStoreFactory().createNewDataStore(params); + return (FileDataStore) new CSVDataStoreFactory() + .createNewDataStore(Map.of( + CSVDataStoreFactory.FILE_PARAM.key, + outputFile, + CSVDataStoreFactory.STRATEGYP.key, + CSVDataStoreFactory.WKT_STRATEGY, + CSVDataStoreFactory.WKTP.key, + "the_geom_wkt", + CSVDataStoreFactory.WRITEPRJ.key, + false, + CSVDataStoreFactory.QUOTEALL.key, + true)); } case XLSX -> { Map params = Map.of( @@ -334,14 +381,8 @@ private FileDataStore getExtractDataStore( return (FileDataStore) new ExcelDataStoreFactory().createNewDataStore(params); } case GEOJSON -> { - Map params = Map.of(GeoJSONDataStoreFactory.FILE_PARAM.key, outputFile); - return (FileDataStore) new GeoJSONDataStoreFactory().createNewDataStore(params); - } - // TODO implement - case SHAPE -> { - emitError(clientId, "Output format " + extractOutputFormat + " is not yet supported"); - logger.error("Output format {} is not yet supported", extractOutputFormat); - throw new IOException("Unsupported output format: " + extractOutputFormat); + return (FileDataStore) new GeoJSONDataStoreFactory() + .createNewDataStore(Map.of(GeoJSONDataStoreFactory.FILE_PARAM.key, outputFile)); } default -> { // should never happen @@ -352,6 +393,121 @@ private FileDataStore getExtractDataStore( } } + private int getFeatureCount(SimpleFeatureSource source, Query query) throws IOException { + int count = source.getCount(query); + logger.debug("Filtered source counts {} features", count); + if (count < 0 && exactWfsCounts) { + count = source.getFeatures(query).size(); + } + return 
count; + } + + private void handleWithShapeDumper( + @NonNull String clientId, + @NonNull TMFeatureType inputTmFeatureType, + @NonNull Set attributes, + Filter filter, + String sortBy, + SortOrder sortOrder, + @NonNull String outputFileName) { + SimpleFeatureSource inputFeatureSource = null; + File outputDirectory = null; + try { + File outputFile = getValidatedOutputFile(outputFileName); + String baseName = outputFile + .getName() + .substring( + 0, + outputFile + .getName() + .lastIndexOf(LayerExtractController.ExtractOutputFormat.SHAPE.getExtension())); + outputDirectory = outputFile + .getParentFile() + .toPath() + .resolve(baseName) + .toFile() + .getCanonicalFile(); + if (!logger.isDebugEnabled()) { + // delete in production after JVM exit because the event bus will be reset when the JVM exits, and then + // we + // are unlikely to have a reference to the file anymore. + // In debug/development mode we want to keep the directory for inspection. + outputDirectory.deleteOnExit(); + } + Files.createDirectories(outputDirectory.toPath()); + + ShapefileDumper dumper = new ShapefileDumper(outputDirectory); + dumper.setCharset(StandardCharsets.UTF_8); + dumper.setEmptyShapefileAllowed(false); + + inputFeatureSource = featureSourceFactoryHelper.openGeoToolsFeatureSource(inputTmFeatureType); + + Query q = createQuery(inputFeatureSource, attributes, filter, sortBy, sortOrder); + + final int featCount = getFeatureCount(inputFeatureSource, q); + final boolean hasKnownFeatureCount = featCount > 0; + + AtomicInteger lastProgress = new AtomicInteger(0); + + dumper.dump(new ProgressReportingFeatureCollection( + inputFeatureSource.getFeatures(q), progressReportInterval, processed -> { + int progress = hasKnownFeatureCount ? (int) ((processed / (double) featCount) * 99) : 0; + lastProgress.set(progress); + String progressMessage = hasKnownFeatureCount + ?
"Extracting shapes: %d/%d features processed".formatted(processed, featCount) + : "Extracting shapes: %d features processed".formatted(processed); + this.emitProgress(clientId, outputFileName, progress, false, progressMessage); + })); + this.emitProgress( + clientId, + outputFileName, + Math.max(99, lastProgress.get()), + false, + "Extract shapes dumped successfully"); + + zipDirectory(outputDirectory.toPath(), outputFile.toPath()); + this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); + } catch (IOException | IllegalArgumentException e) { + emitError(clientId, e.getMessage()); + logger.error("Creating extract failed", e); + } finally { + if (outputDirectory != null) { + try { + deleteDirectoryRecursively(outputDirectory.toPath()); + } catch (IOException e) { + logger.error("Failed to delete output directory {}", outputDirectory, e); + } + } + if (inputFeatureSource != null) { + try { + inputFeatureSource.getDataStore().dispose(); + } catch (Exception e) { + logger.warn("Error disposing datastore for feature source {}", inputFeatureSource.getName(), e); + } + } + } + } + + private Query createQuery( + SimpleFeatureSource inputFeatureSource, + Set attributes, + Filter filter, + String sortBy, + SortOrder sortOrder) { + Query q = new Query(inputFeatureSource.getName().toString()); + if (!attributes.isEmpty()) { + q.setPropertyNames(attributes.toArray(new String[0])); + } + + if (filter != null) { + q.setFilter(filter); + } + if (!StringUtils.isBlank(sortBy)) { + q.setSortBy(ff.sort(sortBy, Objects.requireNonNullElse(sortOrder, SortOrder.ASCENDING))); + } + return q; + } + /** * Cleanup expired extract files. 
Filenames are created in {@link CreateLayerExtractService#createExtractFilename } * and follow the pattern {@code "%s_%s_%s.%s".formatted(sourceFT.getName(), clientId, UUIDv7.randomV7(), @@ -384,6 +540,27 @@ public void cleanupExpiredExtracts() { } }); + try (Stream paths = Files.walk(Path.of(exportFilesLocation))) { + paths.filter(Files::isDirectory).forEach(path -> { + File file = path.toFile(); + String filename = file.getName(); + String[] parts = filename.split("[_]", -1); + if (parts.length < 3) { + logger.warn("Unexpected directory in extract location: {}", filename); + return; + } + String clientId = parts[1]; + if (!validClientIds.contains(clientId)) { + if (!file.delete()) { + logger.error("Failed to delete unattached extract file {}", filename); + } + } else { + Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2])); + clientFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId)); + } + }); + } + // delete any files are older than the cutoff clientFilesOnDisk.stream() .filter(f -> f.timestamp() @@ -400,5 +577,44 @@ public void cleanupExpiredExtracts() { } } + private void zipDirectory(Path sourceDir, Path zipFile) throws IOException { + try (ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(zipFile)); + Stream pathStream = Files.walk(sourceDir)) { + pathStream.filter(Files::isRegularFile).forEach(path -> { + String entryName = sourceDir.relativize(path).toString().replace(File.separatorChar, '/'); + try { + zos.putNextEntry(new ZipEntry(entryName)); + Files.copy(path, zos); + zos.closeEntry(); + } catch (IOException e) { + throw new RuntimeException("Failed to add file to zip: " + path, e); + } + }); + } catch (RuntimeException e) { + if (e.getCause() instanceof IOException ioException) { + throw ioException; + } + throw e; + } + } + + private void deleteDirectoryRecursively(Path directory) throws IOException { + try (Stream paths = Files.walk(directory)) { + 
paths.sorted(Comparator.reverseOrder()).forEach(path -> { + try { + logger.debug("Deleting path {}", path); + Files.deleteIfExists(path); + } catch (IOException e) { + throw new RuntimeException("Failed to delete path: " + path, e); + } + }); + } catch (RuntimeException e) { + if (e.getCause() instanceof IOException ioException) { + throw ioException; + } + throw e; + } + } + private record FileWithAttributes(File file, Instant timestamp, String clientId) {} } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 428fce1955..c3c5d2a09f 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -38,6 +38,8 @@ tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape # tailormap-api.extract.cleanup-minutes=120 # the directory where the extract output files are stored, should be writable by the application # tailormap-api.extract.location=/tmp +# the number of features after which a progress report is sent back to the viewer, to update the progress bar +# tailormap-api.extract.progress-report-interval=100 # proxy passthrough regex patterns for layer names, when empty no additional layers are allowed to be proxied # eg. 
use vw_t_gi_%s_[a-fA-F0-9]{32} to match `vw_t_gi_layername_70cae9814c6144808f1c9bb921099794` as a sub-layer of layername diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index cff7d48233..85d2f5e54b 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -24,6 +24,7 @@ import static org.tailormap.api.TestRequestProcessor.setServletPath; import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.CSV; import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.GEOJSON; +import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.SHAPE; import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; @@ -31,6 +32,10 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Set; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; @@ -203,6 +208,51 @@ void should_export_large_output_to_csv() throws Exception { }); } + @Test + void invalid_filter_should_return_bad_request_on_extract_request() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", "csv") + .param("filter", "this does not parse") + .acceptCharset(StandardCharsets.UTF_8) + 
.characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isBadRequest()) + .andExpect(result -> + assertThat(result.getResponse().getContentAsString(), containsString("Invalid filter"))); + } + + @Test + void invalid_filter_should_fail_on_extract() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", "csv") + .param("filter", "does_not_exist IN ('1000')") + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()) + .andExpect(result -> assertThat( + result.getResponse().getContentAsString(), containsString("Extract request accepted"))); + + Awaitility.await().atMost(30, SECONDS).untilAsserted(() -> { + assertThat( + sseResult.getResponse().getContentAsString(), + containsString("An error occurred during extract creation")); + assertThat( + sseResult.getResponse().getContentAsString(), + containsString("Property 'does_not_exist' could not be found in begroeidterreindeel")); + }); + } + @WithMockUser( username = "tm-admin", authorities = {"admin"}) @@ -408,6 +458,82 @@ void should_export_large_filter_to_geojson() throws Exception { .andExpect(jsonPath("$.features[0].crs").doesNotHaveJsonPath()); } + @Test + void should_export_large_filter_to_shape() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", SHAPE.getValue()) + .param("filter", StaticTestData.get("large_cql_filter")) + 
.acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. + Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(30, SECONDS).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, endsWith(SHAPE.getExtension())); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat(contentType, containsString("application/zip")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andReturn(); + + // open the downloaded zip file and check that we have the expected content: + // 6 files with name "begroeidterreindeelPolygon" and 6 different extensions + try (InputStream inp = new ByteArrayInputStream(download.getResponse().getContentAsByteArray()); + ZipInputStream 
zipInputStream = new ZipInputStream(inp, StandardCharsets.UTF_8)) { + Set fileNames = new HashSet<>(); + Set extensions = new HashSet<>(); + + ZipEntry entry; + while ((entry = zipInputStream.getNextEntry()) != null) { + if (entry.isDirectory()) { + continue; + } + + String entryName = entry.getName(); + String fileName = + entryName.contains("/") ? entryName.substring(entryName.lastIndexOf('/') + 1) : entryName; + fileNames.add(fileName); + + int lastDot = fileName.lastIndexOf('.'); + assertThat(lastDot, greaterThanOrEqualTo("begroeidterreindeelPolygon".length())); + assertThat(fileName, startsWith("begroeidterreindeelPolygon.")); + extensions.add(fileName.substring(lastDot + 1)); + } + + assertEquals(6, fileNames.size(), "Expected 6 files in the shapefile zip"); + assertEquals(6, extensions.size(), "Expected 6 unique file extensions in the shapefile zip"); + } + } + /** * Parse the last non-empty line from the SSE stream that looks something like: * {@code data:{"details":{"message":"Extract task diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java index 774687d242..2adde34a2a 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerRestrictedFormatsIntegrationTest.java @@ -48,6 +48,7 @@ void list_supported_formats() throws Exception { final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + formatsPath; mockMvc.perform(get(extractUrl).accept(MediaType.APPLICATION_JSON).with(setServletPath(extractUrl))) .andExpect(status().isOk()) + // formats are configured above .andExpect(result -> assertThat(result.getResponse().getContentAsString(), is("[\"csv\",\"shape\"]"))); } @@ -68,7 +69,7 @@ void invalid_output_format_should_return_bad_request_on_extract() 
throws Excepti .with(setServletPath(extractUrl)) .with(csrf()) .param("attributes", "") - // disallowed through properties + // disallowed through properties configured above .param("outputFormat", "geojson") .acceptCharset(StandardCharsets.UTF_8) .characterEncoding(StandardCharsets.UTF_8) @@ -107,6 +108,15 @@ void invalid_download_id_should_return_bad_request_on_download() throws Exceptio result.getResponse().getContentAsString(), containsString("Download file not found"))); } + @Test + void invalid_download_id_2_should_return_bad_request_on_download() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + "./invalidDownloadId"; + mockMvc.perform(get(extractUrl) + .accept(MediaType.APPLICATION_OCTET_STREAM) + .with(setServletPath(extractUrl))) + .andExpect(status().isBadRequest()); + } + @Test void wms_secured_proxy_not_in_public_app_should_be_forbidden() throws Exception { final String validClientId = "format-test-" + System.nanoTime(); diff --git a/src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollectionTest.java b/src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollectionTest.java new file mode 100644 index 0000000000..96883ffa90 --- /dev/null +++ b/src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureCollectionTest.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2026 B3Partners B.V. 
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.geotools.collection; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.Date; +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; +import org.geotools.api.data.SimpleFeatureSource; +import org.geotools.api.feature.simple.SimpleFeatureType; +import org.geotools.data.DataUtilities; +import org.geotools.data.memory.MemoryDataStore; +import org.geotools.data.simple.SimpleFeatureCollection; +import org.geotools.data.simple.SimpleFeatureIterator; +import org.geotools.feature.SchemaException; +import org.geotools.feature.simple.SimpleFeatureBuilder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.GeometryFactory; + +class ProgressReportingFeatureCollectionTest { + private static final int randomFeatureCount = 104; + private SimpleFeatureSource randomFeatureSource = null; + + @BeforeEach + void createRandomFeatures() throws IOException, SchemaException { + SimpleFeatureType inputType = + DataUtilities.createType("test", "id:Integer,label:String,date:Date,location:Point:28992"); + MemoryDataStore dataStore = new MemoryDataStore(inputType); + + int[] xCoords = new Random().ints(randomFeatureCount, 155000, 165000).toArray(); + int[] yCoords = new Random().ints(randomFeatureCount, 463000, 473000).toArray(); + long minEpoch = LocalDate.of(2000, 1, 1).toEpochDay() * 86400L * 1000L; + long maxEpoch = LocalDate.of(2025, 12, 31).toEpochDay() * 86400L * 1000L; + final Random random = new Random(); + final SimpleFeatureBuilder fb = new SimpleFeatureBuilder(inputType); + final GeometryFactory gf = 
new GeometryFactory(); + IntStream.range(0, randomFeatureCount).forEach(id -> { + fb.set("id", id); + fb.set("label", "Feature number " + id); + @SuppressWarnings("JavaUtilDate") + Date randomDate = new Date(minEpoch + (long) (random.nextDouble() * (maxEpoch - minEpoch))); + fb.set("date", randomDate); + fb.set("location", gf.createPoint(new Coordinate(xCoords[id], yCoords[id]))); + dataStore.addFeature(fb.buildFeature(String.valueOf(id))); + }); + + randomFeatureSource = dataStore.getFeatureSource(inputType.getName()); + assumeTrue(randomFeatureSource != null, "Failed to create random feature source"); + } + + @AfterEach + void cleanup() { + if (randomFeatureSource != null) { + randomFeatureSource.getDataStore().dispose(); + } + } + + @Test + void test_progress_works() throws IOException, SchemaException { + SimpleFeatureCollection source = randomFeatureSource.getFeatures(); + AtomicInteger progressCount = new AtomicInteger(0); + ProgressReportingFeatureCollection collection = + new ProgressReportingFeatureCollection(source, 10, progressCount::set); + assertEquals(randomFeatureCount, collection.size()); + try (SimpleFeatureIterator iterator = collection.features()) { + while (iterator.hasNext()) { + iterator.next(); + } + assertEquals(10 * (randomFeatureCount / 10), progressCount.get()); + } + } + + @Test + void allows_null_progress() throws IOException { + SimpleFeatureCollection source = randomFeatureSource.getFeatures(); + ProgressReportingFeatureCollection collection = new ProgressReportingFeatureCollection(source, 10, null); + assertEquals(randomFeatureCount, collection.size()); + try (SimpleFeatureIterator iterator = collection.features()) { + while (iterator.hasNext()) { + iterator.next(); + } + } + } + + @Test + void disallows_negative_progress_interval() throws IOException { + SimpleFeatureCollection source = randomFeatureSource.getFeatures(); + assertThrows( + IllegalArgumentException.class, () -> new ProgressReportingFeatureCollection(source, -1, count 
-> {})); + } +} diff --git a/src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIteratorTest.java b/src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIteratorTest.java new file mode 100644 index 0000000000..39b770b251 --- /dev/null +++ b/src/test/java/org/tailormap/api/geotools/collection/ProgressReportingFeatureIteratorTest.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2026 B3Partners B.V. + * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.geotools.collection; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.geotools.data.simple.SimpleFeatureIterator; +import org.junit.jupiter.api.Test; + +class ProgressReportingFeatureIteratorTest { + @Test + void disallows_negative_progress_interval() { + assertThrows(IllegalArgumentException.class, () -> { + try (SimpleFeatureIterator ignored = new ProgressReportingFeatureIterator(null, -1, null)) { + // ignored + } + }); + } +} diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index 7d5c7717e3..4b836a325c 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -6,8 +6,10 @@ tailormap-api.new-admin-username=tm-admin tailormap-api.export.allowed-outputformats=application/geopackage+sqlite3,application/json # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape +# the number of features after which a progress report is sent back to the viewer, to update the progress bar +tailormap-api.extract.progress-report-interval=10 # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk -# tailormap-api.extract.cleanup-minutes=120 +tailormap-api.extract.cleanup-minutes=15 # the directory where the extract output files are stored, should be writable by the 
application # tailormap-api.extract.location=/tmp From a37e5f1b06f9b651c9cf89a1f64e740b1bd429a2 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 1 May 2026 15:01:53 +0200 Subject: [PATCH 11/17] HTM-1981: cleanup deprecated `/export` endpoint --- pom.xml | 28 -- .../api/controller/LayerExportController.java | 379 ------------------ src/main/resources/application.properties | 4 - .../openapi/obsolete-viewer-schemas.yaml | 35 -- .../LayerExportControllerIntegrationTest.java | 279 ------------- src/test/resources/application.properties | 2 - 6 files changed, 727 deletions(-) delete mode 100644 src/main/java/org/tailormap/api/controller/LayerExportController.java delete mode 100644 src/main/resources/openapi/obsolete-viewer-schemas.yaml delete mode 100644 src/test/java/org/tailormap/api/controller/LayerExportControllerIntegrationTest.java diff --git a/pom.xml b/pom.xml index 225df88d57..7a89223e16 100644 --- a/pom.xml +++ b/pom.xml @@ -1253,34 +1253,6 @@ SPDX-License-Identifier: MIT true - - generate-obsolete-viewer-models - - generate - - - ${project.basedir}/src/main/resources/openapi/obsolete-viewer-schemas.yaml - spring - org.tailormap.api.viewer.model - spring-boot - - java8 - true - false - true - false - true - true - true - @Deprecated - - false - true - false - false - true - - generate-spec diff --git a/src/main/java/org/tailormap/api/controller/LayerExportController.java b/src/main/java/org/tailormap/api/controller/LayerExportController.java deleted file mode 100644 index 4e6862497a..0000000000 --- a/src/main/java/org/tailormap/api/controller/LayerExportController.java +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright (C) 2023 B3Partners B.V. 
- * - * SPDX-License-Identifier: MIT - */ -package org.tailormap.api.controller; - -import static org.springframework.web.bind.annotation.RequestMethod.GET; -import static org.springframework.web.bind.annotation.RequestMethod.POST; -import static org.tailormap.api.persistence.helper.TMFeatureTypeHelper.getConfiguredAttributes; -import static org.tailormap.api.util.HttpProxyUtil.passthroughResponseHeaders; - -import io.micrometer.core.annotation.Counted; -import io.micrometer.core.annotation.Timed; -import jakarta.servlet.http.HttpServletRequest; -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; -import java.lang.invoke.MethodHandles; -import java.net.URI; -import java.net.http.HttpResponse; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.geotools.api.feature.type.AttributeDescriptor; -import org.geotools.data.wfs.WFSDataStore; -import org.geotools.data.wfs.WFSDataStoreFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.core.io.InputStreamResource; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.transaction.annotation.Transactional; -import org.springframework.util.CollectionUtils; -import org.springframework.util.LinkedMultiValueMap; -import org.springframework.util.MultiValueMap; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.ModelAttribute; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.server.ResponseStatusException; -import org.tailormap.api.annotation.AppRestController; -import org.tailormap.api.geotools.PreventLocalAllowNestedJarEntityResolver; -import 
org.tailormap.api.geotools.wfs.SimpleWFSHelper; -import org.tailormap.api.geotools.wfs.SimpleWFSLayerDescription; -import org.tailormap.api.geotools.wfs.WFSProxy; -import org.tailormap.api.persistence.Application; -import org.tailormap.api.persistence.GeoService; -import org.tailormap.api.persistence.TMFeatureSource; -import org.tailormap.api.persistence.TMFeatureType; -import org.tailormap.api.persistence.json.AppLayerSettings; -import org.tailormap.api.persistence.json.AppTreeLayerNode; -import org.tailormap.api.persistence.json.GeoServiceLayer; -import org.tailormap.api.persistence.json.GeoServiceProtocol; -import org.tailormap.api.persistence.json.ServiceAuthentication; -import org.tailormap.api.repository.FeatureSourceRepository; -import org.tailormap.api.viewer.model.LayerExportCapabilities; - -/** - * @deprecated This controller is deprecated and will be removed in a future release. Use the `/extract/` endpoint - * (TODO) instead, which provides more flexible data extraction capabilities and supports more data sources than - * just WFS. 
- */ -@AppRestController -@RequestMapping(path = "${tailormap-api.base-path}/{viewerKind}/{viewerName}/layer/{appLayerId}/export/") -@Deprecated(forRemoval = true) -public class LayerExportController { - private static final Logger logger = - LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - @Value("#{'${tailormap-api.export.allowed-outputformats}'.split(',')}") - private List allowedOutputFormats; - - private final FeatureSourceRepository featureSourceRepository; - private final WFSProxy wfsProxy = new WFSProxy(); - - public LayerExportController(FeatureSourceRepository featureSourceRepository) { - this.featureSourceRepository = featureSourceRepository; - } - - @Transactional - @GetMapping(path = "capabilities") - @Timed(value = "export_get_capabilities", description = "Get layer export capabilities") - public ResponseEntity capabilities( - @ModelAttribute GeoService service, @ModelAttribute GeoServiceLayer layer) { - - final LayerExportCapabilities capabilities = new LayerExportCapabilities().exportable(false); - - TMFeatureType tmft = service.findFeatureTypeForLayer(layer, featureSourceRepository); - - if (tmft != null) { - WFSTypeNameDescriptor wfsTypeNameDescriptor = findWFSFeatureType(service, layer, tmft); - - if (wfsTypeNameDescriptor != null) { - try { - List outputFormats = SimpleWFSHelper.getOutputFormats( - wfsTypeNameDescriptor.wfsUrl(), - wfsTypeNameDescriptor.typeName(), - wfsTypeNameDescriptor.username(), - wfsTypeNameDescriptor.password()); - capabilities.setOutputFormats(outputFormats); - } catch (Exception e) { - String msg = "Error getting capabilities for WFS \"%s\"".formatted(wfsTypeNameDescriptor.wfsUrl()); - if (logger.isTraceEnabled()) { - logger.trace(msg, e); - } else { - logger.warn("{}: {}: {}", msg, e.getClass(), e.getMessage()); - } - capabilities.setOutputFormats(null); - } - } - capabilities.setExportable(capabilities.getOutputFormats() != null - && !capabilities.getOutputFormats().isEmpty()); - } - - return 
ResponseEntity.status(HttpStatus.OK).body(capabilities); - } - - @Transactional - @RequestMapping( - path = "download", - method = {GET, POST}) - @Counted(value = "export_download", description = "Count of layer downloads") - public ResponseEntity download( - @ModelAttribute GeoService service, - @ModelAttribute GeoServiceLayer layer, - @ModelAttribute Application application, - @ModelAttribute AppTreeLayerNode appTreeLayerNode, - @RequestParam String outputFormat, - @RequestParam(required = false) Set attributes, - @RequestParam(required = false) String filter, - @RequestParam(required = false) String sortBy, - @RequestParam(required = false) String sortOrder, - @RequestParam(required = false) String crs, - HttpServletRequest request) { - - // Validate outputFormat - if (!allowedOutputFormats.contains(outputFormat)) { - logger.warn("Invalid output format requested: {}", outputFormat); - return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("Invalid output format"); - } - - TMFeatureType tmft = service.findFeatureTypeForLayer(layer, featureSourceRepository); - AppLayerSettings appLayerSettings = application.getAppLayerSettings(appTreeLayerNode); - - if (tmft == null) { - logger.debug("Layer export requested for layer without feature type"); - throw new ResponseStatusException(HttpStatus.NOT_FOUND); - } - - // Find a WFS feature type either because it is configured in Tailormap or by a SLD - // DescribeLayer request - WFSTypeNameDescriptor wfsTypeNameDescriptor = findWFSFeatureType(service, layer, tmft); - - if (wfsTypeNameDescriptor == null) { - throw new ResponseStatusException( - HttpStatus.SERVICE_UNAVAILABLE, "No suitable WFS available for layer export"); - } - - if (attributes == null) { - attributes = new HashSet<>(); - } - - // Get attributes in configured or original order - Set nonHiddenAttributes = - getConfiguredAttributes(tmft, appLayerSettings).keySet(); - - if (!attributes.isEmpty()) { - // Only export non-hidden property names - if 
(!nonHiddenAttributes.containsAll(attributes)) { - throw new ResponseStatusException( - HttpStatus.BAD_REQUEST, - "One or more requested attributes are not available on the feature type"); - } - } else if (!tmft.getSettings().getHideAttributes().isEmpty()) { - // Only specify specific propNames if there are hidden attributes. Having no propNames - // request parameter to request all propNames is less error-prone than specifying the ones - // we have saved in the feature type - attributes = new HashSet<>(nonHiddenAttributes); - } - - // Empty attributes means we won't specify propNames in the GetFeature request. However, if we do select only - // some property names, we need the geometry attribute which is not in the 'attributes' request param so spatial - // export formats don't have the geometry missing. - if (!attributes.isEmpty() && tmft.getDefaultGeometryAttribute() != null) { - attributes.add(tmft.getDefaultGeometryAttribute()); - } - - // Remove attributes which the WFS does not expose. This can be the case when using the - // 'customize attributes' feature in GeoServer but when TM has been configured with a JDBC - // feature type with all the attributes. Requesting a non-existing attribute will return an - // error. 
- try { - List wfsAttributeNames = getWFSAttributeNames(wfsTypeNameDescriptor); - attributes.retainAll(wfsAttributeNames); - // SSRF prevention: only allow known-safe attribute names - attributes.removeIf(attr -> !attr.matches("^[A-Za-z0-9_]+$")); - if (!CollectionUtils.isEmpty(attributes) - && attributes.stream().anyMatch(attr -> !wfsAttributeNames.contains(attr))) { - logger.warn("Download request contained illegal attribute(s)"); - return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("Invalid attribute selection"); - } - } catch (IOException e) { - logger.error("Error getting WFS feature type", e); - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("Error getting WFS feature type"); - } - - return downloadFromWFS( - wfsTypeNameDescriptor, outputFormat, attributes, filter, sortBy, sortOrder, crs, request); - } - - private ResponseEntity downloadFromWFS( - WFSTypeNameDescriptor wfsTypeName, - String outputFormat, - Set attributes, - String filter, - String sortBy, - String sortOrder, - String crs, - HttpServletRequest request) { - - MultiValueMap getFeatureParameters = new LinkedMultiValueMap<>(); - // A layer could have more than one featureType as source, currently we assume it's just one - getFeatureParameters.add("typeNames", wfsTypeName.typeName()); - getFeatureParameters.add("outputFormat", outputFormat); - if (filter != null) { - // GeoServer vendor-specific - // https://docs.geoserver.org/latest/en/user/services/wfs/vendor.html#cql-filters - getFeatureParameters.add("cql_filter", filter); - } - if (crs != null) { - getFeatureParameters.add("srsName", crs); - } - if (!CollectionUtils.isEmpty(attributes)) { - getFeatureParameters.add("propertyName", String.join(",", attributes)); - } - if (sortBy != null) { - getFeatureParameters.add("sortBy", sortBy + ("asc".equals(sortOrder) ? 
" A" : " D")); - } - URI wfsGetFeature = SimpleWFSHelper.getWFSRequestURL(wfsTypeName.wfsUrl(), "GetFeature", getFeatureParameters); - - logger.info("Layer download, proxying WFS GetFeature request {}", wfsGetFeature); - try { - // TODO: close JPA connection before proxying - HttpResponse response = - wfsProxy.proxyWfsRequest(wfsGetFeature, wfsTypeName.username(), wfsTypeName.password(), request); - - logger.info( - "Layer download response code: {}, content type: {}, disposition: {}", - response.statusCode(), - response.headers() - .firstValue("Content-Type") - .map(Object::toString) - .orElse(""), - response.headers() - .firstValue("Content-Disposition") - .map(Object::toString) - .orElse("")); - - InputStreamResource body = new InputStreamResource(response.body()); - - org.springframework.http.HttpHeaders headers = - passthroughResponseHeaders(response.headers(), Set.of("Content-Type", "Content-Disposition")); - - // TODO: record response size and time with micrometer - return ResponseEntity.status(response.statusCode()).headers(headers).body(body); - } catch (Exception e) { - return ResponseEntity.status(HttpStatus.BAD_GATEWAY).body("Bad Gateway"); - } - } - - private record WFSTypeNameDescriptor(String wfsUrl, String typeName, String username, String password) {} - - private WFSTypeNameDescriptor findWFSFeatureType(GeoService service, GeoServiceLayer layer, TMFeatureType tmft) { - - String wfsUrl = null; - String typeName = null; - String username = null; - String password = null; - ServiceAuthentication auth = null; - - if (tmft != null) { - TMFeatureSource featureSource = tmft.getFeatureSource(); - - if (featureSource.getProtocol() == TMFeatureSource.Protocol.WFS) { - wfsUrl = featureSource.getUrl(); - typeName = tmft.getName(); - auth = featureSource.getAuthentication(); - } - } - - if ((wfsUrl == null || typeName == null) && service.getProtocol() == GeoServiceProtocol.WMS) { - // Try to find out the WFS by doing a DescribeLayer request (from OGC SLD spec) 
- auth = service.getAuthentication(); - - SimpleWFSLayerDescription wfsLayerDescription = getWFSLayerDescriptionForWMS(service, layer.getName()); - if (wfsLayerDescription != null - && wfsLayerDescription.wfsUrl() != null - && wfsLayerDescription.getFirstTypeName() != null) { - wfsUrl = wfsLayerDescription.wfsUrl(); - // Ignores possibly multiple feature types associated with the layer (a group layer for - // instance) - typeName = wfsLayerDescription.getFirstTypeName(); - auth = service.getAuthentication(); - } - } - - if (auth != null && auth.getMethod() == ServiceAuthentication.MethodEnum.PASSWORD) { - username = auth.getUsername(); - password = auth.getPassword(); - } - - if (wfsUrl != null && typeName != null) { - return new WFSTypeNameDescriptor(wfsUrl, typeName, username, password); - } else { - return null; - } - } - - private SimpleWFSLayerDescription getWFSLayerDescriptionForWMS(GeoService wmsService, String layerName) { - String username = null; - String password = null; - if (wmsService.getAuthentication() != null - && wmsService.getAuthentication().getMethod() == ServiceAuthentication.MethodEnum.PASSWORD) { - username = wmsService.getAuthentication().getUsername(); - password = wmsService.getAuthentication().getPassword(); - } - SimpleWFSLayerDescription wfsLayerDescription = - SimpleWFSHelper.describeWMSLayer(wmsService.getUrl(), username, password, layerName); - if (wfsLayerDescription != null && !wfsLayerDescription.typeNames().isEmpty()) { - logger.info( - "WMS described layer \"{}\" with typeNames \"{}\" of WFS \"{}\" for WMS \"{}\"", - layerName, - wfsLayerDescription.typeNames(), - wfsLayerDescription.wfsUrl(), - wmsService.getUrl()); - - return wfsLayerDescription; - } - return null; - } - - /** - * Get the (exposed) attribute names of the WFS feature type. 
- * - * @param wfsTypeNameDescriptor provides the WFS feature type to get the attribute names for - * @return a list of attribute names for the WFS feature type - * @throws IOException if there were any problems setting up (creating or connecting) the datasource. - */ - private static List getWFSAttributeNames(WFSTypeNameDescriptor wfsTypeNameDescriptor) throws IOException { - Map connectionParameters = new HashMap<>(); - connectionParameters.put( - WFSDataStoreFactory.ENTITY_RESOLVER.key, PreventLocalAllowNestedJarEntityResolver.INSTANCE); - connectionParameters.put( - WFSDataStoreFactory.URL.key, - SimpleWFSHelper.getWFSRequestURL(wfsTypeNameDescriptor.wfsUrl(), "GetCapabilities") - .toURL()); - connectionParameters.put(WFSDataStoreFactory.PROTOCOL.key, false); - connectionParameters.put(WFSDataStoreFactory.WFS_STRATEGY.key, "geoserver"); - connectionParameters.put(WFSDataStoreFactory.LENIENT.key, true); - connectionParameters.put(WFSDataStoreFactory.TIMEOUT.key, SimpleWFSHelper.TIMEOUT); - if (wfsTypeNameDescriptor.username() != null) { - connectionParameters.put(WFSDataStoreFactory.USERNAME.key, wfsTypeNameDescriptor.username()); - connectionParameters.put(WFSDataStoreFactory.PASSWORD.key, wfsTypeNameDescriptor.password()); - } - - WFSDataStore wfs = new WFSDataStoreFactory().createDataStore(connectionParameters); - List attributeNames = - wfs.getFeatureSource(wfsTypeNameDescriptor.typeName()).getSchema().getAttributeDescriptors().stream() - .map(AttributeDescriptor::getLocalName) - .toList(); - - wfs.dispose(); - return attributeNames; - } -} diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index c3c5d2a09f..0b14db7c80 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -28,10 +28,6 @@ tailormap-api.features.wfs_count_exact=false # maximum number of items to return in a single (WFS/JDBC) feature info request tailormap-api.feature.info.maxitems=30 -# Should 
match the list in tailormap-viewer class AttributeListExportService -# deprecated -tailormap-api.export.allowed-outputformats=csv,text/csv,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,excel2007,application/vnd.shp,application/x-zipped-shp,SHAPE-ZIP,application/geopackage+sqlite3,application/x-gpkg,geopackage,geopkg,gpkg,application/geo+json,application/geojson,application/json,json,DXF-ZIP - # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk diff --git a/src/main/resources/openapi/obsolete-viewer-schemas.yaml b/src/main/resources/openapi/obsolete-viewer-schemas.yaml deleted file mode 100644 index a07b30ac60..0000000000 --- a/src/main/resources/openapi/obsolete-viewer-schemas.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (C) 2026 B3Partners B.V. -# -# SPDX-License-Identifier: MIT -# -openapi: 3.0.4 -info: - title: 'obsolete viewer models' - description: 'no servers or paths, just obsolete models in this document that need to be generated for backwards - compatibility, but should not be used in the API anymore.' 
- version: '1.0' - license: - name: 'MIT' - url: 'https://mit-license.org/' - contact: - name: 'B3Partners BV' - url: 'https://www.b3partners.nl/' - email: 'info@b3partners.nl' -servers: [ ] -paths: { } - -components: - schemas: - LayerExportCapabilities: - description: '**OBSOLETE**, since the export capabilities are now predefined and no longer need to be discovered per layer' - type: object - required: [exportable] - properties: - exportable: - nullable: false - type: boolean - outputFormats: - type: array - items: - type: string \ No newline at end of file diff --git a/src/test/java/org/tailormap/api/controller/LayerExportControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExportControllerIntegrationTest.java deleted file mode 100644 index 602758e22c..0000000000 --- a/src/test/java/org/tailormap/api/controller/LayerExportControllerIntegrationTest.java +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (C) 2023 B3Partners B.V. - * - * SPDX-License-Identifier: MIT - */ -package org.tailormap.api.controller; - -import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf; -import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; -import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; -import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; -import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; -import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; -import static org.tailormap.api.TestRequestProcessor.setServletPath; -import static org.tailormap.api.controller.TestUrls.layerBakPostgis; -import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; -import static org.tailormap.api.controller.TestUrls.layerProvinciesWfs; -import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; 
-import static org.tailormap.api.controller.TestUrls.layerWaterdeel; - -import org.hamcrest.Matchers; -import org.junit.jupiter.api.MethodOrderer; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestMethodOrder; -import org.junit.jupiter.api.parallel.Execution; -import org.junit.jupiter.api.parallel.ExecutionMode; -import org.junitpioneer.jupiter.Issue; -import org.junitpioneer.jupiter.Stopwatch; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; -import org.springframework.http.MediaType; -import org.springframework.test.web.servlet.MockMvc; -import org.tailormap.api.StaticTestData; -import org.tailormap.api.annotation.PostgresIntegrationTest; - -@PostgresIntegrationTest -@AutoConfigureMockMvc -@Execution(ExecutionMode.CONCURRENT) -@Stopwatch -@TestMethodOrder(MethodOrderer.OrderAnnotation.class) -class LayerExportControllerIntegrationTest { - - private static final String downloadPath = "/export/download"; - private static final String capabilitiesPath = "/export/capabilities"; - - @Autowired - private MockMvc mockMvc; - - @Value("${tailormap-api.base-path}") - private String apiBasePath; - - @Test - void should_return_export_capabilities_with_jdbc_feature_source() throws Exception { - final String url = apiBasePath + layerWaterdeel + capabilitiesPath; - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON).with(setServletPath(url))) - .andExpect(status().isOk()) - .andExpect(content().contentType(MediaType.APPLICATION_JSON)) - .andExpect(jsonPath("$.exportable").value(true)) - .andExpect(jsonPath("$.outputFormats") - .value(Matchers.containsInAnyOrder( - "text/xml; subtype=gml/3.1.1", - "DXF", - "DXF-ZIP", - "GML2", - "KML", - "SHAPE-ZIP", - "application/geopackage+sqlite3", - "application/gml+xml; version=3.2", - "application/json", - "application/vnd.google-earth.kml xml", - 
"application/vnd.google-earth.kml+xml", - "application/x-gpkg", - "csv", - "excel", - "excel2007", - "geopackage", - "geopkg", - "gml3", - "gml32", - "gpkg", - "json", - "text/csv", - "text/xml; subtype=gml/2.1.2", - "text/xml; subtype=gml/3.2", - "application/vnd.ogc.fg+json", - "application/geo+json"))); - } - - @Test - void should_return_export_capabilities_with_wfs_feature_source() throws Exception { - final String url = apiBasePath + layerProvinciesWfs + capabilitiesPath; - mockMvc.perform(get(url).with(setServletPath(url)).accept(MediaType.APPLICATION_JSON)) - .andExpect(status().isOk()) - .andExpect(content().contentType(MediaType.APPLICATION_JSON)) - .andExpect(jsonPath("$.exportable").value(true)) - .andExpect(jsonPath("$.outputFormats") - .value(Matchers.containsInAnyOrder( - "text/xml; subtype=gml/3.1.1", - "application/json; subtype=geojson", - "application/json", - "text/xml"))); - } - - @Test - void should_export_geo_json() throws Exception { - final String url = apiBasePath + layerProvinciesWfs + downloadPath; - mockMvc.perform(get(url).with(setServletPath(url)) - .accept(MediaType.APPLICATION_JSON) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - .param("attributes", "geom,naam,code")) - .andExpect(status().isOk()) - .andExpect(content().contentType(MediaType.APPLICATION_JSON)) - .andExpect(jsonPath("$.type").value("FeatureCollection")) - .andExpect(jsonPath("$.name").value("Provinciegebied")) - .andExpect(jsonPath("$.features.length()").value(12)) - .andExpect(jsonPath("$.features[0].geometry.type").value("MultiPolygon")); - } - - @Test - void should_export_geo_package() throws Exception { - final String url = apiBasePath + layerWaterdeel + downloadPath; - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .param("outputFormat", "application/geopackage+sqlite3")) - .andExpect(status().isOk()) - .andExpect(content().contentType("application/geopackage+sqlite3")); - } - - @Test - void 
should_export_geo_json_with_filter() throws Exception { - final String url = apiBasePath + layerWaterdeel + downloadPath; - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - .param("filter", "(BRONHOUDER IN ('G1904'))")) - .andExpect(status().isOk()) - // GeoServer returns application/json;charset=UTF-8; but this is deprecated - // .andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8_VALUE)) - .andExpect(jsonPath("$.type").value("FeatureCollection")) - .andExpect(jsonPath("$.features.length()").value(1)) - .andExpect(jsonPath("$.features[0].geometry.type").value("Polygon")) - .andExpect(jsonPath("$.features[0].properties.BRONHOUDER").value("G1904")); - } - - @Test - void should_export_large_filter_using_post() throws Exception { - final String url = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath; - - mockMvc.perform(post(url) - .accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .with(csrf()) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - .param("filter", StaticTestData.get("large_cql_filter")) - .contentType(MediaType.APPLICATION_FORM_URLENCODED)) - .andExpect(status().isOk()) - // GeoServer returns application/json;charset=UTF-8; but this is deprecated - // .andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8_VALUE)) - .andExpect(jsonPath("$.type").value("FeatureCollection")) - .andExpect(jsonPath("$.features.length()").value(18)) - .andExpect(jsonPath("$.features[0].geometry.type").value("Polygon")); - } - - @Test - void should_export_large_filter_using_post_and_plus_in_output_format() throws Exception { - final String url = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath; - - mockMvc.perform(post(url) - .accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .with(csrf()) - // Test using an outputFormat with a '+' that should be encoded to "%2B" and not be interpreted - // 
as a space - .param("outputFormat", "application/geopackage+sqlite3") - .param("filter", StaticTestData.get("large_cql_filter")) - .contentType(MediaType.APPLICATION_FORM_URLENCODED)) - .andExpect(status().isOk()) - .andExpect(content().contentType("application/geopackage+sqlite3")); - } - - @Test - void should_export_geo_json_with_filter_and_sort() throws Exception { - final String url = apiBasePath + layerWaterdeel + downloadPath; - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - .param("filter", "(BRONHOUDER IN ('G1904','L0002','L0004'))") - .param("sortBy", "CLASS") - .param("sortOrder", "asc")) - .andExpect(status().isOk()) - // GeoServer returns application/json;charset=UTF-8; but this is deprecated - // .andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8_VALUE)) - .andExpect(jsonPath("$.type").value("FeatureCollection")) - .andExpect(jsonPath("$.features.length()").value(19)) - .andExpect(jsonPath("$.features[0].geometry.type").value("Polygon")) - .andExpect(jsonPath("$.features[0].properties.CLASS").value("greppel, droge sloot")); - - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - .param("filter", "(BRONHOUDER IN ('G1904','L0002','L0004'))") - .param("sortBy", "CLASS") - .param("sortOrder", "desc")) - .andExpect(status().isOk()) - // GeoServer returns application/json;charset=UTF-8; but this is deprecated - // .andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8_VALUE)) - .andExpect(jsonPath("$.type").value("FeatureCollection")) - .andExpect(jsonPath("$.features.length()").value(19)) - .andExpect(jsonPath("$.features[0].geometry.type").value("Polygon")) - .andExpect(jsonPath("$.features[0].properties.CLASS").value("watervlakte")); - } - - @Test - void should_not_export_hidden_attributes_in_geo_json_when_requested() throws Exception 
{ - final String url = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath; - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - // terminationdate,geom_kruinlijn are hidden attributes - .param("attributes", "identificatie,bronhouder,class,terminationdate,geom_kruinlijn")) - .andExpect(status().is4xxClientError()) - .andExpect(content().contentType(MediaType.APPLICATION_JSON)) - .andExpect(jsonPath("$.message") - .value("One or more requested attributes are not available on the feature type")); - } - - @Test - @Issue("https://b3partners.atlassian.net/browse/SUPPORT-14840") - void should_not_export_hidden_attributes_in_geo_json() throws Exception { - final String url = apiBasePath + layerBakPostgis + downloadPath; - mockMvc.perform(get(url).accept(MediaType.APPLICATION_JSON) - .with(setServletPath(url)) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE) - .param("filter", "(identificatie = 'P0026.8abeacd54c5b7500047b2112796cab56')")) - .andExpect(status().isOk()) - .andExpect(jsonPath("$.type").value("FeatureCollection")) - .andExpect(jsonPath("$.features.length()").value(1)) - // all attributes are hidden except bronhouder and identificatie - .andExpect(jsonPath("$.features[0].properties.identificatie") - .value("P0026.8abeacd54c5b7500047b2112796cab56")) - // mandatory, but hidden attributes of the schema - .andExpect( - jsonPath("$.features[0].properties.lv_publicatiedatum").isNotEmpty()) - .andExpect(jsonPath("$.features[0].properties.creationdate").isNotEmpty()) - .andExpect( - jsonPath("$.features[0].properties.tijdstipregistratie").isNotEmpty()) - .andExpect(jsonPath("$.features[0].properties.bronhouder").value("P0026")) - .andExpect(jsonPath("$.features[0].properties.inonderzoek").isNotEmpty()) - .andExpect(jsonPath("$.features[0].properties.relatievehoogteligging") - .isNotEmpty()) - 
.andExpect(jsonPath("$.features[0].properties.bgt_status").isNotEmpty()) - .andExpect(jsonPath("$.features[0].properties.function_").isNotEmpty()) - .andExpect(jsonPath("$.features[0].properties.plus_type").isNotEmpty()) - // non-mandatory attributes of the schema that were not requested - .andExpect(jsonPath("$.features[0].properties.eindregistratie").doesNotHaveJsonPath()) - .andExpect(jsonPath("$.features[0].properties.terminationdate").doesNotHaveJsonPath()); - } - - @Test - void wms_secured_proxy_not_in_public_app() throws Exception { - final String testUrl = apiBasePath + layerProxiedWithAuthInPublicApp + "/export/download"; - mockMvc.perform(get(testUrl) - .accept(MediaType.APPLICATION_JSON) - .with(setServletPath(testUrl)) - .param("outputFormat", MediaType.APPLICATION_JSON_VALUE)) - .andExpect(status().isForbidden()); - } - - @Test - void invalid_output_format_not_accepted() throws Exception { - final String testUrl = apiBasePath + layerBegroeidTerreindeelPostgis + "/export/download"; - mockMvc.perform(get(testUrl) - .accept(MediaType.APPLICATION_JSON) - .with(setServletPath(testUrl)) - .param("outputFormat", "Invalid value!")) - .andExpect(status().isBadRequest()); - } -} diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index 4b836a325c..39c94a86cb 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -2,8 +2,6 @@ tailormap-api.base-path=/api tailormap-api.admin.base-path=/api/admin management.endpoints.web.base-path=/api/actuator tailormap-api.new-admin-username=tm-admin -# deprecated -tailormap-api.export.allowed-outputformats=application/geopackage+sqlite3,application/json # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape # the number of features after which a progress report is sent back to the viewer, to update the progress bar From 
c7d166caf19e53cf987d8a7734bc88f08a58bc3c Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Tue, 5 May 2026 12:28:37 +0200 Subject: [PATCH 12/17] HTM-2017: Improve Excel extract by omitting column auto-sizing on large record sets --- .../service/CreateLayerExtractService.java | 27 ++- ...LayerExtractControllerIntegrationTest.java | 41 +--- ...ctControllerLargeExcelIntegrationTest.java | 178 ++++++++++++++++++ .../api/controller/SseParsingUtils.java | 49 +++++ .../tailormap/api/controller/TestUrls.java | 1 - 5 files changed, 246 insertions(+), 50 deletions(-) create mode 100644 src/test/java/org/tailormap/api/controller/LayerExtractControllerLargeExcelIntegrationTest.java create mode 100644 src/test/java/org/tailormap/api/controller/SseParsingUtils.java diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index 9ef20634df..13fc60d6e0 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -9,7 +9,6 @@ import ch.rasc.sse.eventbus.SseEventBus; import java.io.File; import java.io.IOException; -import java.io.Serializable; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -269,6 +268,10 @@ private void handleSingleFileFormats( DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0])); outputDataStore.createSchema(fType); + if (outputDataStore instanceof ExcelDataStore excelDataStore) { + excelDataStore.setEnableCellAutoSizing(featCount >= 0 && featCount < 1000); + } + final AtomicInteger featsAdded = new AtomicInteger(); if (outputDataStore.getFeatureSource() instanceof SimpleFeatureStore featureStore) { featureStore.setTransaction(outputTransaction); @@ -289,8 +292,8 @@ private void handleSingleFileFormats( }); 
featureStore.addFeatures(inputFeatureSource.getFeatures(q)); outputTransaction.commit(); - this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); outputDataStore.dispose(); + this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); } else { outputDataStore.dispose(); this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features"); @@ -372,13 +375,19 @@ private FileDataStore getExtractDataStore( true)); } case XLSX -> { - Map params = Map.of( - ExcelDataStoreFactory.FILE_PARAM.key, - outputFile, - ExcelDataStoreFactory.SHEET_PARAM.key, - // typeName could have a prefix; for Excel sheet names ':' is disallowed, max length is 31 - typeName.substring(typeName.lastIndexOf(":") + 1, Math.min(typeName.length(), 31))); - return (FileDataStore) new ExcelDataStoreFactory().createNewDataStore(params); + // replace any invalid characters such as /\?*[] with '_' and clip to 31 characters because Excel has + // limitations on sheet names. Also clip off any WFS namespace prefix in the type name, which is often + // separated by a ':' character, because ':' is not allowed in Excel sheet names. + typeName = typeName.contains(":") + ? 
typeName.substring(typeName.lastIndexOf(":") + 1).replaceAll("[\\\\/?*\\[\\]:]", "_") + : typeName.replaceAll("[\\\\/?*\\[\\]:]", "_"); + typeName = typeName.substring(0, Math.min(typeName.length(), 31)); + return (FileDataStore) new ExcelDataStoreFactory() + .createNewDataStore(Map.of( + ExcelDataStoreFactory.FILE_PARAM.key, + outputFile, + ExcelDataStoreFactory.SHEET_PARAM.key, + typeName)); } case GEOJSON -> { return (FileDataStore) new GeoJSONDataStoreFactory() diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index 85d2f5e54b..0152f1e41f 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -29,7 +29,6 @@ import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; import java.io.ByteArrayInputStream; -import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.util.HashSet; @@ -57,15 +56,13 @@ import org.springframework.test.web.servlet.MvcResult; import org.tailormap.api.StaticTestData; import org.tailormap.api.annotation.PostgresIntegrationTest; -import org.tailormap.api.viewer.model.ServerSentEventResponse; -import tools.jackson.databind.ObjectMapper; @PostgresIntegrationTest @AutoConfigureMockMvc @Execution(ExecutionMode.CONCURRENT) @Stopwatch @TestMethodOrder(MethodOrderer.OrderAnnotation.class) -class LayerExtractControllerIntegrationTest { +class LayerExtractControllerIntegrationTest extends SseParsingUtils { private static final String extractPath = "/extract/"; private static final String downloadPath = "/extract/download/"; // Use a unique clientId per test instance to avoid cross-test interference @@ -533,40 +530,4 @@ void should_export_large_filter_to_shape() throws Exception { assertEquals(6, 
extensions.size(), "Expected 6 unique file extensions in the shapefile zip"); } } - - /** - * Parse the last non-empty line from the SSE stream that looks something like: - * {@code data:{"details":{"message":"Extract task - * completed","progress":100,"file":"begroeidterreindeel15061479295163305053.csv"},"eventType":"extract-completed","id":"019d6838-7f48-7053-9256-dd4b57c14264"} - * } as JSON and extract the file from the details. - */ - private String getLastCompletedEventJson(String sseMessages) throws IOException { - return java.util.Arrays.stream(sseMessages.split("\\R")) - .map(String::trim) - .filter(line -> !line.isEmpty()) - .filter(line -> line.startsWith("data:")) - .filter(line -> line.contains("\"eventType\":\"extract-completed\"")) - .reduce((first, second) -> second) - .orElseThrow() - .substring("data:".length()); - } - - private String getDownloadId(String eventJson) { - return new ObjectMapper() - .readTree(eventJson) - .path("details") - .path("downloadId") - .asString(); - } - - private int count_completed_messages(String s) { - int count = 0; - int index = 0; - final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.EXTRACT_COMPLETED + "\""; - while ((index = s.indexOf(marker, index)) != -1) { - count++; - index += marker.length(); - } - return count; - } } diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerLargeExcelIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerLargeExcelIntegrationTest.java new file mode 100644 index 0000000000..daa0aac156 --- /dev/null +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerLargeExcelIntegrationTest.java @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2026 B3Partners B.V. 
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; +import static org.tailormap.api.TestRequestProcessor.setServletPath; +import static org.tailormap.api.controller.TestUrls.layerOsmPolygonPostgis; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; +import org.apache.poi.util.IOUtils; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junitpioneer.jupiter.Issue; +import org.junitpioneer.jupiter.Stopwatch; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; 
+import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; +import org.tailormap.api.annotation.PostgresIntegrationTest; + +@PostgresIntegrationTest +@AutoConfigureMockMvc +@Execution(ExecutionMode.CONCURRENT) +@Issue("HTM-2017: Large Excel export takes long time") +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class LayerExtractControllerLargeExcelIntegrationTest extends SseParsingUtils { + private static final String extractPath = "/extract/"; + private static final String downloadPath = "/extract/download/"; + // Use a unique clientId per test instance to avoid cross-test interference + // when running concurrently. + private final String sseClientId = "testcase-" + System.nanoTime(); + + @Autowired + private MockMvc mockMvc; + + @Value("${tailormap-api.base-path}") + private String apiBasePath; + + /** SSE connection result; its response buffer accumulates server-sent events. 
*/ + private MvcResult sseResult; + + @BeforeEach + void start_sse_stream() throws Exception { + final String sseUrl = apiBasePath + "/events/" + sseClientId; + sseResult = mockMvc.perform(get(sseUrl) + .accept(MediaType.TEXT_EVENT_STREAM) + .with(setServletPath(sseUrl)) + .acceptCharset(StandardCharsets.UTF_8)) + .andExpect(request().asyncStarted()) + .andReturn(); + } + + @Stopwatch + @Test + void should_export_large_dataset_to_excel() throws Exception { + final String extractUrl = apiBasePath + layerOsmPolygonPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param( + "attributes", + "osm_id,access,addr:housename,addr:housenumber,addr:interpolation,admin_level,aerialway,aeroway,amenity,area,barrier,bicycle,brand,bridge,boundary,building,construction,covered,culvert,cutting,denomination,disused,embankment,foot,generator:source,harbour,highway,historic,horse,intermittent,junction,landuse,layer,leisure,lock,man_made,military,motorcar,name,natural,office,oneway,operator,place,population,power,power_source,public_transport,railway,ref,religion,route,service,shop,sport,surface,toll,tourism,tower:type,tracktype,tunnel,water,waterway,wetland,width,wood,z_order,way_area") + .param("outputFormat", "xlsx") + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. 
+ Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + // should finish in less than 2 minutes + Awaitility.await().pollInterval(5, SECONDS).atMost(2, MINUTES).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, containsString(".xlsx")); + + final String downloadUrl = apiBasePath + layerOsmPolygonPostgis + downloadPath + extractedDownloadId; + MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat( + contentType, + containsString("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }) + .andReturn(); + + // open the Excel file and check that we have the expected content + // allow reading large files into byte arrays, this is 10x the default value + int rememberMaxOverride = IOUtils.getByteArrayMaxOverride(); + IOUtils.setByteArrayMaxOverride(1_000_000_000); + try (InputStream inp = new ByteArrayInputStream(download.getResponse().getContentAsByteArray()); + Workbook wb = WorkbookFactory.create(inp)) { + + Sheet sheet = wb.getSheetAt(0); + + assertAll( + "Check sheet", + () -> assertEquals( + 102467 + /*header row*/ 1, + 
sheet.getPhysicalNumberOfRows(), + () -> "Expected " + (102467 + /*header row*/ 1) + " rows in the Excel sheet, including header and data rows"), + () -> assertEquals("osm_polygon", sheet.getSheetName(), "Expected sheet name to be osm_polygon"), + () -> assertEquals( + 69, sheet.getRow(0).getPhysicalNumberOfCells(), "Expected 69 columns in the header row")); + + Map columnNames = new HashMap<>(); + sheet.getRow(0).forEach(cell -> columnNames.put(cell.getStringCellValue(), cell.getColumnIndex())); + + assertAll( + "Check first data row", + () -> assertEquals( + CellType.NUMERIC, + sheet.getRow(1).getCell(columnNames.get("osm_id")).getCellType(), + "Expected osm_id cell in the first data row to be numeric"), + () -> assertEquals( + CellType.BLANK, + sheet.getRow(1).getCell(columnNames.get("access")).getCellType(), + "Expected access cell in the first data row to be blank"), + () -> assertEquals( + "meadow", + sheet.getRow(1).getCell(columnNames.get("landuse")).getStringCellValue()), + () -> assertEquals( + 68651.3, + sheet.getRow(1).getCell(columnNames.get("way_area")).getNumericCellValue(), + 0.1)); + } finally { + IOUtils.setByteArrayMaxOverride(rememberMaxOverride); + } + } +} diff --git a/src/test/java/org/tailormap/api/controller/SseParsingUtils.java b/src/test/java/org/tailormap/api/controller/SseParsingUtils.java new file mode 100644 index 0000000000..6436ed1d49 --- /dev/null +++ b/src/test/java/org/tailormap/api/controller/SseParsingUtils.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2026 B3Partners B.V.
+ * + * SPDX-License-Identifier: MIT + */ +package org.tailormap.api.controller; + +import org.tailormap.api.viewer.model.ServerSentEventResponse; +import tools.jackson.core.JacksonException; +import tools.jackson.databind.ObjectMapper; + +abstract class SseParsingUtils { + + /** + * Return the JSON of the last extract-completed data line of the SSE stream, which looks like: + * {@code data:{"details":{"message":"Extract task + * completed","progress":100,"downloadId":"begroeidterreindeel15061479295163305053.csv"},"eventType":"extract-completed","id":"019d6838-7f48-7053-9256-dd4b57c14264"} + * }; the {@code data:} prefix is stripped and the JSON is returned unparsed. Throws if no such line exists. + */ + String getLastCompletedEventJson(String sseMessages) { + return java.util.Arrays.stream(sseMessages.split("\\R")) + .map(String::trim) + .filter(line -> !line.isEmpty()) + .filter(line -> line.startsWith("data:")) + .filter(line -> line.contains("\"eventType\":\"extract-completed\"")) + .reduce((first, second) -> second) + .orElseThrow() + .substring("data:".length()); + } + + String getDownloadId(String eventJson) throws JacksonException { + return new ObjectMapper() + .readTree(eventJson) + .path("details") + .path("downloadId") + .asString(); + } + + int count_completed_messages(String s) { + int count = 0; + int index = 0; + final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.EXTRACT_COMPLETED + "\""; + while ((index = s.indexOf(marker, index)) != -1) { + count++; + index += marker.length(); + } + return count; + } +} diff --git a/src/test/java/org/tailormap/api/controller/TestUrls.java b/src/test/java/org/tailormap/api/controller/TestUrls.java index 591a0bb11e..9e376f14e9 100644 --- a/src/test/java/org/tailormap/api/controller/TestUrls.java +++ b/src/test/java/org/tailormap/api/controller/TestUrls.java @@ -14,6 +14,5 @@ public interface TestUrls { String layerWegdeelSqlServer = "/app/default/layer/lyr:snapshot-geoserver:sqlserver:wegdeel"; String layerOsmPolygonPostgis =
"/app/default/layer/lyr:snapshot-geoserver:postgis:osm_polygon"; String layerProxiedWithAuthInPublicApp = "/app/default/layer/lyr:bestuurlijkegebieden-proxied:Provinciegebied"; - String layerWaterdeel = "/app/default/layer/lyr:snapshot-geoserver:oracle:WATERDEEL"; String layerKadastraalPerceel = "/app/default/layer/lyr:snapshot-geoserver:postgis:kadastraal_perceel"; } From 19928c0f56fb967d5cd2ccd30a201ceefaf3a717 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Tue, 5 May 2026 17:03:03 +0200 Subject: [PATCH 13/17] HTM-2016: Create a separate SseEventBus bean for the viewer so viewers don't get aministrative braodcasts --- README.md | 17 ++++ .../api/configuration/TailormapConfig.java | 58 +++++++++++++ .../ServerSentEventsController.java | 11 ++- .../ServerSentEventsAdminController.java | 12 ++- .../service/CreateLayerExtractService.java | 5 +- ...erSentEventsControllerIntegrationTest.java | 85 ++++++++++++++++--- .../api/controller/SseParsingUtils.java | 35 ++++++++ 7 files changed, 206 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 682a6eecde..5e32461960 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,23 @@ mvn -Pdeveloping,postgresql verify -Dspring-boot.run.profiles=dev,populate-testd * You can use `mvn -U org.codehaus.mojo:versions-maven-plugin:display-dependency-updates` to search for dependency updates +#### SSE streams + +We have 2 SSE streams available, one in the admin API at: `api/admin/events/{clientId}` and one in the viewer API +at: `api/events/{clientId}`. When using these streams you must make sure that you are using/injecting the correct +`SseEventBus` for each API. + +For the admin use the default `eventBus` bean, inject using: `SseEventBus eventBus` (defined using the +`@EnableSseEventBus` annotation in the `TailormapConfig` class). 
+For the viewer use the `viewerSseEventBus` bean, inject using: `@Qualifier("viewerSseEventBus") SseEventBus eventBus` +(defined in the `TailormapConfig` class). +See: +- [ServerSentEventsAdminController](src/main/java/org/tailormap/api/controller/admin/ServerSentEventsAdminController.java) for the admin configuration +- [ServerSentEventsController](src/main/java/org/tailormap/api/controller/ServerSentEventsController.java) for the viewer configuration +- [TailormapConfig](src/main/java/org/tailormap/api/configuration/TailormapConfig.java) for the bean definitions + +If you inject the wrong one you may not receive the events you want, and you risk sending administrative events to the viewer. + ## Releasing ### Prerequisites diff --git a/src/main/java/org/tailormap/api/configuration/TailormapConfig.java b/src/main/java/org/tailormap/api/configuration/TailormapConfig.java index 39c621e120..e1bb884484 100644 --- a/src/main/java/org/tailormap/api/configuration/TailormapConfig.java +++ b/src/main/java/org/tailormap/api/configuration/TailormapConfig.java @@ -5,8 +5,23 @@ */ package org.tailormap.api.configuration; +import ch.rasc.sse.eventbus.DataObjectConverter; +import ch.rasc.sse.eventbus.DefaultDataObjectConverter; +import ch.rasc.sse.eventbus.DefaultSubscriptionRegistry; +import ch.rasc.sse.eventbus.DistributedEventBus; +import ch.rasc.sse.eventbus.JacksonDataObjectConverter; +import ch.rasc.sse.eventbus.ReplayStore; +import ch.rasc.sse.eventbus.SseEventBus; +import ch.rasc.sse.eventbus.SubscriptionRegistry; import ch.rasc.sse.eventbus.config.EnableSseEventBus; +import ch.rasc.sse.eventbus.config.SseEventBusConfigurer; +import ch.rasc.sse.eventbus.observation.SseEventBusObservationConvention; +import io.micrometer.observation.ObservationRegistry; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; +import org.jspecify.annotations.Nullable; +import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.beans.factory.annotation.Value; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.boot.context.properties.EnableConfigurationProperties; @@ -15,6 +30,7 @@ import org.springframework.scheduling.annotation.EnableScheduling; import org.springframework.web.servlet.LocaleResolver; import org.springframework.web.servlet.i18n.AcceptHeaderLocaleResolver; +import tools.jackson.databind.ObjectMapper; @Configuration @EnableConfigurationProperties @@ -42,4 +58,46 @@ public LocaleResolver localeResolver() { resolver.setDefaultLocale(Locale.of(defaultLanguage)); return resolver; } + + /** + * Define a new viewer SseEventBus bean for viewer-specific SSE traffic. + * + * @return the viewerSseEventBus instance + */ + @Bean("viewerSseEventBus") + public SseEventBus viewerSseEventBus( + @Autowired(required = false) @Nullable SseEventBusConfigurer configurer, + @Autowired(required = false) @Nullable ObjectMapper objectMapper, + @Autowired(required = false) @Nullable List dataObjectConverters, + @Autowired(required = false) @Nullable SubscriptionRegistry subscriptionRegistry, + @Autowired(required = false) @Nullable ReplayStore replayStore, + @Autowired(required = false) @Nullable ObservationRegistry observationRegistry, + @Autowired(required = false) @Nullable SseEventBusObservationConvention observationConvention, + @Autowired(required = false) @Nullable DistributedEventBus distributedEventBus) { + + // Apply same defaults as DefaultSseEventBusConfiguration + SseEventBusConfigurer config = configurer != null + ? configurer + : new SseEventBusConfigurer() { + /* defaults */ + }; + + SubscriptionRegistry registry = + subscriptionRegistry != null ? subscriptionRegistry : new DefaultSubscriptionRegistry(); + + ReplayStore store = replayStore != null ? replayStore : config.replayStore(); + + List converters = + dataObjectConverters != null ? 
new ArrayList<>(dataObjectConverters) : new ArrayList<>(); + if (converters.isEmpty()) { + if (objectMapper != null) { + converters.add(new JacksonDataObjectConverter(objectMapper)); + } else { + converters.add(new DefaultDataObjectConverter()); + } + } + + return new SseEventBus( + config, registry, converters, store, observationRegistry, observationConvention, distributedEventBus); + } } diff --git a/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java b/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java index 6723f64c85..085f5debcc 100644 --- a/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java +++ b/src/main/java/org/tailormap/api/controller/ServerSentEventsController.java @@ -10,8 +10,10 @@ import ch.rasc.sse.eventbus.SseEvent; import ch.rasc.sse.eventbus.SseEventBus; import java.lang.invoke.MethodHandles; +import java.util.Collections; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.HttpStatus; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.web.bind.annotation.GetMapping; @@ -19,6 +21,7 @@ import org.springframework.web.bind.annotation.RestController; import org.springframework.web.server.ResponseStatusException; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; +import org.tailormap.api.util.UUIDv7; import org.tailormap.api.viewer.model.ServerSentEventResponse; import tools.jackson.core.JacksonException; import tools.jackson.databind.SerializationFeature; @@ -33,7 +36,7 @@ public class ServerSentEventsController { private final JsonMapper jsonMapper; - public ServerSentEventsController(SseEventBus eventBus, JsonMapper jsonMapper) { + public ServerSentEventsController(@Qualifier("viewerSseEventBus") SseEventBus eventBus, JsonMapper jsonMapper) { this.eventBus = eventBus; // force unindented/single line output for SSE 
messages, because we may have set // spring.jackson.serialization.indent_output=true for debugging/development/test @@ -60,7 +63,9 @@ public SseEmitter sse(@PathVariable String clientId) { @Scheduled(fixedRate = 60_000) public void keepAlive() throws JacksonException { - this.eventBus.handleEvent(SseEvent.ofData(jsonMapper.writeValueAsString( - new ServerSentEventResponse().eventType(ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE)))); + this.eventBus.handleEvent(SseEvent.ofData(jsonMapper.writeValueAsString(new ServerSentEventResponse() + .eventType(ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE) + .id(UUIDv7.randomV7()) + .details(Collections.emptyMap())))); } } diff --git a/src/main/java/org/tailormap/api/controller/admin/ServerSentEventsAdminController.java b/src/main/java/org/tailormap/api/controller/admin/ServerSentEventsAdminController.java index 97d970fb4a..7999203cf6 100644 --- a/src/main/java/org/tailormap/api/controller/admin/ServerSentEventsAdminController.java +++ b/src/main/java/org/tailormap/api/controller/admin/ServerSentEventsAdminController.java @@ -21,6 +21,7 @@ import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import org.tailormap.api.admin.model.ServerSentEvent; import tools.jackson.core.JacksonException; +import tools.jackson.databind.SerializationFeature; import tools.jackson.databind.json.JsonMapper; @RestController @@ -34,7 +35,16 @@ public class ServerSentEventsAdminController { public ServerSentEventsAdminController(SseEventBus eventBus, JsonMapper jsonMapper) { this.eventBus = eventBus; - this.jsonMapper = jsonMapper; + // force unindented/single line output for SSE messages, because we may have set + // spring.jackson.serialization.indent_output=true for debugging/development/test + if (jsonMapper.isEnabled(SerializationFeature.INDENT_OUTPUT)) { + this.jsonMapper = jsonMapper + .rebuild() + .configure(SerializationFeature.INDENT_OUTPUT, false) + .build(); + } else { + this.jsonMapper = jsonMapper; + } } /** 
diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index 13fc60d6e0..7b3e6a0de5 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -48,6 +48,7 @@ import org.jspecify.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.Scheduled; @@ -89,7 +90,9 @@ public class CreateLayerExtractService { private boolean exactWfsCounts; public CreateLayerExtractService( - SseEventBus eventBus, JsonMapper jsonMapper, FeatureSourceFactoryHelper featureSourceFactoryHelper) { + @Qualifier("viewerSseEventBus") SseEventBus eventBus, + JsonMapper jsonMapper, + FeatureSourceFactoryHelper featureSourceFactoryHelper) { this.eventBus = eventBus; this.featureSourceFactoryHelper = featureSourceFactoryHelper; // force unindented/single line output for SSE messages, because we may have set diff --git a/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java index 574bc19640..9008d4ade7 100644 --- a/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/ServerSentEventsControllerIntegrationTest.java @@ -9,8 +9,10 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; 
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import static org.tailormap.api.TestRequestProcessor.setServletPath; import java.lang.invoke.MethodHandles; @@ -26,15 +28,18 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc; import org.springframework.http.MediaType; +import org.springframework.security.test.context.support.WithMockUser; import org.springframework.test.web.servlet.MockMvc; import org.springframework.test.web.servlet.MvcResult; +import org.springframework.test.web.servlet.setup.MockMvcBuilders; +import org.springframework.web.context.WebApplicationContext; import org.tailormap.api.annotation.PostgresIntegrationTest; -import org.tailormap.api.viewer.model.ServerSentEventResponse; +import org.tailormap.api.persistence.Group; @PostgresIntegrationTest @AutoConfigureMockMvc @Execution(ExecutionMode.CONCURRENT) -class ServerSentEventsControllerIntegrationTest { +class ServerSentEventsControllerIntegrationTest extends SseParsingUtils { private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // Unique id avoids interference with parallel/other tests. 
@@ -46,6 +51,12 @@ class ServerSentEventsControllerIntegrationTest { @Value("${tailormap-api.base-path}") private String apiBasePath; + @Value("${tailormap-api.admin.base-path}") + private String adminBasePath; + + @Autowired + private WebApplicationContext context; + private MvcResult sseResult; @BeforeEach @@ -55,6 +66,7 @@ void start_sse_stream() throws Exception { .accept(MediaType.TEXT_EVENT_STREAM) .with(setServletPath(sseUrl)) .acceptCharset(StandardCharsets.UTF_8)) + .andExpect(status().isOk()) .andExpect(request().asyncStarted()) .andReturn(); } @@ -71,18 +83,67 @@ void should_send_keep_alive_messages_for_two_minutes() { .logging(logPrinter -> logger.debug("Checking for keep-alive messages in SSE stream... {}", logPrinter)) .untilAsserted(() -> { final String stream = sseResult.getResponse().getContentAsString(); - assertThat(count_keep_alive_messages(stream), greaterThanOrEqualTo(2)); + assertThat(count_all_keep_alive_messages(stream), greaterThanOrEqualTo(2)); }); } - private int count_keep_alive_messages(String stream) { - int count = 0; - int index = 0; - final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE + "\""; - while ((index = stream.indexOf(marker, index)) != -1) { - count++; - index += marker.length(); - } - return count; + /** Check that the admin and viewer SSE streams each receive only their own keep-alive messages, at least 2 per stream within 130 seconds. 
*/ + @Test + @WithMockUser( + username = "admin", + authorities = {Group.ADMIN}) + void admin_and_viewer_should_use_separate_sse_streams() throws Exception { + // start admin sse stream + MockMvc adminMockMvc = MockMvcBuilders.webAppContextSetup(context).build(); + final String adminSseUrl = adminBasePath + "/events/" + sseClientId; + MvcResult adminSseResult = adminMockMvc + .perform(get(adminSseUrl) + .accept(MediaType.TEXT_EVENT_STREAM) + .with(setServletPath(adminSseUrl)) + .acceptCharset(StandardCharsets.UTF_8)) + .andExpect(status().isOk()) + .andExpect(request().asyncStarted()) + .andReturn(); + + Awaitility.await("Waiting at least 2 minutes for any keep-alive messages") + .pollDelay(45, SECONDS) + .pollInterval(15, SECONDS) + .atLeast(1, MINUTES) + .atMost(130, SECONDS) + .logging( + logPrinter -> logger.debug("Checking for keep-alive messages in SSE streams... {}", logPrinter)) + .untilAsserted(() -> { + // check admin stream + final String adminStream = adminSseResult.getResponse().getContentAsString(); + logger.debug("admin stream: {}", adminStream); + assertThat( + "There should be at least 2 keep-alive messages for the admin", + count_all_keep_alive_messages(adminStream), + greaterThanOrEqualTo(2)); + assertEquals( + 0, + count_viewer_keep_alive_messages(adminStream), + "There should be no keep-alive messages for the viewer in the admin"); + assertEquals( + count_all_keep_alive_messages(adminStream), + count_admin_keep_alive_messages(adminStream), + "We should only get admin keep-alive messages in the admin SSE stream"); + + // and viewer stream + final String stream = sseResult.getResponse().getContentAsString(); + logger.debug("viewer stream: {}", stream); + assertThat( + "There should be at least 2 keep-alive messages for the viewer", + count_all_keep_alive_messages(stream), + greaterThanOrEqualTo(2)); + assertEquals( + count_all_keep_alive_messages(stream), + count_viewer_keep_alive_messages(stream), + "Admin keep-alive messages should not be sent 
to viewer SSE stream"); + assertEquals( + 0, + count_admin_keep_alive_messages(stream), + "There should be no keep-alive messages for the admin in the viewer SSE stream"); + }); } } diff --git a/src/test/java/org/tailormap/api/controller/SseParsingUtils.java b/src/test/java/org/tailormap/api/controller/SseParsingUtils.java index 6436ed1d49..74e29b9a0e 100644 --- a/src/test/java/org/tailormap/api/controller/SseParsingUtils.java +++ b/src/test/java/org/tailormap/api/controller/SseParsingUtils.java @@ -46,4 +46,39 @@ int count_completed_messages(String s) { } return count; } + + int count_all_keep_alive_messages(String stream) { + int count = 0; + int index = 0; + final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE + "\""; + while ((index = stream.indexOf(marker, index)) != -1) { + count++; + index += marker.length(); + } + return count; + } + + int count_viewer_keep_alive_messages(String stream) { + int count = 0; + int index = 0; + final String marker = + "\"details\":{},\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE + "\",\"id\""; + while ((index = stream.indexOf(marker, index)) != -1) { + count++; + index += marker.length(); + } + return count; + } + + int count_admin_keep_alive_messages(String stream) { + int count = 0; + int index = 0; + final String marker = + "\"details\":null,\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.KEEP_ALIVE + "\""; + while ((index = stream.indexOf(marker, index)) != -1) { + count++; + index += marker.length(); + } + return count; + } } From c38e822984a93f9261a6b2c47b27957dd7602563 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 8 May 2026 13:02:27 +0200 Subject: [PATCH 14/17] HTM-1977: implement geopackage extract --- .mvn/jvm.config | 3 +- pom.xml | 4 + .../controller/LayerExtractController.java | 1 + .../service/CreateLayerExtractService.java | 118 +++++++++++++++--- src/main/resources/application.properties | 2 
+- ...LayerExtractControllerIntegrationTest.java | 48 +++++++ src/test/resources/application.properties | 2 +- 7 files changed, 157 insertions(+), 21 deletions(-) diff --git a/.mvn/jvm.config b/.mvn/jvm.config index d3dd6be54f..a85f7f49c1 100644 --- a/.mvn/jvm.config +++ b/.mvn/jvm.config @@ -11,4 +11,5 @@ --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-opens jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED --add-opens jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED ---add-modules=jdk.incubator.vector \ No newline at end of file +--add-modules=jdk.incubator.vector +--enable-native-access=ALL-UNNAMED \ No newline at end of file diff --git a/pom.xml b/pom.xml index 7a89223e16..ae307b8d0f 100644 --- a/pom.xml +++ b/pom.xml @@ -328,6 +328,10 @@ SPDX-License-Identifier: MIT org.geotools gt-geojson-store + + org.geotools + gt-geopkg + org.geotools gt-http diff --git a/src/main/java/org/tailormap/api/controller/LayerExtractController.java b/src/main/java/org/tailormap/api/controller/LayerExtractController.java index b9eb2acfc0..7637f7e308 100644 --- a/src/main/java/org/tailormap/api/controller/LayerExtractController.java +++ b/src/main/java/org/tailormap/api/controller/LayerExtractController.java @@ -279,6 +279,7 @@ private void validateExcelLimits(TMFeatureType featureType, Set attribut } public enum ExtractOutputFormat { + GEOPACKAGE("geopackage", ".gpkg"), CSV("csv", ".csv"), GEOJSON("geojson", ".geojson"), XLSX("xlsx", ".xlsx"), diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java index 7b3e6a0de5..e4088ef38e 100644 --- a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java +++ b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java @@ -7,25 +7,8 @@ import ch.rasc.sse.eventbus.SseEvent; import ch.rasc.sse.eventbus.SseEventBus; -import java.io.File; -import java.io.IOException; -import 
java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Stream; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; import org.apache.commons.lang3.StringUtils; +import org.geotools.api.data.DataStore; import org.geotools.api.data.FeatureEvent; import org.geotools.api.data.FileDataStore; import org.geotools.api.data.Query; @@ -43,6 +26,7 @@ import org.geotools.data.shapefile.ShapefileDumper; import org.geotools.factory.CommonFactoryFinder; import org.geotools.feature.SchemaException; +import org.geotools.geopkg.GeoPkgDataStoreFactory; import org.geotools.util.factory.GeoTools; import org.jspecify.annotations.NonNull; import org.jspecify.annotations.Nullable; @@ -66,6 +50,25 @@ import tools.jackson.databind.SerializationFeature; import tools.jackson.databind.json.JsonMapper; +import java.io.File; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + @Service public class CreateLayerExtractService { private static final Logger logger = @@ -218,6 +221,9 @@ public void createLayerExtract( this.emitProgress(clientId, outputFileName, 0, false, "Starting extract"); switch (extractOutputFormat) { + case 
GEOPACKAGE -> + this.handleGeoPackage( + clientId, inputTmFeatureType, attributes, filter, sortBy, sortOrder, outputFileName); case SHAPE -> this.handleWithShapeDumper( clientId, inputTmFeatureType, attributes, filter, sortBy, sortOrder, outputFileName); @@ -234,6 +240,82 @@ public void createLayerExtract( } } + private void handleGeoPackage( + @NonNull String clientId, + @NonNull TMFeatureType inputTmFeatureType, + @NonNull Set attributes, + Filter filter, + String sortBy, + SortOrder sortOrder, + @NonNull String outputFileName) { + + SimpleFeatureSource inputFeatureSource = null; + DataStore outputDataStore = null; + + try (Transaction outputTransaction = new DefaultTransaction("tailormap-extract-output")) { + inputFeatureSource = featureSourceFactoryHelper.openGeoToolsFeatureSource(inputTmFeatureType); + + Query q = createQuery(inputFeatureSource, attributes, filter, sortBy, sortOrder); + + int featCount = getFeatureCount(inputFeatureSource, q); + if (featCount < 0) { + logger.warn("Could not determine feature count for extract, progress reporting will be omitted"); + } + + outputDataStore = new GeoPkgDataStoreFactory() + .createDataStore(Map.of( + GeoPkgDataStoreFactory.DBTYPE.key, + "geopkg", + GeoPkgDataStoreFactory.DATABASE.key, + getValidatedOutputFile(outputFileName), + GeoPkgDataStoreFactory.CONTENTS_ONLY.key, + false)); + + SimpleFeatureType fType = + DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0])); + outputDataStore.createSchema(fType); + + final AtomicInteger featsAdded = new AtomicInteger(); + if (outputDataStore.getFeatureSource(fType.getName()) instanceof SimpleFeatureStore featureStore) { + featureStore.setTransaction(outputTransaction); + featureStore.addFeatureListener(event -> { + if (event.getType().equals(FeatureEvent.Type.ADDED)) { + featsAdded.getAndIncrement(); + logger.debug("Added feature {}", featsAdded.get()); + } + if (featCount > 0) { + if (featsAdded.get() % progressReportInterval == 0) 
{ + this.emitProgress( + clientId, + outputFileName, + (int) ((featsAdded.doubleValue() / featCount) * 100), + false, + null); + } + } + }); + featureStore.addFeatures(inputFeatureSource.getFeatures(q)); + outputTransaction.commit(); + outputDataStore.dispose(); + this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully"); + } + } catch (SchemaException | IOException | IllegalArgumentException e) { + emitError(clientId, e.getMessage()); + logger.error("Creating extract failed", e); + } finally { + if (inputFeatureSource != null) { + try { + inputFeatureSource.getDataStore().dispose(); + } catch (Exception e) { + logger.warn("Error disposing datastore for feature source {}", inputFeatureSource.getName(), e); + } + } + if (outputDataStore != null) { + outputDataStore.dispose(); + } + } + } + private void handleSingleFileFormats( @NonNull String clientId, @NonNull TMFeatureType inputTmFeatureType, diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 0b14db7c80..f6f4cecb20 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -29,7 +29,7 @@ tailormap-api.features.wfs_count_exact=false tailormap-api.feature.info.maxitems=30 # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values -tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape +tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk # tailormap-api.extract.cleanup-minutes=120 # the directory where the extract output files are stored, should be writable by the application diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index 
0152f1e41f..276aea3048 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -24,6 +24,7 @@ import static org.tailormap.api.TestRequestProcessor.setServletPath; import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.CSV; import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.GEOJSON; +import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.GEOPACKAGE; import static org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat.SHAPE; import static org.tailormap.api.controller.TestUrls.layerBegroeidTerreindeelPostgis; import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp; @@ -530,4 +531,51 @@ void should_export_large_filter_to_shape() throws Exception { assertEquals(6, extensions.size(), "Expected 6 unique file extensions in the shapefile zip"); } } + + @Test + void should_export_to_geopackage() throws Exception { + final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; + mockMvc.perform(post(extractUrl) + .accept(MediaType.APPLICATION_JSON) + .with(setServletPath(extractUrl)) + .with(csrf()) + .param("attributes", "") + .param("outputFormat", GEOPACKAGE.getValue()) + .param("filter", StaticTestData.get("large_cql_filter")) + .acceptCharset(StandardCharsets.UTF_8) + .characterEncoding(StandardCharsets.UTF_8) + .contentType(MediaType.APPLICATION_FORM_URLENCODED)) + .andExpect(status().isAccepted()); + + // The SseEventBus may dispatch events slightly after the POST returns. + // Awaitility polls the buffered SSE response until the expected content appears. 
+ Awaitility.await() + .atMost(10, SECONDS) + .untilAsserted(() -> assertThat( + sseResult.getResponse().getContentAsString(), containsString("Extract task received"))); + + Awaitility.await().pollInterval(5, SECONDS).atMost(30, SECONDS).untilAsserted(() -> { + final String stream = sseResult.getResponse().getContentAsString(); + assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1)); + }); + + final String lastCompletedEventJson = + getLastCompletedEventJson(sseResult.getResponse().getContentAsString()); + assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100)); + + final String extractedDownloadId = getDownloadId(lastCompletedEventJson); + assertThat(extractedDownloadId, containsString(GEOPACKAGE.getExtension())); + + final String downloadUrl = apiBasePath + layerBegroeidTerreindeelPostgis + downloadPath + extractedDownloadId; + mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl))) + .andExpect(status().isOk()) + .andExpect(result -> { + String contentType = result.getResponse().getContentType(); + assertThat(contentType, containsString("application/geopackage+sqlite3")); + + String contentDisposition = result.getResponse().getHeader("Content-Disposition"); + assertThat(contentDisposition, containsString("attachment; filename=")); + assertThat(contentDisposition, containsString(extractedDownloadId)); + }); + } } diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index 39c94a86cb..25717925e1 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -3,7 +3,7 @@ tailormap-api.admin.base-path=/api/admin management.endpoints.web.base-path=/api/actuator tailormap-api.new-admin-username=tm-admin # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values -tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape 
+tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage # the number of features after which a progress report is sent back to the viewer, to update the progress bar tailormap-api.extract.progress-report-interval=10 # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk From 356396021097c19fab41793fc848efd4235f2944 Mon Sep 17 00:00:00 2001 From: Mark Prins <1165786+mprins@users.noreply.github.com> Date: Fri, 8 May 2026 13:43:43 +0200 Subject: [PATCH 15/17] HTM-1977: Implement GeoPackage export using low-level API, which seems more robust Add --enable-native-access=ALL-UNNAMED for the native sqlite/geopackage driver --- pom.xml | 2 + .../service/CreateLayerExtractService.java | 128 +++++++++--------- ...LayerExtractControllerIntegrationTest.java | 36 ++++- 3 files changed, 101 insertions(+), 65 deletions(-) diff --git a/pom.xml b/pom.xml index ae307b8d0f..1f6cd4d371 100644 --- a/pom.xml +++ b/pom.xml @@ -1017,6 +1017,8 @@ SPDX-License-Identifier: MIT See https://github.com/orgs/paketo-buildpacks/discussions/241 --> -XX:MaxDirectMemorySize=256M + + --enable-native-access=ALL-UNNAMED + ${geotools.version} org.geotools gt-geojson-store + + 35-SNAPSHOT org.geotools @@ -701,20 +704,21 @@ SPDX-License-Identifier: MIT https://repo.osgeo.org/repository/release/ + true - repo.b3p.nl - B3Partners public repository - https://repo.b3p.nl/nexus/repository/public/ + OSGeo-snapshots + Snapshots hosted by OSGeo + https://repo.osgeo.org/repository/snapshot/ true - OSGeo-snapshots - Snapshots hosted by OSGeo - https://repo.osgeo.org/repository/snapshot/ + repo.b3p.nl + B3Partners public repository + https://repo.b3p.nl/nexus/repository/public/ @@ -1011,14 +1015,14 @@ SPDX-License-Identifier: MIT alternatively, use environment variable BPL_JVM_CLASS_ADJUSTMENT when deploying the docker container --> 120% - - -XX:MaxDirectMemorySize=256M - - --enable-native-access=ALL-UNNAMED + + 
-XX:MaxDirectMemorySize=256M --enable-native-access=ALL-UNNAMED - 35-SNAPSHOT org.geotools @@ -703,15 +701,6 @@ SPDX-License-Identifier: MIT Releases hosted by OSGeo https://repo.osgeo.org/repository/release/ - - - - true - - OSGeo-snapshots - Snapshots hosted by OSGeo - https://repo.osgeo.org/repository/snapshot/ - true diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index b3f1513cb2..fa772b895c 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -29,7 +29,7 @@ tailormap-api.features.wfs_count_exact=false tailormap-api.feature.info.maxitems=30 # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values -tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage +tailormap-api.extract.allowed-outputformats=csv,xlsx,shape,geopackage # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk # tailormap-api.extract.cleanup-minutes=120 # the (base) directory where the extract output files are stored, should be writable by the application diff --git a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java index 5dce90fe6d..ca1a252539 100644 --- a/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java +++ b/src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java @@ -54,6 +54,7 @@ import org.junit.jupiter.api.TestMethodOrder; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junitpioneer.jupiter.DisabledUntil; import org.junitpioneer.jupiter.Stopwatch; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -406,6 +407,10 @@ 
void should_export_large_filter_to_excel() throws Exception { } } + @DisabledUntil( + date = "2026-06-01", + reason = + "This test relies on GeoTools 35.0 (or 34.4), see https://osgeo-org.atlassian.net/browse/GEOT-7894") @Test void should_export_large_filter_to_geojson() throws Exception { final String extractUrl = apiBasePath + layerBegroeidTerreindeelPostgis + extractPath + sseClientId; diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index 5eb656726d..31e331d7aa 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -3,7 +3,7 @@ tailormap-api.admin.base-path=/api/admin management.endpoints.web.base-path=/api/actuator tailormap-api.new-admin-username=tm-admin # see org.tailormap.api.controller.LayerExtractController.ExtractOutputFormat for valid values -tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage +tailormap-api.extract.allowed-outputformats=csv,xlsx,shape,geopackage # the number of features after which a progress report is sent back to the viewer, to update the progress bar tailormap-api.extract.progress-report-interval=10 # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk