Address code review

mprins · mprins · commit e46618e5b6ce · 2026-05-11T12:26:50.000+02:00
diff --git a/pom.xml b/pom.xml
@@ -322,7 +322,8 @@ SPDX-License-Identifier: MIT
         <dependency>
             <groupId>org.geotools</groupId>
             <artifactId>gt-excel-writer</artifactId>
-            <version>[35-SNAPSHOT,)</version>
+            <!-- gt-excel-writer is not in the gt-bom, so we need to specify the version here -->
+            <version>${geotools.version}</version>
         </dependency>
         <dependency>
             <groupId>org.geotools</groupId>
@@ -708,14 +709,6 @@ SPDX-License-Identifier: MIT
             <name>B3Partners public repository</name>
             <url>https://repo.b3p.nl/nexus/repository/public/</url>
         </repository>
-        <repository>
-            <snapshots>
-                <enabled>true</enabled>
-            </snapshots>
-            <id>OSGeo-snapshots</id>
-            <name>Snapshots hosted by OSGeo</name>
-            <url>https://repo.osgeo.org/repository/snapshot/</url>
-        </repository>
     </repositories>
     <pluginRepositories />
     <build>
@@ -1011,14 +1004,14 @@ SPDX-License-Identifier: MIT
                              alternatively, use environment variable BPL_JVM_CLASS_ADJUSTMENT when deploying the docker container
                              -->
                             <BPE_DEFAULT_BPL_JVM_CLASS_ADJUSTMENT>120%</BPE_DEFAULT_BPL_JVM_CLASS_ADJUSTMENT>
-                            <!-- JVM default is the same as -Xmx. However, the Paketo Java Buildpack memory calculator sets it to
-                             10M by default, which is too low causing OOM in our application (Netty Solr client and Hikari use more
-                             than 10MB in direct buffer pools after some time), set it to 256M
-                             See https://github.com/orgs/paketo-buildpacks/discussions/241
-                             -->
-                            <BPE_APPEND_JAVA_TOOL_OPTIONS xml:space="preserve"> -XX:MaxDirectMemorySize=256M</BPE_APPEND_JAVA_TOOL_OPTIONS>
-                            <!-- for GeoPackage support which uses a native driver -->
-                            <BPE_APPEND_JAVA_TOOL_OPTIONS xml:space="preserve"> --enable-native-access=ALL-UNNAMED</BPE_APPEND_JAVA_TOOL_OPTIONS>
+                            <!--
+                             - The JVM default for MaxDirectMemorySize is the same as -Xmx. However, the Paketo Java Buildpack
+                               memory calculator sets it to 10M by default, which is too low and causes OOM in our application (Netty
+                               Solr client and Hikari use more than 10MB in direct buffer pools after some time), so set it to 256M.
+                               See https://github.com/orgs/paketo-buildpacks/discussions/241
+
+                             - Enable native access for GeoPackage support which uses a native driver -->
+                            <BPE_APPEND_JAVA_TOOL_OPTIONS xml:space="preserve"> -XX:MaxDirectMemorySize=256M --enable-native-access=ALL-UNNAMED</BPE_APPEND_JAVA_TOOL_OPTIONS>
                             <!-- Headroom is used by the memory calculator to reduce the max total memory limit. The default is 0%,
                              but since Tailormap is usually run with unconstrained container memory, set it to 10% to prevent taking
                              too much host memory. Although Tailormap should not exhaust heap memory, reduce it as a preventive safety
diff --git a/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java b/src/main/java/org/tailormap/api/service/CreateLayerExtractService.java
@@ -7,24 +7,7 @@
 
 import ch.rasc.sse.eventbus.SseEvent;
 import ch.rasc.sse.eventbus.SseEventBus;
-import java.io.File;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.time.Instant;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Stream;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
+import jakarta.annotation.PostConstruct;
 import org.apache.commons.lang3.StringUtils;
 import org.geotools.api.data.FeatureEvent;
 import org.geotools.api.data.FileDataStore;
@@ -56,7 +39,6 @@
 import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
-import org.springframework.web.server.ResponseStatusException;
 import org.tailormap.api.controller.LayerExtractController;
 import org.tailormap.api.geotools.collection.ProgressReportingFeatureCollection;
 import org.tailormap.api.geotools.data.excel.ExcelDataStore;
@@ -68,6 +50,26 @@
 import tools.jackson.databind.SerializationFeature;
 import tools.jackson.databind.json.JsonMapper;
 
+import java.io.File;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.lang.invoke.MethodHandles;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
 @Service
 public class CreateLayerExtractService {
   private static final Logger logger =
@@ -77,9 +79,13 @@ public class CreateLayerExtractService {
   private final FeatureSourceFactoryHelper featureSourceFactoryHelper;
   private final FilterFactory ff = CommonFactoryFinder.getFilterFactory(GeoTools.getDefaultHints());
 
+  private static final String EXTRACT_SUBDIRECTORY = "tm-extracts";
   // we can safely use the tmp dir as a default here because we are running in a docker container without a shell so
   // access is limited
+  // Base directory from config; actual export dir is <base>/tm-extracts
   @Value("${tailormap-api.extract.location:#{systemProperties['java.io.tmpdir']}}")
+  private String exportFilesBaseLocation;
+
   private String exportFilesLocation;
 
   @Value("${tailormap-api.extract.cleanup-minutes:120}")
@@ -91,6 +97,19 @@ public class CreateLayerExtractService {
   @Value("${tailormap-api.features.wfs_count_exact:false}")
   private boolean exactWfsCounts;
 
+  @PostConstruct
+  void initializeExtractDirectory() { // prepares the directory that extract output files are written to
+    try {
+      Path exportRoot = Path.of(exportFilesBaseLocation, EXTRACT_SUBDIRECTORY); // <base>/tm-extracts
+      Files.createDirectories(exportRoot); // creates missing parent directories; no error if it already exists
+      this.exportFilesLocation = exportRoot.toRealPath().toString(); // store the real (symlink-resolved) path
+      logger.info("Using extract output directory: {}", this.exportFilesLocation);
+    } catch (IOException e) {
+      throw new UncheckedIOException( // rethrown as unchecked, keeping the original exception as the cause
+          "Failed to initialize extract directory under base path: " + exportFilesBaseLocation, e);
+    }
+  }
+
   public CreateLayerExtractService(
       @Qualifier("viewerSseEventBus") SseEventBus eventBus,
       JsonMapper jsonMapper,
@@ -249,13 +268,12 @@ private void handleGeoPackage(
       @NonNull String outputFileName) {
 
     SimpleFeatureSource inputFeatureSource = null;
-    File outputFile = null;
+    File outputFile;
     try {
       outputFile = getValidatedOutputFile(outputFileName);
       if (!logger.isDebugEnabled()) {
         // delete in production after JVM exit because the event bus will be reset when the JVM exits, and then
-        // we
-        // are unlikely to have a reference to the file anymore.
+        // we are unlikely to have a reference to the file anymore.
         // In debug/development mode we want to keep the file for inspection.
         outputFile.deleteOnExit();
       }
@@ -347,10 +365,15 @@ private void handleSingleFileFormats(
             clientId,
             "Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format."
                 .formatted(featCount, ExcelDataStore.getMaxRows()));
-        throw new ResponseStatusException(
-            org.springframework.http.HttpStatus.BAD_REQUEST,
-            "Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format."
-                .formatted(featCount, ExcelDataStore.getMaxRows()));
+        logger.error(
+            "Extract result contains {} features, which exceeds the maximum of {} for Excel output format. Please refine your filter or choose a different output format.",
+            featCount,
+            ExcelDataStore.getMaxRows());
+        // nothing we can do now as we are in a background/async process, so we just return without creating an
+        // extract file.
+        // The client will receive no extract completed event, and we have already emitted an error message with
+        // details.
+        return;
       }
 
       outputDataStore = this.getExtractDataStore(
@@ -390,7 +413,7 @@ private void handleSingleFileFormats(
         this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features");
         logger.error("Output datastore is not a SimpleFeatureStore, cannot write features");
       }
-    } catch (IOException | SchemaException | IllegalArgumentException e) {
+    } catch (IOException | SchemaException | IllegalArgumentException | NullPointerException e) {
       emitError(clientId, e.getMessage());
       logger.error("Creating extract failed", e);
     } finally {
@@ -527,10 +550,9 @@ private void handleWithShapeDumper(
           .resolve(baseName)
           .toFile()
           .getCanonicalFile();
-      if (logger.isDebugEnabled()) {
+      if (!logger.isDebugEnabled()) {
         // delete in production after JVM exit because the event bus will be reset when the JVM exits, and then
-        // we
-        // are unlikely to have a reference to the file anymore.
+        // we are unlikely to have a reference to the file anymore.
         // In debug/development mode we want to keep the directory for inspection.
         outputDirectory.deleteOnExit();
       }
@@ -616,7 +638,7 @@ private Query createQuery(
   @Scheduled(fixedDelay = 5, timeUnit = TimeUnit.MINUTES, initialDelay = 15)
   public void cleanupExpiredExtracts() {
     logger.debug("Running expired extracts cleanup...");
-    List<FileWithAttributes> clientFilesOnDisk = new ArrayList<>();
+    List<FileWithAttributes> oldDownloadFilesOnDisk = new ArrayList<>();
     Set<String> validClientIds = eventBus.getAllClientIds();
 
     // list download files in export location and delete those that are not bound to an active sse stream client
@@ -635,8 +657,12 @@ public void cleanupExpiredExtracts() {
             logger.error("Failed to delete unattached extract file {}", filename);
           }
         } else {
-          Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
-          clientFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
+          try {
+            Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
+            oldDownloadFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
+          } catch (IllegalArgumentException ignored) {
+            // not a valid v7 uuid
+          }
         }
       });
 
@@ -651,25 +677,39 @@ public void cleanupExpiredExtracts() {
           }
           String clientId = parts[1];
           if (!validClientIds.contains(clientId)) {
-            if (!file.delete()) {
-              logger.error("Failed to delete unattached extract file {}", filename);
+            try {
+              deleteDirectoryRecursively(file.toPath());
+            } catch (IOException e) {
+              logger.error("Failed to delete unattached extract directory {}", filename);
             }
           } else {
-            Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
-            clientFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
+            try {
+              Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
+              oldDownloadFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
+            } catch (IllegalArgumentException ignored) {
+              // not a valid v7 uuid
+            }
           }
         });
       }
 
-      // delete any files are older than the cutoff
-      clientFilesOnDisk.stream()
+      // delete any files/directories that are older than the cutoff
+      oldDownloadFilesOnDisk.stream()
           .filter(f -> f.timestamp()
               .isBefore(Instant.now().minusSeconds(TimeUnit.MINUTES.toSeconds(cleanupIntervalMinutes))))
           .forEach(f -> {
-            if (!f.file().delete()) {
-              logger.error(
-                  "Failed to delete expired extract file {}",
-                  f.file().getName());
+            if (f.file.isDirectory()) {
+              try {
+                deleteDirectoryRecursively(f.file().toPath());
+              } catch (IOException ignored) {
+                logger.warn("Failed to delete directory {}", f.file());
+              }
+            } else {
+              if (!f.file().delete()) {
+                logger.error(
+                    "Failed to delete expired extract file {}",
+                    f.file().getName());
+              }
             }
           });
     } catch (IOException e) {
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
@@ -32,17 +32,18 @@ tailormap-api.feature.info.maxitems=30
 tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage
 # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk
 # tailormap-api.extract.cleanup-minutes=120
-# the directory where the extract output files are stored, should be writable by the application
+# the (base) directory where the extract output files are stored, should be writable by the application
+# a subdirectory "tm-extracts" will be created in it and managed by the application
 # tailormap-api.extract.location=/tmp
-# the number of features after which a progress report is sent back to the viewer, to update the progress bar
+# the number of features after which a progress report is sent back to the viewer, to e.g. update a progress bar
 # tailormap-api.extract.progress-report-interval=100
 
 # proxy passthrough regex patterns for layer names, when empty no additional layers are allowed to be proxied
 # eg. use vw_t_gi_%s_[a-fA-F0-9]{32} to match `vw_t_gi_layername_70cae9814c6144808f1c9bb921099794` as a sub-layer of layername
 # %s is replaced with the layer name from the configuration (this uses String.format() syntax, so be aware of the escaping rules for % and \)
 # for regex help see eg: https://regex101.com/ or https://www.regexplanet.com/advanced/java/index.html or https://regexr.com/
 tailormap-api.proxy.passthrough.layerpatterns=
-## list of allowed host names eg. test.com,localhost (no spaces) to validate the layer name patterns, can be empty to allow any host name
+## list of allowed host names e.g. test.com,localhost (no spaces) to validate the layer name patterns, can be empty to allow any host name
 tailormap-api.proxy.passthrough.hostnames=
 
 # whether the API should use GeoTools "Unique Collection" (use DISTINCT in SQL statements) or just
diff --git a/src/main/resources/openapi/viewer-api.yaml b/src/main/resources/openapi/viewer-api.yaml
@@ -1871,7 +1871,7 @@ paths:
                 type: array
                 items:
                   type: string
-                example: '["csv","shape"]'
+                example: ["csv","shape"]
 
   /{viewerKind}/{viewerName}/layer/{appLayerId}/extract/{clientId}:
     description: 'Export the attributes as shown in the attribute list for a layer.'
@@ -1954,6 +1954,9 @@ paths:
                 properties:
                   message:
                     type: string
+                  downloadId:
+                    type: string
+                    description: 'The id to use to download the file once the extract is completed.'
                 required:
                   - message
               example:
@@ -2012,7 +2015,6 @@ paths:
           required: true
           schema:
             type: string
-            format: uuid
       responses:
         '200':
           description: 'OK'
diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties
@@ -7,9 +7,7 @@ tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage
 # the number of features after which a progress report is sent back to the viewer, to update the progress bar
 tailormap-api.extract.progress-report-interval=10
 # any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk
-tailormap-api.extract.cleanup-minutes=15
-# the directory where the extract output files are stored, should be writable by the application
-# tailormap-api.extract.location=/tmp
+tailormap-api.extract.cleanup-minutes=5
 
 tailormap-api.timeout=5000
 tailormap-api.management.hashed-password=#{null}