Skip to content

Commit e46618e

Browse files
committed
Address code review
1 parent 3563960 commit e46618e

5 files changed

Lines changed: 102 additions & 68 deletions

File tree

pom.xml

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,8 @@ SPDX-License-Identifier: MIT
322322
<dependency>
323323
<groupId>org.geotools</groupId>
324324
<artifactId>gt-excel-writer</artifactId>
325-
<version>[35-SNAPSHOT,)</version>
325+
<!-- gt-excel-writer is not in the gt-bom, so we need to specify the version here -->
326+
<version>${geotools.version}</version>
326327
</dependency>
327328
<dependency>
328329
<groupId>org.geotools</groupId>
@@ -708,14 +709,6 @@ SPDX-License-Identifier: MIT
708709
<name>B3Partners public repository</name>
709710
<url>https://repo.b3p.nl/nexus/repository/public/</url>
710711
</repository>
711-
<repository>
712-
<snapshots>
713-
<enabled>true</enabled>
714-
</snapshots>
715-
<id>OSGeo-snapshots</id>
716-
<name>Snapshots hosted by OSGeo</name>
717-
<url>https://repo.osgeo.org/repository/snapshot/</url>
718-
</repository>
719712
</repositories>
720713
<pluginRepositories />
721714
<build>
@@ -1011,14 +1004,14 @@ SPDX-License-Identifier: MIT
10111004
alternatively, use environment variable BPL_JVM_CLASS_ADJUSTMENT when deploying the docker container
10121005
-->
10131006
<BPE_DEFAULT_BPL_JVM_CLASS_ADJUSTMENT>120%</BPE_DEFAULT_BPL_JVM_CLASS_ADJUSTMENT>
1014-
<!-- JVM default is the same as -Xmx. However, the Paketo Java Buildpack memory calculator sets it to
1015-
10M by default, which is too low causing OOM in our application (Netty Solr client and Hikari use more
1016-
than 10MB in direct buffer pools after some time), set it to 256M
1017-
See https://github.com/orgs/paketo-buildpacks/discussions/241
1018-
-->
1019-
<BPE_APPEND_JAVA_TOOL_OPTIONS xml:space="preserve"> -XX:MaxDirectMemorySize=256M</BPE_APPEND_JAVA_TOOL_OPTIONS>
1020-
<!-- for GeoPackage support which uses a native driver -->
1021-
<BPE_APPEND_JAVA_TOOL_OPTIONS xml:space="preserve"> --enable-native-access=ALL-UNNAMED</BPE_APPEND_JAVA_TOOL_OPTIONS>
1007+
<!--
1008+
- JVM default is the same as -Xmx. However, the Paketo Java Buildpack memory calculator sets it to
1009+
10M by default, which is too low causing OOM in our application (Netty Solr client and Hikari use more
1010+
than 10MB in direct buffer pools after some time), set it to 256M
1011+
See https://github.com/orgs/paketo-buildpacks/discussions/241
1012+
1013+
- Enable native access for GeoPackage support which uses a native driver -->
1014+
<BPE_APPEND_JAVA_TOOL_OPTIONS xml:space="preserve"> -XX:MaxDirectMemorySize=256M --enable-native-access=ALL-UNNAMED</BPE_APPEND_JAVA_TOOL_OPTIONS>
10221015
<!-- Headroom is used by the memory calculator to reduce the max total memory limit. The default is 0%,
10231016
but since Tailormap is usually run with unconstrained container memory, set it to 10% to prevent taking
10241017
too much host memory. Although Tailormap should not exhaust heap memory, reduce it as a preventive safety

src/main/java/org/tailormap/api/service/CreateLayerExtractService.java

Lines changed: 83 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,7 @@
77

88
import ch.rasc.sse.eventbus.SseEvent;
99
import ch.rasc.sse.eventbus.SseEventBus;
10-
import java.io.File;
11-
import java.io.IOException;
12-
import java.lang.invoke.MethodHandles;
13-
import java.nio.charset.StandardCharsets;
14-
import java.nio.file.Files;
15-
import java.nio.file.Path;
16-
import java.time.Instant;
17-
import java.util.ArrayList;
18-
import java.util.Comparator;
19-
import java.util.List;
20-
import java.util.Map;
21-
import java.util.Objects;
22-
import java.util.Set;
23-
import java.util.concurrent.TimeUnit;
24-
import java.util.concurrent.atomic.AtomicInteger;
25-
import java.util.stream.Stream;
26-
import java.util.zip.ZipEntry;
27-
import java.util.zip.ZipOutputStream;
10+
import jakarta.annotation.PostConstruct;
2811
import org.apache.commons.lang3.StringUtils;
2912
import org.geotools.api.data.FeatureEvent;
3013
import org.geotools.api.data.FileDataStore;
@@ -56,7 +39,6 @@
5639
import org.springframework.scheduling.annotation.Scheduled;
5740
import org.springframework.stereotype.Service;
5841
import org.springframework.transaction.annotation.Transactional;
59-
import org.springframework.web.server.ResponseStatusException;
6042
import org.tailormap.api.controller.LayerExtractController;
6143
import org.tailormap.api.geotools.collection.ProgressReportingFeatureCollection;
6244
import org.tailormap.api.geotools.data.excel.ExcelDataStore;
@@ -68,6 +50,26 @@
6850
import tools.jackson.databind.SerializationFeature;
6951
import tools.jackson.databind.json.JsonMapper;
7052

53+
import java.io.File;
54+
import java.io.IOException;
55+
import java.io.UncheckedIOException;
56+
import java.lang.invoke.MethodHandles;
57+
import java.nio.charset.StandardCharsets;
58+
import java.nio.file.Files;
59+
import java.nio.file.Path;
60+
import java.time.Instant;
61+
import java.util.ArrayList;
62+
import java.util.Comparator;
63+
import java.util.List;
64+
import java.util.Map;
65+
import java.util.Objects;
66+
import java.util.Set;
67+
import java.util.concurrent.TimeUnit;
68+
import java.util.concurrent.atomic.AtomicInteger;
69+
import java.util.stream.Stream;
70+
import java.util.zip.ZipEntry;
71+
import java.util.zip.ZipOutputStream;
72+
7173
@Service
7274
public class CreateLayerExtractService {
7375
private static final Logger logger =
@@ -77,9 +79,13 @@ public class CreateLayerExtractService {
7779
private final FeatureSourceFactoryHelper featureSourceFactoryHelper;
7880
private final FilterFactory ff = CommonFactoryFinder.getFilterFactory(GeoTools.getDefaultHints());
7981

82+
private static final String EXTRACT_SUBDIRECTORY = "tm-extracts";
8083
// we can safely use the tmp dir as a default here because we are running in a docker container without a shell so
8184
// access is limited
85+
// Base directory from config; actual export dir is <base>/tm-extracts
8286
@Value("${tailormap-api.extract.location:#{systemProperties['java.io.tmpdir']}}")
87+
private String exportFilesBaseLocation;
88+
8389
private String exportFilesLocation;
8490

8591
@Value("${tailormap-api.extract.cleanup-minutes:120}")
@@ -91,6 +97,19 @@ public class CreateLayerExtractService {
9197
@Value("${tailormap-api.features.wfs_count_exact:false}")
9298
private boolean exactWfsCounts;
9399

100+
@PostConstruct
101+
void initializeExtractDirectory() {
102+
try {
103+
Path exportRoot = Path.of(exportFilesBaseLocation, EXTRACT_SUBDIRECTORY);
104+
Files.createDirectories(exportRoot);
105+
this.exportFilesLocation = exportRoot.toRealPath().toString();
106+
logger.info("Using extract output directory: {}", this.exportFilesLocation);
107+
} catch (IOException e) {
108+
throw new UncheckedIOException(
109+
"Failed to initialize extract directory under base path: " + exportFilesBaseLocation, e);
110+
}
111+
}
112+
94113
public CreateLayerExtractService(
95114
@Qualifier("viewerSseEventBus") SseEventBus eventBus,
96115
JsonMapper jsonMapper,
@@ -249,13 +268,12 @@ private void handleGeoPackage(
249268
@NonNull String outputFileName) {
250269

251270
SimpleFeatureSource inputFeatureSource = null;
252-
File outputFile = null;
271+
File outputFile;
253272
try {
254273
outputFile = getValidatedOutputFile(outputFileName);
255274
if (!logger.isDebugEnabled()) {
256275
// delete in production after JVM exit because the event bus will be reset when the JVM exits, and then
257-
// we
258-
// are unlikely to have a reference to the file anymore.
276+
// we are unlikely to have a reference to the file anymore.
259277
// In debug/development mode we want to keep the file for inspection.
260278
outputFile.deleteOnExit();
261279
}
@@ -347,10 +365,15 @@ private void handleSingleFileFormats(
347365
clientId,
348366
"Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format."
349367
.formatted(featCount, ExcelDataStore.getMaxRows()));
350-
throw new ResponseStatusException(
351-
org.springframework.http.HttpStatus.BAD_REQUEST,
352-
"Extract result contains %d features, which exceeds the maximum of %d for Excel output format. Please refine your filter or choose a different output format."
353-
.formatted(featCount, ExcelDataStore.getMaxRows()));
368+
logger.error(
369+
"Extract result contains {} features, which exceeds the maximum of {} for Excel output format. Please refine your filter or choose a different output format.",
370+
featCount,
371+
ExcelDataStore.getMaxRows());
372+
// nothing we can do now as we are in a background/async process, so we just return without creating an
373+
// extract file.
374+
// The client will receive no extract completed event, and we have already emitted an error message with
375+
// details.
376+
return;
354377
}
355378

356379
outputDataStore = this.getExtractDataStore(
@@ -390,7 +413,7 @@ private void handleSingleFileFormats(
390413
this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features");
391414
logger.error("Output datastore is not a SimpleFeatureStore, cannot write features");
392415
}
393-
} catch (IOException | SchemaException | IllegalArgumentException e) {
416+
} catch (IOException | SchemaException | IllegalArgumentException | NullPointerException e) {
394417
emitError(clientId, e.getMessage());
395418
logger.error("Creating extract failed", e);
396419
} finally {
@@ -527,10 +550,9 @@ private void handleWithShapeDumper(
527550
.resolve(baseName)
528551
.toFile()
529552
.getCanonicalFile();
530-
if (logger.isDebugEnabled()) {
553+
if (!logger.isDebugEnabled()) {
531554
// delete in production after JVM exit because the event bus will be reset when the JVM exits, and then
532-
// we
533-
// are unlikely to have a reference to the file anymore.
555+
// we are unlikely to have a reference to the file anymore.
534556
// In debug/development mode we want to keep the directory for inspection.
535557
outputDirectory.deleteOnExit();
536558
}
@@ -616,7 +638,7 @@ private Query createQuery(
616638
@Scheduled(fixedDelay = 5, timeUnit = TimeUnit.MINUTES, initialDelay = 15)
617639
public void cleanupExpiredExtracts() {
618640
logger.debug("Running expired extracts cleanup...");
619-
List<FileWithAttributes> clientFilesOnDisk = new ArrayList<>();
641+
List<FileWithAttributes> oldDownloadFilesOnDisk = new ArrayList<>();
620642
Set<String> validClientIds = eventBus.getAllClientIds();
621643

622644
// list download files in export location and delete those that are not bound to an active sse stream client
@@ -635,8 +657,12 @@ public void cleanupExpiredExtracts() {
635657
logger.error("Failed to delete unattached extract file {}", filename);
636658
}
637659
} else {
638-
Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
639-
clientFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
660+
try {
661+
Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
662+
oldDownloadFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
663+
} catch (IllegalArgumentException ignored) {
664+
// not a valid v7 uuid
665+
}
640666
}
641667
});
642668

@@ -651,25 +677,39 @@ public void cleanupExpiredExtracts() {
651677
}
652678
String clientId = parts[1];
653679
if (!validClientIds.contains(clientId)) {
654-
if (!file.delete()) {
655-
logger.error("Failed to delete unattached extract file {}", filename);
680+
try {
681+
deleteDirectoryRecursively(file.toPath());
682+
} catch (IOException e) {
683+
logger.error("Failed to delete unattached extract directory {}", filename);
656684
}
657685
} else {
658-
Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
659-
clientFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
686+
try {
687+
Instant timestampPart = UUIDv7.timestampAsInstant(UUIDv7.fromString(parts[2]));
688+
oldDownloadFilesOnDisk.add(new FileWithAttributes(file, timestampPart, clientId));
689+
} catch (IllegalArgumentException ignored) {
690+
// not a valid v7 uuid
691+
}
660692
}
661693
});
662694
}
663695

664-
// delete any files are older than the cutoff
665-
clientFilesOnDisk.stream()
696+
// delete any files/directories are older than the cutoff
697+
oldDownloadFilesOnDisk.stream()
666698
.filter(f -> f.timestamp()
667699
.isBefore(Instant.now().minusSeconds(TimeUnit.MINUTES.toSeconds(cleanupIntervalMinutes))))
668700
.forEach(f -> {
669-
if (!f.file().delete()) {
670-
logger.error(
671-
"Failed to delete expired extract file {}",
672-
f.file().getName());
701+
if (f.file.isDirectory()) {
702+
try {
703+
deleteDirectoryRecursively(f.file().toPath());
704+
} catch (IOException ignored) {
705+
logger.warn("Failed to delete directory {}", f.file());
706+
}
707+
} else {
708+
if (!f.file().delete()) {
709+
logger.error(
710+
"Failed to delete expired extract file {}",
711+
f.file().getName());
712+
}
673713
}
674714
});
675715
} catch (IOException e) {

src/main/resources/application.properties

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,18 @@ tailormap-api.feature.info.maxitems=30
3232
tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage
3333
# any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk
3434
# tailormap-api.extract.cleanup-minutes=120
35-
# the directory where the extract output files are stored, should be writable by the application
35+
# the (base) directory where the extract output files are stored, should be writable by the application
36+
# a subdirectory "tm-extracs" will be created to be managed by the application
3637
# tailormap-api.extract.location=/tmp
37-
# the number of features after which a progress report is sent back to the viewer, to update the progress bar
38+
# the number of features after which a progress report is sent back to the viewer, to e.g. update a progress bar
3839
# tailormap-api.extract.progress-report-interval=100
3940

4041
# proxy passthrough regex patterns for layer names, when empty no additional layers are allowed to be proxied
4142
# eg. use vw_t_gi_%s_[a-fA-F0-9]{32} to match `vw_t_gi_layername_70cae9814c6144808f1c9bb921099794` as a sub-layer of layername
4243
# %s is replaced with the layer name from the configuration (this uses String.format() syntax, so be aware of the escaping rules for % and \)
4344
# for regex help see eg: https://regex101.com/ or https://www.regexplanet.com/advanced/java/index.html or https://regexr.com/
4445
tailormap-api.proxy.passthrough.layerpatterns=
45-
## list of allowed host names eg. test.com,localhost (no spaces) to validate the layer name patterns, can be empty to allow any host name
46+
## list of allowed host names e.g. test.com,localhost (no spaces) to validate the layer name patterns, can be empty to allow any host name
4647
tailormap-api.proxy.passthrough.hostnames=
4748

4849
# whether the API should use GeoTools "Unique Collection" (use DISTINCT in SQL statements) or just

src/main/resources/openapi/viewer-api.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,7 +1871,7 @@ paths:
18711871
type: array
18721872
items:
18731873
type: string
1874-
example: '["csv","shape"]'
1874+
example: ["csv","shape"]
18751875

18761876
/{viewerKind}/{viewerName}/layer/{appLayerId}/extract/{clientId}:
18771877
description: 'Export the attributes as shown in the attribute list for a layer.'
@@ -1954,6 +1954,9 @@ paths:
19541954
properties:
19551955
message:
19561956
type: string
1957+
downloadId:
1958+
type: string
1959+
description: 'The id to use to download the file once the extract is completed.'
19571960
required:
19581961
- message
19591962
example:
@@ -2012,7 +2015,6 @@ paths:
20122015
required: true
20132016
schema:
20142017
type: string
2015-
format: uuid
20162018
responses:
20172019
'200':
20182020
description: 'OK'

src/test/resources/application.properties

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ tailormap-api.extract.allowed-outputformats=csv,geojson,xlsx,shape,geopackage
77
# the number of features after which a progress report is sent back to the viewer, to update the progress bar
88
tailormap-api.extract.progress-report-interval=10
99
# any files older than this (in minutes) in the extract output directory will be deleted by a scheduled job, to prevent filling up the disk
10-
tailormap-api.extract.cleanup-minutes=15
11-
# the directory where the extract output files are stored, should be writable by the application
12-
# tailormap-api.extract.location=/tmp
10+
tailormap-api.extract.cleanup-minutes=5
1311

1412
tailormap-api.timeout=5000
1513
tailormap-api.management.hashed-password=#{null}

0 commit comments

Comments
 (0)