Skip to content

Commit 8ded6a3

Browse files
committed
HTM-2017: Improve Excel extract by omitting column auto-sizing on large record sets
1 parent b3032e7 commit 8ded6a3

5 files changed

Lines changed: 246 additions & 50 deletions

File tree

src/main/java/org/tailormap/api/service/CreateLayerExtractService.java

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import ch.rasc.sse.eventbus.SseEventBus;
1010
import java.io.File;
1111
import java.io.IOException;
12-
import java.io.Serializable;
1312
import java.lang.invoke.MethodHandles;
1413
import java.nio.charset.StandardCharsets;
1514
import java.nio.file.Files;
@@ -269,6 +268,10 @@ private void handleSingleFileFormats(
269268
DataUtilities.createSubType(inputFeatureSource.getSchema(), attributes.toArray(new String[0]));
270269
outputDataStore.createSchema(fType);
271270

271+
if (outputDataStore instanceof ExcelDataStore excelDataStore) {
272+
excelDataStore.setEnableCellAutoSizing(featCount >= 0 && featCount < 1000);
273+
}
274+
272275
final AtomicInteger featsAdded = new AtomicInteger();
273276
if (outputDataStore.getFeatureSource() instanceof SimpleFeatureStore featureStore) {
274277
featureStore.setTransaction(outputTransaction);
@@ -289,8 +292,8 @@ private void handleSingleFileFormats(
289292
});
290293
featureStore.addFeatures(inputFeatureSource.getFeatures(q));
291294
outputTransaction.commit();
292-
this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully");
293295
outputDataStore.dispose();
296+
this.emitProgress(clientId, outputFileName, 100, true, "Extract completed successfully");
294297
} else {
295298
outputDataStore.dispose();
296299
this.emitError(clientId, "Output datastore is not a SimpleFeatureStore, cannot write features");
@@ -372,13 +375,19 @@ private FileDataStore getExtractDataStore(
372375
true));
373376
}
374377
case XLSX -> {
375-
Map<String, Serializable> params = Map.of(
376-
ExcelDataStoreFactory.FILE_PARAM.key,
377-
outputFile,
378-
ExcelDataStoreFactory.SHEET_PARAM.key,
379-
// typeName could have a prefix; for Excel sheet names ':' is disallowed, max length is 31
380-
typeName.substring(typeName.lastIndexOf(":") + 1, Math.min(typeName.length(), 31)));
381-
return (FileDataStore) new ExcelDataStoreFactory().createNewDataStore(params);
378+
// replace any invalid characters such as /\?*[] with '_' and clip to 31 characters because Excel has
379+
// limitations on sheet names. Also clip off any WFS namespace prefix in the type name, which is often
380+
// separated by a ':' character, because ':' is not allowed in Excel sheet names.
381+
typeName = typeName.contains(":")
382+
? typeName.substring(typeName.lastIndexOf(":") + 1).replaceAll("[\\\\/?*\\[\\]:]", "_")
383+
: typeName.replaceAll("[\\\\/?*\\[\\]:]", "_");
384+
typeName = typeName.substring(0, Math.min(typeName.length(), 31));
385+
return (FileDataStore) new ExcelDataStoreFactory()
386+
.createNewDataStore(Map.of(
387+
ExcelDataStoreFactory.FILE_PARAM.key,
388+
outputFile,
389+
ExcelDataStoreFactory.SHEET_PARAM.key,
390+
typeName));
382391
}
383392
case GEOJSON -> {
384393
return (FileDataStore) new GeoJSONDataStoreFactory()

src/test/java/org/tailormap/api/controller/LayerExtractControllerIntegrationTest.java

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import static org.tailormap.api.controller.TestUrls.layerProxiedWithAuthInPublicApp;
3030

3131
import java.io.ByteArrayInputStream;
32-
import java.io.IOException;
3332
import java.io.InputStream;
3433
import java.nio.charset.StandardCharsets;
3534
import java.util.HashSet;
@@ -57,15 +56,13 @@
5756
import org.springframework.test.web.servlet.MvcResult;
5857
import org.tailormap.api.StaticTestData;
5958
import org.tailormap.api.annotation.PostgresIntegrationTest;
60-
import org.tailormap.api.viewer.model.ServerSentEventResponse;
61-
import tools.jackson.databind.ObjectMapper;
6259

6360
@PostgresIntegrationTest
6461
@AutoConfigureMockMvc
6562
@Execution(ExecutionMode.CONCURRENT)
6663
@Stopwatch
6764
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
68-
class LayerExtractControllerIntegrationTest {
65+
class LayerExtractControllerIntegrationTest extends SseParsingUtils {
6966
private static final String extractPath = "/extract/";
7067
private static final String downloadPath = "/extract/download/";
7168
// Use a unique clientId per test instance to avoid cross-test interference
@@ -533,40 +530,4 @@ void should_export_large_filter_to_shape() throws Exception {
533530
assertEquals(6, extensions.size(), "Expected 6 unique file extensions in the shapefile zip");
534531
}
535532
}
536-
537-
/**
538-
* Parse the last non-empty line from the SSE stream that looks something like:
539-
* {@code data:{"details":{"message":"Extract task
540-
* completed","progress":100,"file":"begroeidterreindeel15061479295163305053.csv"},"eventType":"extract-completed","id":"019d6838-7f48-7053-9256-dd4b57c14264"}
541-
* } as JSON and extract the file from the details.
542-
*/
543-
private String getLastCompletedEventJson(String sseMessages) throws IOException {
544-
return java.util.Arrays.stream(sseMessages.split("\\R"))
545-
.map(String::trim)
546-
.filter(line -> !line.isEmpty())
547-
.filter(line -> line.startsWith("data:"))
548-
.filter(line -> line.contains("\"eventType\":\"extract-completed\""))
549-
.reduce((first, second) -> second)
550-
.orElseThrow()
551-
.substring("data:".length());
552-
}
553-
554-
private String getDownloadId(String eventJson) {
555-
return new ObjectMapper()
556-
.readTree(eventJson)
557-
.path("details")
558-
.path("downloadId")
559-
.asString();
560-
}
561-
562-
private int count_completed_messages(String s) {
563-
int count = 0;
564-
int index = 0;
565-
final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.EXTRACT_COMPLETED + "\"";
566-
while ((index = s.indexOf(marker, index)) != -1) {
567-
count++;
568-
index += marker.length();
569-
}
570-
return count;
571-
}
572533
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
/*
2+
* Copyright (C) 2026 B3Partners B.V.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
package org.tailormap.api.controller;
7+
8+
import static java.util.concurrent.TimeUnit.MINUTES;
9+
import static java.util.concurrent.TimeUnit.SECONDS;
10+
import static org.hamcrest.MatcherAssert.assertThat;
11+
import static org.hamcrest.Matchers.containsString;
12+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
13+
import static org.junit.jupiter.api.Assertions.assertAll;
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
import static org.springframework.security.test.web.servlet.request.SecurityMockMvcRequestPostProcessors.csrf;
16+
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
17+
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
18+
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.request;
19+
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
20+
import static org.tailormap.api.TestRequestProcessor.setServletPath;
21+
import static org.tailormap.api.controller.TestUrls.layerOsmPolygonPostgis;
22+
23+
import java.io.ByteArrayInputStream;
24+
import java.io.InputStream;
25+
import java.nio.charset.StandardCharsets;
26+
import java.util.HashMap;
27+
import java.util.Map;
28+
import org.apache.poi.ss.usermodel.CellType;
29+
import org.apache.poi.ss.usermodel.Sheet;
30+
import org.apache.poi.ss.usermodel.Workbook;
31+
import org.apache.poi.ss.usermodel.WorkbookFactory;
32+
import org.apache.poi.util.IOUtils;
33+
import org.awaitility.Awaitility;
34+
import org.junit.jupiter.api.BeforeEach;
35+
import org.junit.jupiter.api.MethodOrderer;
36+
import org.junit.jupiter.api.Test;
37+
import org.junit.jupiter.api.TestMethodOrder;
38+
import org.junit.jupiter.api.parallel.Execution;
39+
import org.junit.jupiter.api.parallel.ExecutionMode;
40+
import org.junitpioneer.jupiter.Issue;
41+
import org.junitpioneer.jupiter.Stopwatch;
42+
import org.springframework.beans.factory.annotation.Autowired;
43+
import org.springframework.beans.factory.annotation.Value;
44+
import org.springframework.boot.webmvc.test.autoconfigure.AutoConfigureMockMvc;
45+
import org.springframework.http.MediaType;
46+
import org.springframework.test.web.servlet.MockMvc;
47+
import org.springframework.test.web.servlet.MvcResult;
48+
import org.tailormap.api.annotation.PostgresIntegrationTest;
49+
50+
@PostgresIntegrationTest
51+
@AutoConfigureMockMvc
52+
@Execution(ExecutionMode.CONCURRENT)
53+
@Issue("HTM-2017: Large Excel export takes long time")
54+
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
55+
class LayerExtractControllerLargeExcelIntegrationTest extends SseParsingUtils {
56+
private static final String extractPath = "/extract/";
57+
private static final String downloadPath = "/extract/download/";
58+
// Use a unique clientId per test instance to avoid cross-test interference
59+
// when running concurrently.
60+
private final String sseClientId = "testcase-" + System.nanoTime();
61+
62+
@Autowired
63+
private MockMvc mockMvc;
64+
65+
@Value("${tailormap-api.base-path}")
66+
private String apiBasePath;
67+
68+
/** SSE connection result; its response buffer accumulates server-sent events. */
69+
private MvcResult sseResult;
70+
71+
@BeforeEach
72+
void start_sse_stream() throws Exception {
73+
final String sseUrl = apiBasePath + "/events/" + sseClientId;
74+
sseResult = mockMvc.perform(get(sseUrl)
75+
.accept(MediaType.TEXT_EVENT_STREAM)
76+
.with(setServletPath(sseUrl))
77+
.acceptCharset(StandardCharsets.UTF_8))
78+
.andExpect(request().asyncStarted())
79+
.andReturn();
80+
}
81+
82+
@Stopwatch
83+
@Test
84+
void should_export_large_dataset_to_excel() throws Exception {
85+
final String extractUrl = apiBasePath + layerOsmPolygonPostgis + extractPath + sseClientId;
86+
mockMvc.perform(post(extractUrl)
87+
.accept(MediaType.APPLICATION_JSON)
88+
.with(setServletPath(extractUrl))
89+
.with(csrf())
90+
.param(
91+
"attributes",
92+
"osm_id,access,addr:housename,addr:housenumber,addr:interpolation,admin_level,aerialway,aeroway,amenity,area,barrier,bicycle,brand,bridge,boundary,building,construction,covered,culvert,cutting,denomination,disused,embankment,foot,generator:source,harbour,highway,historic,horse,intermittent,junction,landuse,layer,leisure,lock,man_made,military,motorcar,name,natural,office,oneway,operator,place,population,power,power_source,public_transport,railway,ref,religion,route,service,shop,sport,surface,toll,tourism,tower:type,tracktype,tunnel,water,waterway,wetland,width,wood,z_order,way_area")
93+
.param("outputFormat", "xlsx")
94+
.acceptCharset(StandardCharsets.UTF_8)
95+
.characterEncoding(StandardCharsets.UTF_8)
96+
.contentType(MediaType.APPLICATION_FORM_URLENCODED))
97+
.andExpect(status().isAccepted());
98+
99+
// The SseEventBus may dispatch events slightly after the POST returns.
100+
// Awaitility polls the buffered SSE response until the expected content appears.
101+
Awaitility.await()
102+
.atMost(10, SECONDS)
103+
.untilAsserted(() -> assertThat(
104+
sseResult.getResponse().getContentAsString(), containsString("Extract task received")));
105+
106+
// should finish in less than 2 minutes
107+
Awaitility.await().pollInterval(5, SECONDS).atMost(2, MINUTES).untilAsserted(() -> {
108+
final String stream = sseResult.getResponse().getContentAsString();
109+
assertThat(count_completed_messages(stream), greaterThanOrEqualTo(1));
110+
});
111+
112+
final String lastCompletedEventJson =
113+
getLastCompletedEventJson(sseResult.getResponse().getContentAsString());
114+
assertThat(lastCompletedEventJson.length(), greaterThanOrEqualTo(100));
115+
116+
final String extractedDownloadId = getDownloadId(lastCompletedEventJson);
117+
assertThat(extractedDownloadId, containsString(".xlsx"));
118+
119+
final String downloadUrl = apiBasePath + layerOsmPolygonPostgis + downloadPath + extractedDownloadId;
120+
MvcResult download = mockMvc.perform(get(downloadUrl).with(setServletPath(downloadUrl)))
121+
.andExpect(status().isOk())
122+
.andExpect(result -> {
123+
String contentType = result.getResponse().getContentType();
124+
assertThat(
125+
contentType,
126+
containsString("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"));
127+
128+
String contentDisposition = result.getResponse().getHeader("Content-Disposition");
129+
assertThat(contentDisposition, containsString("attachment; filename="));
130+
assertThat(contentDisposition, containsString(extractedDownloadId));
131+
})
132+
.andReturn();
133+
134+
// open the Excel file and check that we have the expected content
135+
// allow reading large files into byte arrays, this is 10x the default value
136+
int rememberMaxOverride = IOUtils.getByteArrayMaxOverride();
137+
IOUtils.setByteArrayMaxOverride(1_000_000_000);
138+
try (InputStream inp = new ByteArrayInputStream(download.getResponse().getContentAsByteArray());
139+
Workbook wb = WorkbookFactory.create(inp)) {
140+
141+
Sheet sheet = wb.getSheetAt(0);
142+
143+
assertAll(
144+
"Check sheet",
145+
() -> assertEquals(
146+
102467 + /*header row*/ 1,
147+
sheet.getPhysicalNumberOfRows(),
148+
() -> "Expected " + 102467 + /*header row*/ 1
149+
+ " rows in the Excel sheet, including header and data rows"),
150+
() -> assertEquals("osm_polygon", sheet.getSheetName(), "Expected sheet name to be osm_polygon"),
151+
() -> assertEquals(
152+
69, sheet.getRow(0).getPhysicalNumberOfCells(), "Expected 69 columns in the header row"));
153+
154+
Map<String, Integer> columnNames = new HashMap<>();
155+
sheet.getRow(0).forEach(cell -> columnNames.put(cell.getStringCellValue(), cell.getColumnIndex()));
156+
157+
assertAll(
158+
"Check first data row",
159+
() -> assertEquals(
160+
CellType.NUMERIC,
161+
sheet.getRow(1).getCell(columnNames.get("osm_id")).getCellType(),
162+
"Expected first cell in header to be numeric"),
163+
() -> assertEquals(
164+
CellType.BLANK,
165+
sheet.getRow(1).getCell(columnNames.get("access")).getCellType(),
166+
"Expected second cell in header to be a string"),
167+
() -> assertEquals(
168+
"meadow",
169+
sheet.getRow(1).getCell(columnNames.get("landuse")).getStringCellValue()),
170+
() -> assertEquals(
171+
68651.3,
172+
sheet.getRow(1).getCell(columnNames.get("way_area")).getNumericCellValue(),
173+
0.1));
174+
} finally {
175+
IOUtils.setByteArrayMaxOverride(rememberMaxOverride);
176+
}
177+
}
178+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright (C) 2026 B3Partners B.V.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*/
6+
package org.tailormap.api.controller;
7+
8+
import org.tailormap.api.viewer.model.ServerSentEventResponse;
9+
import tools.jackson.core.JacksonException;
10+
import tools.jackson.databind.ObjectMapper;
11+
12+
abstract class SseParsingUtils {
13+
14+
/**
15+
* Parse the last non-empty line from the SSE stream that looks something like:
16+
* {@code data:{"details":{"message":"Extract task
17+
* completed","progress":100,"downloadId":"begroeidterreindeel15061479295163305053.csv"},"eventType":"extract-completed","id":"019d6838-7f48-7053-9256-dd4b57c14264"}
18+
* } as JSON and extract the file from the details.
19+
*/
20+
String getLastCompletedEventJson(String sseMessages) {
21+
return java.util.Arrays.stream(sseMessages.split("\\R"))
22+
.map(String::trim)
23+
.filter(line -> !line.isEmpty())
24+
.filter(line -> line.startsWith("data:"))
25+
.filter(line -> line.contains("\"eventType\":\"extract-completed\""))
26+
.reduce((first, second) -> second)
27+
.orElseThrow()
28+
.substring("data:".length());
29+
}
30+
31+
String getDownloadId(String eventJson) throws JacksonException {
32+
return new ObjectMapper()
33+
.readTree(eventJson)
34+
.path("details")
35+
.path("downloadId")
36+
.asString();
37+
}
38+
39+
int count_completed_messages(String s) {
40+
int count = 0;
41+
int index = 0;
42+
final String marker = "\"eventType\":\"" + ServerSentEventResponse.EventTypeEnum.EXTRACT_COMPLETED + "\"";
43+
while ((index = s.indexOf(marker, index)) != -1) {
44+
count++;
45+
index += marker.length();
46+
}
47+
return count;
48+
}
49+
}

src/test/java/org/tailormap/api/controller/TestUrls.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,5 @@ public interface TestUrls {
1414
String layerWegdeelSqlServer = "/app/default/layer/lyr:snapshot-geoserver:sqlserver:wegdeel";
1515
String layerOsmPolygonPostgis = "/app/default/layer/lyr:snapshot-geoserver:postgis:osm_polygon";
1616
String layerProxiedWithAuthInPublicApp = "/app/default/layer/lyr:bestuurlijkegebieden-proxied:Provinciegebied";
17-
String layerWaterdeel = "/app/default/layer/lyr:snapshot-geoserver:oracle:WATERDEEL";
1817
String layerKadastraalPerceel = "/app/default/layer/lyr:snapshot-geoserver:postgis:kadastraal_perceel";
1918
}

0 commit comments

Comments
 (0)