Skip to content

Commit 9da7bea

Browse files
harshach, Copilot, Copilot, harsh-vador, mohityadav766
authored
Add export functionality for search (#26900)
* Add export functionality for search * Address comments * Address comments * Fix tests * Fix linter issues * Address comments * Address comments * Address comments * Improve export perf * Fix search export as per comments * fix linting * Update openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Add fullyQualifiedName tiebreaker sort for stable search_after pagination Agent-Logs-Url: https://github.com/open-metadata/OpenMetadata/sessions/b371219a-08ed-48c1-94d0-17a752d99e68 Co-authored-by: harshach <38649+harshach@users.noreply.github.com> * remove async methods * work on search related UI improvements * add locale files * fix ui checkstyle * fix search export spec * address gitar * Add from and Size * add support for paginated data * address gitar & fix failing unit test * Fix unit test * Address latest comments * Address latest comments * Fix playwright tests * Revert unrelated changes from search export PR Remove WebsocketNotificationHandler UUID overloads (dead code after async endpoint deletion) and revert OntologyExplorer files that are unrelated to search export. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Address review: fix duplicate SubjectContext and misleading export count - Create SubjectContext once in exportSearchResults and pass to buildExportSearchRequest instead of constructing it twice - Remove inaccurate count from "All matching assets" option since it showed the current tab total, not the cross-index total Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Fetch actual cross-index count for "All matching assets" export option When the export modal opens, fire a lightweight count query against the dataAsset index to show the real total instead of the current tab's count. The count renders once available; if the query fails the modal remains usable without a count. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: harshach <38649+harshach@users.noreply.github.com> Co-authored-by: Harsh Vador <58542468+harsh-vador@users.noreply.github.com> Co-authored-by: Harsh Vador <harsh.vador@somaiya.edu> Co-authored-by: mohitdeuex <mohit.y@deuexsolutions.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 58a76d7 commit 9da7bea

37 files changed

Lines changed: 2276 additions & 74 deletions

File tree

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/SearchResourceIT.java

Lines changed: 151 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,15 @@
99

1010
import com.fasterxml.jackson.databind.JsonNode;
1111
import com.fasterxml.jackson.databind.ObjectMapper;
12+
import java.net.URI;
13+
import java.net.http.HttpClient;
14+
import java.net.http.HttpRequest;
15+
import java.net.http.HttpResponse;
16+
import java.time.Duration;
1217
import java.util.ArrayList;
1318
import java.util.List;
19+
import java.util.concurrent.TimeUnit;
20+
import org.awaitility.Awaitility;
1421
import org.junit.jupiter.api.Test;
1522
import org.junit.jupiter.api.extension.ExtendWith;
1623
import org.junit.jupiter.api.parallel.Execution;
@@ -1464,4 +1471,148 @@ void testSearchWithoutTrackTotalHits(TestNamespace ns) throws Exception {
14641471
JsonNode root = OBJECT_MAPPER.readTree(response);
14651472
assertTrue(root.has("hits"), "Response should have hits");
14661473
}
1474+
1475+
// ===================================================================
1476+
// SEARCH EXPORT TESTS (streaming CSV endpoint)
1477+
// ===================================================================
1478+
1479+
private static final HttpClient HTTP_CLIENT =
1480+
HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
1481+
1482+
private HttpResponse<String> httpGetExport(String path) throws Exception {
1483+
String baseUrl = SdkClients.getServerUrl();
1484+
String token = SdkClients.getAdminToken();
1485+
1486+
HttpRequest request =
1487+
HttpRequest.newBuilder()
1488+
.uri(URI.create(baseUrl + path))
1489+
.header("Authorization", "Bearer " + token)
1490+
.header("Accept", "text/csv")
1491+
.timeout(Duration.ofSeconds(30))
1492+
.GET()
1493+
.build();
1494+
1495+
return HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
1496+
}
1497+
1498+
@Test
1499+
void testExportReturnsOkWithCsvContent(TestNamespace ns) throws Exception {
1500+
HttpResponse<String> response = httpGetExport("/v1/search/export?q=*&index=table_search_index");
1501+
1502+
assertEquals(200, response.statusCode(), "Export should return HTTP 200");
1503+
1504+
String body = response.body();
1505+
assertTrue(
1506+
body.startsWith("Entity Type,Service Name"), "Response should start with CSV header");
1507+
}
1508+
1509+
@Test
1510+
void testExportWithSizeLimitsRows(TestNamespace ns) throws Exception {
1511+
createTestTable(ns, "export_size_test_1");
1512+
createTestTable(ns, "export_size_test_2");
1513+
createTestTable(ns, "export_size_test_3");
1514+
1515+
Awaitility.await()
1516+
.atMost(15, TimeUnit.SECONDS)
1517+
.pollInterval(500, TimeUnit.MILLISECONDS)
1518+
.until(
1519+
() -> {
1520+
HttpResponse<String> r =
1521+
httpGetExport("/v1/search/export?q=export_size_test&index=table_search_index");
1522+
return r.statusCode() == 200 && r.body().split("\n").length >= 4;
1523+
});
1524+
1525+
HttpResponse<String> allResponse =
1526+
httpGetExport("/v1/search/export?q=export_size_test&index=table_search_index");
1527+
assertEquals(200, allResponse.statusCode());
1528+
int allLineCount = allResponse.body().split("\n").length;
1529+
1530+
HttpResponse<String> limitedResponse =
1531+
httpGetExport("/v1/search/export?q=export_size_test&index=table_search_index&size=1");
1532+
assertEquals(200, limitedResponse.statusCode());
1533+
int limitedLineCount = limitedResponse.body().split("\n").length;
1534+
1535+
// limitedResponse should have header + at most 1 data row = 2 lines
1536+
assertTrue(
1537+
limitedLineCount <= 2,
1538+
"Export with size=1 should have at most 2 lines (header + 1 row), got " + limitedLineCount);
1539+
assertTrue(
1540+
allLineCount >= limitedLineCount,
1541+
"Export without size should have >= rows than export with size=1");
1542+
}
1543+
1544+
@Test
1545+
void testExportWithoutSizeExportsAll(TestNamespace ns) throws Exception {
1546+
HttpResponse<String> response = httpGetExport("/v1/search/export?q=*&index=table_search_index");
1547+
1548+
assertEquals(200, response.statusCode(), "Export without size should return 200");
1549+
assertTrue(
1550+
response.body().contains("Entity Type,Service Name"), "Response should contain CSV header");
1551+
}
1552+
1553+
@Test
1554+
void testExportWithSizeAndSortParameters(TestNamespace ns) throws Exception {
1555+
HttpResponse<String> response =
1556+
httpGetExport(
1557+
"/v1/search/export?q=*&index=table_search_index&size=5&sort_field=name.keyword&sort_order=asc");
1558+
1559+
assertEquals(200, response.statusCode());
1560+
assertTrue(response.body().startsWith("Entity Type,Service Name"));
1561+
}
1562+
1563+
@Test
1564+
void testExportWithDataAssetIndex(TestNamespace ns) throws Exception {
1565+
HttpResponse<String> response = httpGetExport("/v1/search/export?q=*&index=dataAsset");
1566+
1567+
assertEquals(200, response.statusCode(), "Export with dataAsset index should return 200");
1568+
assertTrue(response.body().startsWith("Entity Type,Service Name"));
1569+
}
1570+
1571+
@Test
1572+
void testExportWithFromAndSizeForPagination(TestNamespace ns) throws Exception {
1573+
createTestTable(ns, "export_page_test_a");
1574+
createTestTable(ns, "export_page_test_b");
1575+
createTestTable(ns, "export_page_test_c");
1576+
1577+
Awaitility.await()
1578+
.atMost(15, TimeUnit.SECONDS)
1579+
.pollInterval(500, TimeUnit.MILLISECONDS)
1580+
.until(
1581+
() -> {
1582+
HttpResponse<String> r =
1583+
httpGetExport("/v1/search/export?q=export_page_test&index=table_search_index");
1584+
return r.statusCode() == 200 && r.body().split("\n").length >= 4;
1585+
});
1586+
1587+
HttpResponse<String> page1 =
1588+
httpGetExport(
1589+
"/v1/search/export?q=export_page_test&index=table_search_index"
1590+
+ "&sort_field=name.keyword&sort_order=asc&from=0&size=1");
1591+
assertEquals(200, page1.statusCode());
1592+
String[] page1Lines = page1.body().split("\n");
1593+
assertTrue(page1Lines.length <= 2, "from=0&size=1 should return at most 2 lines");
1594+
1595+
HttpResponse<String> page2 =
1596+
httpGetExport(
1597+
"/v1/search/export?q=export_page_test&index=table_search_index"
1598+
+ "&sort_field=name.keyword&sort_order=asc&from=1&size=1");
1599+
assertEquals(200, page2.statusCode());
1600+
String[] page2Lines = page2.body().split("\n");
1601+
assertTrue(page2Lines.length <= 2, "from=1&size=1 should return at most 2 lines");
1602+
1603+
if (page1Lines.length == 2 && page2Lines.length == 2) {
1604+
assertFalse(
1605+
page1Lines[1].equals(page2Lines[1]), "Page 1 and page 2 should return different rows");
1606+
}
1607+
}
1608+
1609+
@Test
1610+
void testExportWithFromBeyondResults(TestNamespace ns) throws Exception {
1611+
HttpResponse<String> response =
1612+
httpGetExport("/v1/search/export?q=*&index=table_search_index&from=999999&size=10");
1613+
1614+
assertEquals(200, response.statusCode());
1615+
String[] lines = response.body().split("\n");
1616+
assertEquals(1, lines.length, "Export beyond results should only contain header");
1617+
}
14671618
}

openmetadata-service/src/main/java/org/openmetadata/service/resources/ai/McpExecutionResource.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,16 +110,19 @@ public ResultList<McpExecution> list(
110110
ResourceContextInterface resourceContext = McpExecutionContext.builder().build();
111111
authorizer.authorize(securityContext, operationContext, resourceContext);
112112

113+
if ((startTs == null) != (endTs == null)) {
114+
throw new IllegalArgumentException("Both startTs and endTs must be provided together");
115+
}
116+
113117
ListFilter filter = new ListFilter(org.openmetadata.schema.type.Include.ALL);
114118
if (serverId != null) {
115119
filter.addQueryParam("serverId", serverId.toString());
116120
}
117121

118122
if (startTs != null && endTs != null) {
119123
return repository.listWithOffset(null, filter, limitParam, startTs, endTs, false, false);
120-
} else {
121-
return repository.listWithOffset(null, filter, limitParam, false);
122124
}
125+
return repository.listWithOffset(null, filter, limitParam, false);
123126
}
124127

125128
@GET

openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java

Lines changed: 124 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
import jakarta.ws.rs.core.MediaType;
4141
import jakarta.ws.rs.core.Response;
4242
import jakarta.ws.rs.core.SecurityContext;
43+
import jakarta.ws.rs.core.StreamingOutput;
4344
import jakarta.ws.rs.core.UriInfo;
4445
import java.io.IOException;
4546
import java.util.ArrayList;
@@ -74,6 +75,7 @@
7475
import org.openmetadata.service.search.SearchClient;
7576
import org.openmetadata.service.search.SearchHealthStatus;
7677
import org.openmetadata.service.search.SearchRepository;
78+
import org.openmetadata.service.search.SearchResultCsvExporter;
7779
import org.openmetadata.service.search.SearchUtils;
7880
import org.openmetadata.service.search.indexes.SearchIndex;
7981
import org.openmetadata.service.security.Authorizer;
@@ -255,6 +257,128 @@ public Response search(
255257
return searchRepository.search(request, subjectContext);
256258
}
257259

260+
@GET
261+
@Path("/export")
262+
@Produces("text/csv; charset=utf-8")
263+
@Operation(
264+
operationId = "exportSearchResults",
265+
summary = "Export search results as CSV (streaming)",
266+
description =
267+
"Exports the current search results as a streaming CSV download. "
268+
+ "The response is streamed directly to the client without buffering the entire result set in memory.",
269+
responses = {
270+
@ApiResponse(
271+
responseCode = "200",
272+
description = "CSV file stream",
273+
content = @Content(mediaType = "text/csv; charset=utf-8"))
274+
})
275+
public Response exportSearchResults(
276+
@Context SecurityContext securityContext,
277+
@Parameter(description = "Search Query Text") @DefaultValue("*") @QueryParam("q")
278+
String query,
279+
@Parameter(description = "ElasticSearch Index name, defaults to table")
280+
@DefaultValue("table")
281+
@QueryParam("index")
282+
String index,
283+
@Parameter(description = "Filter documents by deleted param. By default deleted is false")
284+
@QueryParam("deleted")
285+
Boolean deleted,
286+
@Parameter(
287+
description =
288+
"Elasticsearch query that will be combined with the query_string query generator from the `query` argument")
289+
@QueryParam("query_filter")
290+
String queryFilter,
291+
@Parameter(description = "Elasticsearch query that will be used as a post_filter")
292+
@QueryParam("post_filter")
293+
String postFilter,
294+
@Parameter(description = "Sort the search results by field")
295+
@DefaultValue("_score")
296+
@QueryParam("sort_field")
297+
String sortFieldParam,
298+
@Parameter(
299+
description = "Sort order asc for ascending or desc for descending, defaults to desc")
300+
@DefaultValue("desc")
301+
@QueryParam("sort_order")
302+
String sortOrder,
303+
@Parameter(
304+
description =
305+
"Maximum number of rows to export. When null, exports all matching results up to the hard cap.")
306+
@QueryParam("size")
307+
Integer size,
308+
@Parameter(
309+
description =
310+
"Starting offset for export. Use with size to export a specific page of results (e.g., from=30&size=15 for page 3).")
311+
@DefaultValue("0")
312+
@QueryParam("from")
313+
int from)
314+
throws IOException {
315+
316+
SubjectContext subjectContext = getSubjectContext(securityContext);
317+
SearchRequest request =
318+
buildExportSearchRequest(
319+
subjectContext,
320+
query,
321+
index,
322+
deleted,
323+
queryFilter,
324+
postFilter,
325+
sortFieldParam,
326+
sortOrder);
327+
328+
int totalHits = searchRepository.countSearchResults(request, subjectContext);
329+
final int effectiveTotal =
330+
Math.max(
331+
(size != null && size > 0) ? Math.min(size, totalHits - from) : totalHits - from, 0);
332+
333+
if (effectiveTotal > SearchResultCsvExporter.MAX_EXPORT_ROWS) {
334+
return Response.status(Response.Status.BAD_REQUEST)
335+
.entity(
336+
String.format(
337+
"Results contain %d rows, max is %d. Please add filters to reduce the result set.",
338+
effectiveTotal, SearchResultCsvExporter.MAX_EXPORT_ROWS))
339+
.type(MediaType.TEXT_PLAIN)
340+
.build();
341+
}
342+
343+
StreamingOutput stream =
344+
output ->
345+
searchRepository.exportSearchResultsCsvStream(
346+
request, subjectContext, effectiveTotal, from, output);
347+
348+
return Response.ok(stream)
349+
.header("Content-Disposition", "attachment; filename=\"search_export.csv\"")
350+
.build();
351+
}
352+
353+
private SearchRequest buildExportSearchRequest(
354+
SubjectContext subjectContext,
355+
String query,
356+
String index,
357+
Boolean deleted,
358+
String queryFilter,
359+
String postFilter,
360+
String sortFieldParam,
361+
String sortOrder) {
362+
String resolvedQuery = nullOrEmpty(query) ? "*" : query;
363+
364+
List<EntityReference> domains = new ArrayList<>();
365+
if (!subjectContext.isAdmin()) {
366+
domains = subjectContext.getUserDomains();
367+
}
368+
369+
return new SearchRequest()
370+
.withQuery(resolvedQuery)
371+
.withIndex(Entity.getSearchRepository().getIndexOrAliasName(index))
372+
.withQueryFilter(queryFilter)
373+
.withPostFilter(postFilter)
374+
.withDeleted(deleted)
375+
.withSortFieldParam(sortFieldParam)
376+
.withSortOrder(sortOrder)
377+
.withDomains(domains)
378+
.withApplyDomainFilter(
379+
!subjectContext.isAdmin() && subjectContext.hasAnyRole(DOMAIN_ONLY_ACCESS_ROLE));
380+
}
381+
258382
@POST
259383
@Path("/preview")
260384
@Consumes(MediaType.APPLICATION_JSON)

openmetadata-service/src/main/java/org/openmetadata/service/search/SearchManagementClient.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,19 @@ public interface SearchManagementClient {
2424
*/
2525
Response search(SearchRequest request, SubjectContext subjectContext) throws IOException;
2626

27+
/**
28+
* Execute a search returning typed results for export.
29+
* Uses the same query-building logic as {@link #search} but returns a {@link SearchResultListMapper}
30+
* instead of a serialized JAX-RS Response, avoiding double serialization/deserialization.
31+
*
32+
* @param request the search request
33+
* @param subjectContext the subject context for RBAC evaluation
34+
* @return typed search results with hits and sort values
35+
* @throws IOException if search execution fails
36+
*/
37+
SearchResultListMapper searchForExport(SearchRequest request, SubjectContext subjectContext)
38+
throws IOException;
39+
2740
/**
2841
* Execute a preview search with custom search settings.
2942
* This is typically used for testing search configurations before applying them.

0 commit comments

Comments
 (0)