diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java
index fd4afde0b90c..613413e6a044 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java
@@ -368,6 +368,131 @@ public DataQualityReport getDataQualityReport(
         queryWithDomain, index, searchAggregation, subjectContext);
   }
 
+  /**
+   * Ranks recently executed test cases by a weighted impact score on a 0-100 scale.
+   *
+   * <p>The score combines downstream usage (40%), consumer count (30%) and failed results in the
+   * last 30 days (30%), each normalized against the largest value in the fetched result set.
+   *
+   * @param limit maximum number of entries to return; a non-positive value yields an empty list
+   * @param testCaseStatus optional status filter; null or empty disables it
+   * @param subjectContext caller context forwarded to the search layer
+   * @return test case documents with an added {@code impactScore}, highest score first
+   * @throws IOException if the search request fails
+   * @throws IllegalArgumentException if {@code testCaseStatus} contains non-letter characters
+   */
+  public List<Map<String, Object>> getDataQualityCheckImpact(
+      int limit, String testCaseStatus, SubjectContext subjectContext) throws IOException {
+    if (limit <= 0) {
+      // Stream.limit() rejects negative sizes, and there is nothing to rank for zero.
+      return new ArrayList<>();
+    }
+
+    long thirtyDaysAgo = System.currentTimeMillis() - (30L * 24 * 60 * 60 * 1000);
+    String query = buildTestCaseImpactQuery(testCaseStatus, thirtyDaysAgo);
+
+    // Over-fetch because several hits can collapse into one test case; long arithmetic keeps a
+    // very large limit from overflowing into a negative page size.
+    int fetchSize = (int) Math.min(2L * limit, Integer.MAX_VALUE);
+    List<Map<String, Object>> testCases =
+        searchRepository.searchTestCasesForImpact(query, 0, fetchSize, subjectContext);
+
+    Map<String, Integer> maxValues = calculateMaxValues(testCases);
+    int downstreamMax = maxValues.get("downstreamUsage");
+    int consumerMax = maxValues.get("consumerCount");
+    int incidentMax = maxValues.get("recentIncidents");
+
+    List<Map<String, Object>> rankedResults = new ArrayList<>(testCases.size());
+    for (Map<String, Object> testCase : testCases) {
+      double downstreamRatio = toRatio(readMetric(testCase, "downstreamUsage"), downstreamMax);
+      double consumerRatio = toRatio(readMetric(testCase, "consumerCount"), consumerMax);
+      double incidentRatio = toRatio(readMetric(testCase, "recentIncidents"), incidentMax);
+      double weighted = (0.4 * downstreamRatio) + (0.3 * consumerRatio) + (0.3 * incidentRatio);
+
+      Map<String, Object> result = new HashMap<>(testCase);
+      // Round once on the 0-100 scale: rounding to two decimals and then multiplying by 100
+      // reintroduces floating-point noise such as 56.99999999999999.
+      result.put("impactScore", (double) Math.round(weighted * 100.0));
+      rankedResults.add(result);
+    }
+
+    rankedResults.sort(
+        (a, b) ->
+            Double.compare(
+                ((Number) b.get("impactScore")).doubleValue(),
+                ((Number) a.get("impactScore")).doubleValue()));
+
+    return rankedResults.stream().limit(limit).collect(Collectors.toList());
+  }
+
+  /**
+   * Builds the bool query that restricts hits to an optional status and a minimum timestamp.
+   *
+   * @param testCaseStatus optional status to match; must consist of letters only because it is
+   *     embedded in the JSON text
+   * @param thirtyDaysAgo lower timestamp bound in epoch milliseconds
+   * @return the query as a JSON string
+   * @throws IllegalArgumentException if {@code testCaseStatus} contains non-letter characters
+   */
+  private String buildTestCaseImpactQuery(String testCaseStatus, long thirtyDaysAgo) {
+    StringBuilder query = new StringBuilder("{\"query\": {\"bool\": {\"must\": [");
+
+    if (testCaseStatus != null && !testCaseStatus.isEmpty()) {
+      // The value is spliced into JSON text, so reject anything that could escape the literal.
+      if (!testCaseStatus.chars().allMatch(Character::isLetter)) {
+        throw new IllegalArgumentException("Invalid testCaseStatus: " + testCaseStatus);
+      }
+      // NOTE(review): lower-casing is kept as-is; confirm it matches how the index stores status.
+      query
+          .append("{\"term\": {\"testCaseStatus\": \"")
+          .append(testCaseStatus.toLowerCase(java.util.Locale.ROOT))
+          .append("\"}},");
+    }
+
+    // The bound is compared in the unit it was computed in (currentTimeMillis), assuming the
+    // indexed timestamp is epoch milliseconds as well. TODO(review): confirm the index unit.
+    query.append("{\"range\": {\"timestamp\": {\"gte\": ").append(thirtyDaysAgo).append("}}}");
+
+    // Close the array and all three objects opened by the prefix.
+    return query.append("]}}}").toString();
+  }
+
+  /**
+   * Finds the largest value of each impact metric across the given documents.
+   *
+   * @param testCases documents carrying the numeric impact metrics
+   * @return maxima keyed by metric name, each at least 1 so it is a safe divisor
+   */
+  private Map<String, Integer> calculateMaxValues(List<Map<String, Object>> testCases) {
+    // Starting at 1 gives the same floor as clamping the final maxima to 1.
+    int maxDownstream = 1;
+    int maxConsumer = 1;
+    int maxIncident = 1;
+
+    for (Map<String, Object> testCase : testCases) {
+      maxDownstream = Math.max(maxDownstream, readMetric(testCase, "downstreamUsage"));
+      maxConsumer = Math.max(maxConsumer, readMetric(testCase, "consumerCount"));
+      maxIncident = Math.max(maxIncident, readMetric(testCase, "recentIncidents"));
+    }
+
+    Map<String, Integer> maxValues = new HashMap<>();
+    maxValues.put("downstreamUsage", maxDownstream);
+    maxValues.put("consumerCount", maxConsumer);
+    maxValues.put("recentIncidents", maxIncident);
+    return maxValues;
+  }
+
+  /** Reads an integer metric from a document, treating a missing or non-numeric value as 0. */
+  private static int readMetric(Map<String, Object> document, String key) {
+    Object value = document.get(key);
+    return value instanceof Number ? ((Number) value).intValue() : 0;
+  }
+
+  /** Scales {@code value} relative to {@code max}, capped at 1.0; a non-positive max yields 0. */
+  private static double toRatio(int value, int max) {
+    return max > 0 ? Math.min((double) value / max, 1.0) : 0.0;
+  }
+
   private String addDomainFilter(String query, String domain, String index) {
     if (nullOrEmpty(domain)) {
       return query;
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java
index fed7540d002f..d2601306a3e4 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java
@@ -542,10 +542,57 @@ public DataQualityReport getDataQualityReport(
     if (nullOrEmpty(aggregationQuery) || nullOrEmpty(index)) {
       throw new IllegalArgumentException("aggregationQuery and index are required parameters");
     }
     SubjectContext subjectContext = getSubjectContext(securityContext);
     return repository.getDataQualityReport(query, aggregationQuery, index, domain, subjectContext);
   }
 
+  @GET
+  @Path("/dataQualityCheckImpact")
+  @Operation(
+      operationId = "getDataQualityCheckImpact",
+      summary = "Get Data Quality Check Impact Ranking",
+      description =
+          """
+          Get data quality checks ranked by impact score. The impact score is calculated based on:
+          - Downstream usage (number of downstream entities using the data)
+          - Consumer count (number of direct consumers)
+          - Recent incidents (failed test results in the last 30 days)
+          This helps prioritize which data quality checks are most critical to fix.
+          """,
+      responses = {
+        @ApiResponse(
+            responseCode = "200",
+            description = "List of data quality checks ranked by impact",
+            content =
+                @Content(
+                    mediaType = "application/json",
+                    schema = @Schema(implementation = List.class)))
+      })
+  public List<Map<String, Object>> getDataQualityCheckImpact(
+      @Context UriInfo uriInfo,
+      @Context SecurityContext securityContext,
+      @Parameter(
+              description = "Number of results to return",
+              schema = @Schema(type = "integer", defaultValue = "10"))
+          @QueryParam("limit")
+          @DefaultValue("10")
+          int limit,
+      @Parameter(
+              description = "Filter by test case status (e.g., Failed, Success)",
+              schema = @Schema(type = "string"))
+          @QueryParam("testCaseStatus")
+          String testCaseStatus)
+      throws IOException {
+    List<AuthRequest> authRequests = getAuthRequestsForListOps();
+    authorizer.authorizeRequests(securityContext, authRequests, AuthorizationLogic.ANY);
+    // Fail fast with a clear message instead of passing a non-positive size to the search.
+    if (limit <= 0) {
+      throw new IllegalArgumentException("limit must be a positive integer");
+    }
+    SubjectContext subjectContext = getSubjectContext(securityContext);
+    return repository.getDataQualityCheckImpact(limit, testCaseStatus, subjectContext);
+  }
+
   @POST
   @Operation(
       operationId = "createLogicalTestSuite",
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java
index 9028199b2b36..df0fe321fa51 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java
@@ -2785,11 +2785,107 @@ public Response searchEntityRelationship(
         fqn, upstreamDepth, downstreamDepth, queryFilter, deleted);
   }
 
   public Response searchDataQualityLineage(
       String fqn, int upstreamDepth, String queryFilter, boolean deleted) throws IOException {
     return searchClient.searchDataQualityLineage(fqn, upstreamDepth, queryFilter, deleted);
   }
 
+  /**
+   * Searches the test case index and collapses the hits into one document per test case.
+   *
+   * <p>Each document carries the identifying fields of the first hit seen for that fully qualified
+   * name, a {@code runCount} of hits and a {@code recentIncidents} count of hits whose status is
+   * "failed" (case-insensitive).
+   *
+   * <p>NOTE(review): {@code downstreamUsage} and {@code consumerCount} are always 0 here, so only
+   * {@code recentIncidents} can influence a score derived from these documents.
+   *
+   * @param query query string passed to the search request
+   * @param from offset of the first hit
+   * @param size maximum number of hits to fetch
+   * @param subjectContext caller context used by the search
+   * @return grouped documents in first-seen order; empty on a non-200 response or parse failure
+   * @throws IOException if the search request fails
+   */
+  public List<Map<String, Object>> searchTestCasesForImpact(
+      String query, int from, int size, SubjectContext subjectContext) throws IOException {
+    SearchRequest searchRequest =
+        new SearchRequest()
+            .withIndex(Entity.TEST_CASE)
+            .withQuery(query)
+            .withFrom(from)
+            .withSize(size)
+            .withSortFieldParam("timestamp")
+            .withDeleted(false)
+            .withSortOrder("desc");
+
+    Response response = search(searchRequest, subjectContext);
+    if (response.getStatus() != 200) {
+      LOG.warn("Test case impact search returned status {}", response.getStatus());
+      return new ArrayList<>();
+    }
+
+    String json = (String) response.getEntity();
+    // LinkedHashMap preserves first-seen order, i.e. the order returned by the search.
+    Map<String, Map<String, Object>> groupedTestCases = new LinkedHashMap<>();
+
+    try {
+      JsonNode hitsNode = JsonUtils.extractValue(json, HITS, HITS);
+      if (hitsNode == null || !hitsNode.isArray()) {
+        return new ArrayList<>();
+      }
+
+      for (JsonNode hit : hitsNode) {
+        JsonNode sourceNode = JsonUtils.extractValue(hit.toString(), SEARCH_SOURCE);
+        if (sourceNode == null) {
+          continue;
+        }
+
+        // Serialize the source once instead of once per extracted field.
+        String sourceJson = sourceNode.toString();
+        String testCaseFQN = JsonUtils.extractValue(sourceJson, FULLY_QUALIFIED_NAME);
+        if (testCaseFQN == null || testCaseFQN.isEmpty()) {
+          continue;
+        }
+
+        String testCaseStatus = JsonUtils.extractValue(sourceJson, "testCaseStatus");
+        boolean failed = "failed".equalsIgnoreCase(testCaseStatus);
+
+        Map<String, Object> doc = groupedTestCases.get(testCaseFQN);
+        if (doc == null) {
+          // Values that are only stored are kept as Object so a numeric field (for example the
+          // timestamp) is not forced through a String cast.
+          Object testCaseId = JsonUtils.extractValue(sourceJson, ID);
+          Object entityFQN = JsonUtils.extractValue(sourceJson, "entityFQN");
+          Object timestamp = JsonUtils.extractValue(sourceJson, "timestamp");
+
+          doc = new HashMap<>();
+          doc.put("testCaseId", testCaseId);
+          doc.put("testCaseFullyQualifiedName", testCaseFQN);
+          doc.put("entityFullyQualifiedName", entityFQN);
+          doc.put("testCaseStatus", testCaseStatus);
+          doc.put("timestamp", timestamp);
+          doc.put("runCount", 1);
+          doc.put("recentIncidents", failed ? 1 : 0);
+          doc.put("downstreamUsage", 0);
+          doc.put("consumerCount", 0);
+          groupedTestCases.put(testCaseFQN, doc);
+        } else {
+          doc.put("runCount", ((Number) doc.get("runCount")).intValue() + 1);
+          if (failed) {
+            doc.put("recentIncidents", ((Number) doc.get("recentIncidents")).intValue() + 1);
+          }
+        }
+      }
+
+      return new ArrayList<>(groupedTestCases.values());
+    } catch (Exception e) {
+      // Best effort: a malformed response yields an empty ranking rather than a failed request.
+      LOG.error("Error parsing search test cases for impact", e);
+      return new ArrayList<>();
+    }
+  }
+
   public Response searchSchemaEntityRelationship(
       String fqn, int upstreamDepth, int downstreamDepth, String queryFilter, boolean deleted)
       throws IOException {
diff --git a/openmetadata-spec/src/main/resources/json/schema/tests/dataQualityCheckImpact.json b/openmetadata-spec/src/main/resources/json/schema/tests/dataQualityCheckImpact.json
new file mode 100644
index 000000000000..d172c111c85c
--- /dev/null
+++ b/openmetadata-spec/src/main/resources/json/schema/tests/dataQualityCheckImpact.json
@@ -0,0 +1,74 @@
+{
+  "$id": "https://open-metadata.org/schema/tests/dataQualityCheckImpact.json",
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "DataQualityCheckImpact",
+  "description": "Data Quality Check Impact model for ranking checks by business criticality.",
+  "type": "object",
+  "javaType": "org.openmetadata.schema.tests.DataQualityCheckImpact",
+  "properties": {
+    "testCaseId": {
+      "description": "Unique identifier of the test case.",
+      "$ref": "../type/basic.json#/definitions/uuid"
+    },
+    "testCaseFullyQualifiedName": {
+      "description": "Fully qualified name of the test case.",
+      "$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName"
+    },
+    "testSuiteId": {
+      "description": "Unique identifier of the test suite.",
+      "$ref": "../type/basic.json#/definitions/uuid"
+    },
+    "testSuiteFullyQualifiedName": {
+      "description": "Fully qualified name of the test suite.",
+      "$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName"
+    },
+    "entityFullyQualifiedName":
{ + "description": "The data entity this test case is testing.", + "$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "entityType": { + "description": "The type of entity being tested.", + "type": "string" + }, + "impactScore": { + "description": "Calculated impact score (0-100) based on downstream usage, consumers, and incidents.", + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "downstreamUsage": { + "description": "Number of downstream entities using this data.", + "type": "integer", + "minimum": 0 + }, + "consumerCount": { + "description": "Number of direct consumers of this data.", + "type": "integer", + "minimum": 0 + }, + "recentIncidents": { + "description": "Number of failed test results in the last 30 days.", + "type": "integer", + "minimum": 0 + }, + "lastFailedAt": { + "description": "Timestamp of the most recent test failure.", + "$ref": "../type/basic.json#/definitions/timestamp" + }, + "testCaseStatus": { + "description": "Current status of the test case.", + "$ref": "./basic.json#/definitions/testCaseStatus" + }, + "dataQualityDimension": { + "description": "Data quality dimension category.", + "type": "string" + } + }, + "required": [ + "testCaseId", + "testCaseFullyQualifiedName", + "impactScore", + "testCaseStatus" + ], + "additionalProperties": false +} \ No newline at end of file diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/ImpactRanking/ImpactRanking.component.tsx b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/ImpactRanking/ImpactRanking.component.tsx new file mode 100644 index 000000000000..9de975a30482 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/ImpactRanking/ImpactRanking.component.tsx @@ -0,0 +1,219 @@ +/* + * Copyright 2024 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { useTranslation } from 'react-i18next'; +import { useHistory } from 'react-router-dom'; +import { + Button, + Card, + Col, + Empty, + Row, + Skeleton, + Statistic, + Table, + Tag, + Tooltip, +} from 'antd'; +import { isEmpty } from 'lodash'; +import { useCallback, useEffect, useState } from 'react'; +import { getTestCaseDetailPagePath } from '../../../utils/RouterUtils'; +import { + DataQualityCheckImpact, + getDataQualityCheckImpact, +} from '../../../rest/testAPI'; + +interface ImpactRankingProps { + limit?: number; +} + +const ImpactRanking = ({ limit = 10 }: ImpactRankingProps) => { + const { t } = useTranslation(); + const history = useHistory(); + const [impactData, setImpactData] = useState([]); + const [loading, setLoading] = useState(true); + + const fetchImpactData = useCallback(async () => { + setLoading(true); + try { + const data = await getDataQualityCheckImpact({ limit }); + setImpactData(data); + } catch { + setImpactData([]); + } finally { + setLoading(false); + } + }, [limit]); + + useEffect(() => { + fetchImpactData(); + }, [fetchImpactData]); + + const getImpactColor = (score: number) => { + if (score >= 70) return 'red'; + if (score >= 40) return 'orange'; + return 'green'; + }; + + const getImpactLabel = (score: number) => { + if (score >= 70) return t('label.critical'); + if (score >= 40) return t('label.high'); + if (score >= 20) return t('label.medium'); + return t('label.low'); + }; + + const handleTestCaseClick = (testCaseFQN: string) => { + history.push(getTestCaseDetailPagePath(testCaseFQN)); + }; + + const 
columns = [ + { + title: t('label.rank'), + dataIndex: 'rank', + key: 'rank', + width: 60, + render: (_: unknown, __: unknown, index: number) => index + 1, + }, + { + title: t('label.test-case'), + dataIndex: 'testCaseFullyQualifiedName', + key: 'testCaseFullyQualifiedName', + ellipsis: true, + render: (text: string, record: DataQualityCheckImpact) => ( + + ), + }, + { + title: t('label.entity'), + dataIndex: 'entityFullyQualifiedName', + key: 'entityFullyQualifiedName', + ellipsis: true, + }, + { + title: t('label.status'), + dataIndex: 'testCaseStatus', + key: 'testCaseStatus', + render: (status: string) => ( + {status} + ), + }, + { + title: t('label.impact-score'), + dataIndex: 'impactScore', + key: 'impactScore', + sorter: (a: DataQualityCheckImpact, b: DataQualityCheckImpact) => + b.impactScore - a.impactScore, + render: (score: number) => ( + + {getImpactLabel(score)} + + ), + }, + { + title: t('label.downstream'), + dataIndex: 'downstreamUsage', + key: 'downstreamUsage', + }, + { + title: t('label.consumers'), + dataIndex: 'consumerCount', + key: 'consumerCount', + }, + ]; + + const totalCritical = + impactData.filter((item) => item.impactScore >= 70).length; + const totalHigh = + impactData.filter((item) => item.impactScore >= 40 && item.impactScore < 70) + .length; + const avgScore = + impactData.length > 0 + ? 
Math.round( + impactData.reduce((sum, item) => sum + item.impactScore, 0) / + impactData.length + ) + : 0; + + if (loading) { + return ; + } + + if (isEmpty(impactData)) { + return ( + + ); + } + + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ); +}; + +export default ImpactRanking; \ No newline at end of file diff --git a/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityClassBase.ts b/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityClassBase.ts index 2bdadb32aed4..d4899ac76511 100644 --- a/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityClassBase.ts +++ b/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityClassBase.ts @@ -14,6 +14,7 @@ import { ReactComponent as TestCaseIcon } from '../../assets/svg/all-activity-v2 import { ReactComponent as DashboardIcon } from '../../assets/svg/ic-dashboard.svg'; import { ReactComponent as TestSuiteIcon } from '../../assets/svg/icon-test-suite.svg'; import DataQualityDashboard from '../../components/DataQuality/DataQualityDashboard/DataQualityDashboard.component'; +import ImpactRanking from '../../components/DataQuality/ImpactRanking/ImpactRanking.component'; import { TestCases } from '../../components/DataQuality/TestCases/TestCases.component'; import { TestSuites } from '../../components/DataQuality/TestSuite/TestSuiteList/TestSuites.component'; import i18n from '../../utils/i18next/LocalUtil'; @@ -70,7 +71,7 @@ class DataQualityClassBase { ]; } - public getDataQualityTab() { +public getDataQualityTab() { return [ { component: DataQualityDashboard, @@ -87,6 +88,11 @@ class DataQualityClassBase { component: TestSuites, label: i18n.t('label.test-suite-plural'), }, + { + key: DataQualityPageTabs.IMPACT, + component: ImpactRanking, + label: i18n.t('label.impact-ranking'), + }, ]; } diff --git a/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityPage.interface.ts 
b/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityPage.interface.ts index a8c46d4e718b..ac71a81c0677 100644 --- a/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityPage.interface.ts +++ b/openmetadata-ui/src/main/resources/ui/src/pages/DataQuality/DataQualityPage.interface.ts @@ -19,6 +19,7 @@ export enum DataQualityPageTabs { TEST_SUITES = 'test-suites', TEST_CASES = 'test-cases', DASHBOARD = 'dashboard', + IMPACT = 'impact', } export enum DataQualitySubTabs { diff --git a/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts b/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts index f5919e632a92..d4d8bcc12eff 100644 --- a/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts +++ b/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts @@ -492,3 +492,31 @@ export const deleteTestCaseFailedSampleData = async (id: string) => { return response.data; }; + +export type DataQualityCheckImpactParams = { + limit?: number; + testCaseStatus?: TestCaseStatus; +}; + +export interface DataQualityCheckImpact { + testCaseId: string; + testCaseFullyQualifiedName: string; + entityFullyQualifiedName: string; + testCaseStatus: TestCaseStatus; + timestamp: number; + downstreamUsage: number; + consumerCount: number; + recentIncidents: number; + impactScore: number; +} + +export const getDataQualityCheckImpact = async ( + params?: DataQualityCheckImpactParams +): Promise => { + const response = await APIClient.get( + `${testSuiteUrl}/dataQualityCheckImpact`, + { params } + ); + + return response.data; +};