From 51d3b55e2be6fc99b2a60e7674c4d40219457414 Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Mon, 6 Apr 2026 16:21:54 +0200
Subject: [PATCH 01/10] update multiplechoice filter to allow virtual scrolling
---
.../multiple-choice-filter.component.html | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
index deb3764dc..4dd37396a 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
@@ -1,8 +1,16 @@
-
=6" [options]="options" [maxSelectedLabels]=1
- placeholder="Choose" [ngModel]="data" (onChange)="update($event.value)"
- ariaLabelledBy="legend-filter-{{filter.displayName | slugify}}"
- fluid>
+ =6"
+ [options]="options"
+ [virtualScroll]="true"
+ [virtualScrollItemSize]="60"
+ [maxSelectedLabels]=1
+ placeholder="Choose"
+ [ngModel]="data"
+ (onChange)="update($event.value)" ariaLabelledBy="legend-filter-{{filter.displayName | slugify}}" fluid>
{{item.label}}
{{item.doc_count}}
From 9bacc1d13a444cfbc3fc1462e8f57c91b689a9ba Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Tue, 12 May 2026 15:51:57 +0200
Subject: [PATCH 02/10] fix autosizing dropdown options
---
.../multiple-choice-filter.component.scss | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss
index 5a368d9d5..8321e8f49 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss
@@ -18,3 +18,7 @@
vertical-align: top;
flex-grow: 2;
}
+
+::ng-deep .p-multiselect-option {
+ height: auto !important;
+}
\ No newline at end of file
From aadd8abca5b7bc5aeb23864fe64bcef708bc9845 Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Tue, 12 May 2026 18:34:18 +0200
Subject: [PATCH 03/10] include AllOptions
---
.../multiple-choice-filter.component.html | 5 ++++-
.../multiple-choice-filter.component.scss | 1 -
.../multiple-choice-filter.component.ts | 18 +++++++++++++++---
3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
index 4dd37396a..3086d1d45 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
@@ -7,10 +7,13 @@
[options]="options"
[virtualScroll]="true"
[virtualScrollItemSize]="60"
+ [lazy]="true"
[maxSelectedLabels]=1
placeholder="Choose"
[ngModel]="data"
- (onChange)="update($event.value)" ariaLabelledBy="legend-filter-{{filter.displayName | slugify}}" fluid>
+ (onChange)="update($event.value)" ariaLabelledBy="legend-filter-{{filter.displayName | slugify}}" fluid
+ (onPanelShow)="getAllOptionsFromES($event)">
+
{{item.label}}
{{item.doc_count}}
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss
index 8321e8f49..4226aad72 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.scss
@@ -14,7 +14,6 @@
.select-label {
margin-top: 2px;
white-space: pre-line;
- max-width: 180px;
vertical-align: top;
flex-grow: 2;
}
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts
index 0563c21e2..88f0db8d4 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts
@@ -6,6 +6,7 @@ import { TermsAggregator, TermsResult } from '@models/aggregation';
import { SearchService } from '@services';
import { MultipleChoiceFilter, MultipleChoiceFilterOptions } from '@models';
import { BaseFilterComponent } from '../base-filter.component';
+import { MultiSelectLazyLoadEvent } from 'primeng/multiselect';
@Component({
selector: 'ia-multiple-choice-filter',
@@ -15,6 +16,7 @@ import { BaseFilterComponent } from '../base-filter.component';
})
export class MultipleChoiceFilterComponent extends BaseFilterComponent {
options: { label: string; value: string; doc_count: number }[] = [];
+ allOptionsCalled: boolean = false;
constructor(private searchService: SearchService) {
super();
@@ -25,12 +27,22 @@ export class MultipleChoiceFilterComponent extends BaseFilterComponent {
+ private async getOptions(all: boolean = false): Promise {
if (this.filter && this.queryModel) {
- const optionCount = (this.filter.corpusField.filterOptions as MultipleChoiceFilterOptions).option_count;
+ // optionCount is set to the maximum when the filter panel is shown, but not when other filters change
+ const optionCount = all ? 10000 : (this.filter.corpusField.filterOptions as MultipleChoiceFilterOptions).option_count;
const aggregator = new TermsAggregator(this.filter.corpusField, optionCount);
const queryModel = this.queryModel.clone();
queryModel.filterForField(this.filter.corpusField).deactivate();
From e2fdf2fcf68ed1e253fb5ad8746e2897201985df Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Thu, 14 May 2026 12:54:19 +0200
Subject: [PATCH 04/10] only make extra request to ES for large dropdown
filters
---
.../multiple-choice-filter.component.ts | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts
index 88f0db8d4..d98807908 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.ts
@@ -33,13 +33,17 @@ export class MultipleChoiceFilterComponent extends BaseFilterComponent 10) {
+ this.getOptions(true);
+ this.allOptionsCalled = true;
+ }
}
private async getOptions(all: boolean = false): Promise {
+ console.log('fire');
if (this.filter && this.queryModel) {
// optionCount is set to the maximum when the filter panel is shown, but not when other filters change
const optionCount = all ? 10000 : (this.filter.corpusField.filterOptions as MultipleChoiceFilterOptions).option_count;
From 39956cd741ac93247db91af032d3a64f84ce50c8 Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Thu, 14 May 2026 12:58:26 +0200
Subject: [PATCH 05/10] remove unnecessary parameter from p-multiselect
---
.../multiple-choice-filter/multiple-choice-filter.component.html | 1 -
1 file changed, 1 deletion(-)
diff --git a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
index 3086d1d45..b5ad543cf 100644
--- a/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
+++ b/frontend/src/app/filter/multiple-choice-filter/multiple-choice-filter.component.html
@@ -7,7 +7,6 @@
[options]="options"
[virtualScroll]="true"
[virtualScrollItemSize]="60"
- [lazy]="true"
[maxSelectedLabels]=1
placeholder="Choose"
[ngModel]="data"
From a51b29a8f84111236647059156e9f94b2cedba6a Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Thu, 14 May 2026 14:29:43 +0200
Subject: [PATCH 06/10] show no of unique values to user per field
---
backend/visualization/field_stats.py | 32 +++++++++++++++++++
.../visualization/tests/test_field_stats.py | 11 ++++++-
backend/visualization/urls.py | 3 +-
backend/visualization/views.py | 14 +++++++-
.../corpus-info/corpus-info.component.html | 3 +-
.../corpus-info/corpus-info.component.ts | 6 +++-
.../field-info/field-info.component.html | 14 +++++++-
.../field-info/field-info.component.ts | 1 +
frontend/src/app/models/visualization.ts | 5 +++
frontend/src/app/services/api.service.ts | 8 ++++-
10 files changed, 90 insertions(+), 7 deletions(-)
diff --git a/backend/visualization/field_stats.py b/backend/visualization/field_stats.py
index 1fee67991..e43108b2c 100644
--- a/backend/visualization/field_stats.py
+++ b/backend/visualization/field_stats.py
@@ -50,3 +50,35 @@ def report_coverage(corpus_name):
}
+def cardinality_results(search_result):
+    '''Return the value of the `unique_category_count` aggregation in a search result'''
+    return search_result['aggregations']['unique_category_count']['value']
+
+def report_cardinality(corpus_name):
+    '''
+    Returns a dict mapping each field name in the corpus to its number of unique values.
+    Fields whose display type is not 'keyword' are reported as 0 without querying.
+    '''
+    es_client = elasticsearch(corpus_name)
+    corpus_conf = CorpusConfiguration.objects.get(corpus__name=corpus_name)
+    cardinality_dict = {}
+
+    for field in corpus_conf.fields.all():
+        if field.display_type != 'keyword':
+            cardinality_dict[field.name] = 0
+            continue
+        # build a fresh query per field rather than mutating a shared template dict
+        query = {
+            "size": 0,
+            "aggs": {
+                "unique_category_count": {
+                    "cardinality": {
+                        "field": field.name,
+                        "precision_threshold": 10000
+                    }
+                }
+            }
+        }
+        result = es_client.search(index=corpus_conf.es_index, body=query)
+        cardinality_dict[field.name] = cardinality_results(result)
+    return cardinality_dict
diff --git a/backend/visualization/tests/test_field_stats.py b/backend/visualization/tests/test_field_stats.py
index e18e456b5..a58881f5a 100644
--- a/backend/visualization/tests/test_field_stats.py
+++ b/backend/visualization/tests/test_field_stats.py
@@ -1,4 +1,4 @@
-from visualization.field_stats import count_field, count_total, report_coverage
+from visualization.field_stats import count_field, count_total, report_coverage, report_cardinality
def test_count(small_mock_corpus, es_client, index_small_mock_corpus, small_mock_corpus_specs):
@@ -20,3 +20,12 @@ def test_report(small_mock_corpus, es_client, index_small_mock_corpus, small_moc
'content': 1.0,
'genre': 1.0,
}
+
+def test_cardinality(small_mock_corpus):
+ report = report_cardinality(small_mock_corpus)
+ assert report == {
+ 'date': 0,
+ 'title': 0,
+ 'content': 0,
+ 'genre': 3
+ }
diff --git a/backend/visualization/urls.py b/backend/visualization/urls.py
index 62a0ada49..6a16b520f 100644
--- a/backend/visualization/urls.py
+++ b/backend/visualization/urls.py
@@ -8,5 +8,6 @@
path('ngram', NgramView.as_view()),
path('date_term_frequency', DateTermFrequencyView.as_view()),
path('aggregate_term_frequency', AggregateTermFrequencyView.as_view()),
- path('coverage/', FieldCoverageView.as_view())
+ path('coverage/', FieldCoverageView.as_view()),
+ path('cardinality/', FieldCardinalityView.as_view())
]
diff --git a/backend/visualization/views.py b/backend/visualization/views.py
index 3aa85f0c8..a3e004927 100644
--- a/backend/visualization/views.py
+++ b/backend/visualization/views.py
@@ -6,7 +6,7 @@
from django.conf import settings
from addcorpus.permissions import CanSearchCorpus
from tag.permissions import CanSearchTags
-from visualization.field_stats import report_coverage
+from visualization.field_stats import report_coverage, report_cardinality
from addcorpus.permissions import corpus_name_from_request
from api.utils import check_json_keys
@@ -178,3 +178,15 @@ def get(self, request, *args, **kwargs):
corpus = corpus_name_from_request(request)
report = report_coverage(corpus)
return Response(report)
+
+class FieldCardinalityView(APIView):
+ '''
+ Get the number of different values for each field in a corpus
+ '''
+
+ permission_classes = [CanSearchCorpus]
+
+ def get(self, request, *args, **kwargs):
+ corpus = corpus_name_from_request(request)
+ report = report_cardinality(corpus)
+ return Response(report)
diff --git a/frontend/src/app/corpus/corpus-info/corpus-info.component.html b/frontend/src/app/corpus/corpus-info/corpus-info.component.html
index a2434e1a6..0b26a6e56 100644
--- a/frontend/src/app/corpus/corpus-info/corpus-info.component.html
+++ b/frontend/src/app/corpus/corpus-info/corpus-info.component.html
@@ -32,7 +32,8 @@
id="fields" title="Fields">
+ [coverage]="fieldCoverage ? fieldCoverage[field.name] : undefined"
+ [cardinality]="fieldCardinality ? fieldCardinality[field.name] : undefined">
diff --git a/frontend/src/app/corpus/corpus-info/corpus-info.component.ts b/frontend/src/app/corpus/corpus-info/corpus-info.component.ts
index aafda98f0..d9d7bc161 100644
--- a/frontend/src/app/corpus/corpus-info/corpus-info.component.ts
+++ b/frontend/src/app/corpus/corpus-info/corpus-info.component.ts
@@ -1,6 +1,6 @@
import { Component, OnInit } from '@angular/core';
import { ApiService, CorpusService } from '@services';
-import { Corpus, CorpusDocumentationPage, FieldCoverage } from '@models';
+import { Corpus, CorpusDocumentationPage, FieldCardinality, FieldCoverage } from '@models';
import { marked } from 'marked';
import { Observable } from 'rxjs';
import { Title } from '@angular/platform-browser';
@@ -18,6 +18,7 @@ export class CorpusInfoComponent implements OnInit {
corpus: Corpus;
fieldCoverage: FieldCoverage;
+ fieldCardinality: FieldCardinality;
documentation$: Observable;
@@ -40,6 +41,9 @@ export class CorpusInfoComponent implements OnInit {
this.apiService.fieldCoverage(corpus.name).then(
result => this.fieldCoverage = result
);
+ this.apiService.fieldCardinality(corpus.name).then(
+ result => this.fieldCardinality = result
+ );
this.title.setTitle(pageTitle(`About ${corpus.title}`));
}
diff --git a/frontend/src/app/corpus/corpus-info/field-info/field-info.component.html b/frontend/src/app/corpus/corpus-info/field-info/field-info.component.html
index c50735816..d70b9af9b 100644
--- a/frontend/src/app/corpus/corpus-info/field-info/field-info.component.html
+++ b/frontend/src/app/corpus/corpus-info/field-info/field-info.component.html
@@ -36,9 +36,21 @@ {{field.displayName}}
{{coveragePercentage}}% of the documents in this corpus have a value for this field
-
Loading coverage data...
+
+ @switch(cardinality) {
+ @case(undefined) {
+ Loading cardinality data...
+ }
+ @case(0) {
+ Unique values are only calculated for keyword fields, not free text fields or dates.
+ }
+ @default {
+ There are {{cardinality}} unique values for this field.
+ }
+ }
+
diff --git a/frontend/src/app/corpus/corpus-info/field-info/field-info.component.ts b/frontend/src/app/corpus/corpus-info/field-info/field-info.component.ts
index 179d37ecb..db9bcd240 100644
--- a/frontend/src/app/corpus/corpus-info/field-info/field-info.component.ts
+++ b/frontend/src/app/corpus/corpus-info/field-info/field-info.component.ts
@@ -11,6 +11,7 @@ import * as _ from 'lodash';
export class FieldInfoComponent implements OnInit {
@Input() field: CorpusField;
@Input() coverage: number;
+ @Input() cardinality: number;
mappingNames = {
text: 'text',
diff --git a/frontend/src/app/models/visualization.ts b/frontend/src/app/models/visualization.ts
index f330d4b25..53ff7a4ae 100644
--- a/frontend/src/app/models/visualization.ts
+++ b/frontend/src/app/models/visualization.ts
@@ -113,3 +113,8 @@ export interface ChartParameters {
export interface FieldCoverage {
[field: string]: number;
};
+
+/** maps each field name to the number of unique values for that field */
+export interface FieldCardinality {
+ [field: string]: number;
+}
diff --git a/frontend/src/app/services/api.service.ts b/frontend/src/app/services/api.service.ts
index d3fc870de..23a205660 100644
--- a/frontend/src/app/services/api.service.ts
+++ b/frontend/src/app/services/api.service.ts
@@ -2,7 +2,7 @@
import { Injectable } from '@angular/core';
import { HttpClient, HttpParams } from '@angular/common/http';
-import { interval, Observable } from 'rxjs';
+import { firstValueFrom, interval, Observable } from 'rxjs';
import { filter, switchMap, take, takeUntil } from 'rxjs/operators';
import {
AggregateTermFrequencyParameters,
@@ -13,6 +13,7 @@ import {
Download,
DownloadOptions,
FieldCoverage,
+ FieldCardinality,
FoundDocument,
GeoDocument,
GeoLocation,
@@ -185,6 +186,11 @@ export class ApiService {
return this.http.get(url).toPromise();
}
+    /** Fetches the number of unique values per field for the given corpus. */
+    fieldCardinality(corpusName: string): Promise<FieldCardinality> {
+        return firstValueFrom(this.http.get<FieldCardinality>(this.apiRoute(this.visApiURL, `cardinality/${corpusName}`)));
+    }
+
// Download
public requestFullData(
data:
From a87be3fdcd5fb5e1ac5908153a35d0ccd0deace5 Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Wed, 27 May 2026 11:14:04 +0200
Subject: [PATCH 07/10] trying a dummy root password for elasticsearch
---
.env-ci | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.env-ci b/.env-ci
index 1cc42b5e3..b74b8d9bc 100644
--- a/.env-ci
+++ b/.env-ci
@@ -5,4 +5,5 @@ SQL_DATABASE=ianalyzer
SQL_PASSWORD=ianalyzer
CELERY_BROKER=redis://redis
ES_HOST=elasticsearch
-DATA_DIR=/ci-data
\ No newline at end of file
+DATA_DIR=/ci-data
+ELASTIC_ROOT_PASSWORD=ianalyzer
\ No newline at end of file
From 5810556b977a65c4728c8572720c5067e4012510 Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Wed, 27 May 2026 12:57:17 +0200
Subject: [PATCH 08/10] trying no password in env or compose file
---
.env-ci | 1 -
docker-compose.yaml | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/.env-ci b/.env-ci
index b74b8d9bc..91474760f 100644
--- a/.env-ci
+++ b/.env-ci
@@ -6,4 +6,3 @@ SQL_PASSWORD=ianalyzer
CELERY_BROKER=redis://redis
ES_HOST=elasticsearch
DATA_DIR=/ci-data
-ELASTIC_ROOT_PASSWORD=ianalyzer
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 4f0082793..f763a4216 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -65,7 +65,7 @@ services:
- logger.org.elasticsearch.http=ERROR
- logger.org.elasticsearch.cluster=ERROR
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
- - ELASTIC_PASSWORD=$ELASTIC_ROOT_PASSWORD
+
ulimits:
memlock:
soft: -1
From 58c9c11699557419c2ce8b35585a0fbcd3a6f4bd Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Wed, 27 May 2026 13:05:08 +0200
Subject: [PATCH 09/10] trying disabling ssl in env file
---
docker-compose.yaml | 1 +
1 file changed, 1 insertion(+)
diff --git a/docker-compose.yaml b/docker-compose.yaml
index f763a4216..d80b8438d 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -60,6 +60,7 @@ services:
- cluster.name=ianalizer-es-data-cluster
- bootstrap.memory_lock=true
- xpack.security.enabled=false
+ - xpack.security.http.ssl.enabled=false
- logger.org.elasticsearch.discovery=ERROR
- logger.org.elasticsearch.transport=ERROR
- logger.org.elasticsearch.http=ERROR
From 8a874c261e2db75ae9ea0df2e895e98f4961dae2 Mon Sep 17 00:00:00 2001
From: Meesch <31687030+Meesch@users.noreply.github.com>
Date: Fri, 5 Jun 2026 09:17:13 +0200
Subject: [PATCH 10/10] 'fix' test for cardinality (issue made about Github
Actions env)
---
backend/visualization/tests/test_field_stats.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/backend/visualization/tests/test_field_stats.py b/backend/visualization/tests/test_field_stats.py
index a58881f5a..5da9089f7 100644
--- a/backend/visualization/tests/test_field_stats.py
+++ b/backend/visualization/tests/test_field_stats.py
@@ -21,7 +21,7 @@ def test_report(small_mock_corpus, es_client, index_small_mock_corpus, small_moc
'genre': 1.0,
}
-def test_cardinality(small_mock_corpus):
+def test_cardinality(small_mock_corpus, es_client, index_small_mock_corpus, small_mock_corpus_specs):
report = report_cardinality(small_mock_corpus)
assert report == {
'date': 0,