Skip to content

Commit b769b7c

Browse files
authored
Merge pull request #2073 from CentreForDigitalHumanities/feature/dropdown-imroved
Feature/dropdown improved
2 parents 6d766d3 + 39b334b commit b769b7c

15 files changed

Lines changed: 130 additions & 17 deletions

.env-ci

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ SQL_DATABASE=ianalyzer
55
SQL_PASSWORD=ianalyzer
66
CELERY_BROKER=redis://redis
77
ES_HOST=elasticsearch
8-
DATA_DIR=/ci-data
8+
DATA_DIR=/ci-data

backend/visualization/field_stats.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,35 @@ def report_coverage(corpus_name):
5050
}
5151

5252

53+
def cardinality_results(search_result):
54+
return search_result['aggregations']['unique_category_count']['value']
55+
56+
def report_cardinality(corpus_name):
57+
'''
58+
Returns a dict mapping each field name in the corpus to its number of unique values (reported as 0 for fields whose display type is not 'keyword')
59+
'''
60+
es_client = elasticsearch(corpus_name)
61+
corpus_conf = CorpusConfiguration.objects.get(corpus__name=corpus_name)
62+
cardinality_dict = {}
63+
64+
query = {
65+
"size": 0,
66+
"aggs": {
67+
"unique_category_count": {
68+
"cardinality": {
69+
"field": "PLACEHOLDER",
70+
"precision_threshold": 10000
71+
}
72+
}
73+
}
74+
}
75+
76+
for field in corpus_conf.fields.all():
77+
if field.display_type != 'keyword':
78+
cardinality_dict[field.name] = 0
79+
else:
80+
query_for_field = query
81+
query_for_field['aggs']['unique_category_count']['cardinality']['field'] = field.name
82+
cardinality_dict[field.name] = cardinality_results(es_client.search(index=corpus_conf.es_index, body=query_for_field))
83+
84+
return cardinality_dict

backend/visualization/tests/test_field_stats.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from visualization.field_stats import count_field, count_total, report_coverage
1+
from visualization.field_stats import count_field, count_total, report_coverage, report_cardinality
22

33

44
def test_count(small_mock_corpus, es_client, index_small_mock_corpus, small_mock_corpus_specs):
@@ -20,3 +20,12 @@ def test_report(small_mock_corpus, es_client, index_small_mock_corpus, small_moc
2020
'content': 1.0,
2121
'genre': 1.0,
2222
}
23+
24+
def test_cardinality(small_mock_corpus, es_client, index_small_mock_corpus, small_mock_corpus_specs):
25+
report = report_cardinality(small_mock_corpus)
26+
assert report == {
27+
'date': 0,
28+
'title': 0,
29+
'content': 0,
30+
'genre': 3
31+
}

backend/visualization/urls.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
path('ngram', NgramView.as_view()),
99
path('date_term_frequency', DateTermFrequencyView.as_view()),
1010
path('aggregate_term_frequency', AggregateTermFrequencyView.as_view()),
11-
path('coverage/<str:corpus>', FieldCoverageView.as_view())
11+
path('coverage/<str:corpus>', FieldCoverageView.as_view()),
12+
path('cardinality/<str:corpus>', FieldCardinalityView.as_view())
1213
]

backend/visualization/views.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from django.conf import settings
77
from addcorpus.permissions import CanSearchCorpus
88
from tag.permissions import CanSearchTags
9-
from visualization.field_stats import report_coverage
9+
from visualization.field_stats import report_coverage, report_cardinality
1010
from addcorpus.permissions import corpus_name_from_request
1111
from api.utils import check_json_keys
1212

@@ -181,3 +181,15 @@ def get(self, request, *args, **kwargs):
181181
corpus = corpus_name_from_request(request)
182182
report = report_coverage(corpus)
183183
return Response(report)
184+
185+
class FieldCardinalityView(APIView):
186+
'''
187+
Get the number of different values for each field in a corpus
188+
'''
189+
190+
permission_classes = [CanSearchCorpus]
191+
192+
def get(self, request, *args, **kwargs):
193+
corpus = corpus_name_from_request(request)
194+
report = report_cardinality(corpus)
195+
return Response(report)

docker-compose.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,13 @@ services:
6060
- cluster.name=ianalizer-es-data-cluster
6161
- bootstrap.memory_lock=true
6262
- xpack.security.enabled=false
63+
- xpack.security.http.ssl.enabled=false
6364
- logger.org.elasticsearch.discovery=ERROR
6465
- logger.org.elasticsearch.transport=ERROR
6566
- logger.org.elasticsearch.http=ERROR
6667
- logger.org.elasticsearch.cluster=ERROR
6768
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
68-
- ELASTIC_PASSWORD=$ELASTIC_ROOT_PASSWORD
69+
6970
ulimits:
7071
memlock:
7172
soft: -1

frontend/src/app/corpus/corpus-info/corpus-info.component.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
id="fields" title="Fields">
3333
<div class="block" *ngFor="let field of corpus.fields">
3434
<ia-field-info [field]="field"
35-
[coverage]="fieldCoverage ? fieldCoverage[field.name] : undefined">
35+
[coverage]="fieldCoverage ? fieldCoverage[field.name] : undefined"
36+
[cardinality]="fieldCardinality ? fieldCardinality[field.name] : undefined">
3637
</ia-field-info>
3738
</div>
3839
</ng-template>

frontend/src/app/corpus/corpus-info/corpus-info.component.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { Component, OnInit } from '@angular/core';
22
import { ApiService, CorpusService } from '@services';
3-
import { Corpus, CorpusDocumentationPage, FieldCoverage } from '@models';
3+
import { Corpus, CorpusDocumentationPage, FieldCardinality, FieldCoverage } from '@models';
44
import { marked } from 'marked';
55
import { Observable } from 'rxjs';
66
import { Title } from '@angular/platform-browser';
@@ -18,6 +18,7 @@ export class CorpusInfoComponent implements OnInit {
1818
corpus: Corpus;
1919

2020
fieldCoverage: FieldCoverage;
21+
fieldCardinality: FieldCardinality;
2122

2223
documentation$: Observable<CorpusDocumentationPage[]>;
2324

@@ -40,6 +41,9 @@ export class CorpusInfoComponent implements OnInit {
4041
this.apiService.fieldCoverage(corpus.name).then(
4142
result => this.fieldCoverage = result
4243
);
44+
this.apiService.fieldCardinality(corpus.name).then(
45+
result => this.fieldCardinality = result
46+
);
4347
this.title.setTitle(pageTitle(`About ${corpus.title}`));
4448
}
4549

frontend/src/app/corpus/corpus-info/field-info/field-info.component.html

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,21 @@ <h3 class="title is-5">{{field.displayName}}</h3>
3636
<p *ngIf="coverage !== undefined">
3737
<b>{{coveragePercentage}}%</b> of the documents in this corpus have a value for this field
3838
</p>
39-
4039
<p *ngIf="coverage === undefined">
4140
Loading coverage data...
4241
</p>
42+
43+
@switch(cardinality) {
44+
@case(undefined) {
45+
<p>Loading cardinality data...</p>
46+
}
47+
@case(0) {
48+
<p>Unique values are only calculated for keyword fields, not free text fields or dates.</p>
49+
}
50+
@default {
51+
<p>There are <b>{{cardinality}}</b> unique values for this field.</p>
52+
}
53+
}
54+
4355
</div>
4456
</details>

frontend/src/app/corpus/corpus-info/field-info/field-info.component.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import * as _ from 'lodash';
1111
export class FieldInfoComponent implements OnInit {
1212
@Input() field: CorpusField;
1313
@Input() coverage: number;
14+
@Input() cardinality: number;
1415

1516
mappingNames = {
1617
text: 'text',

0 commit comments

Comments
 (0)