Skip to content

Commit d57e564

Browse files
authored
Release 50
- #1511 - #1515 - #1525 - #1512 - #1498
2 parents 784aa85 + eacf61f commit d57e564

38 files changed

Lines changed: 20088 additions & 705 deletions

etl/etl.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,7 @@ def add_data_sets(sets_set):
173173
try:
174174
obj, created = DataSetType.objects.update_or_create(name=dss['name'], data_type=dss['data_type'], set_type=dss['set_type'])
175175

176-
print("[STATUS] Data Set Type created:")
177-
print(obj)
176+
logger.info("[STATUS] Data Set Type created: {}".format(obj))
178177
except Exception as e:
179178
msg = "Data Version {} may not have been added!".format(dss['name'])
180179
ERRORS_SEEN.append(msg)
@@ -219,8 +218,7 @@ def add_programs(program_set):
219218
short_name=prog['short_name'], name=prog['full_name'], is_public=prog['public'],
220219
owner=User.objects.get(email=prog['owner']) if 'owner' in prog else idc_superuser)
221220

222-
print("Program created:")
223-
print(obj)
221+
logger.info("[STATUS] Program created: {}".format(obj))
224222

225223
results[obj.short_name] = obj
226224

@@ -281,12 +279,14 @@ def add_data_source(name, count_col, source_type, versions, programs, aggregate_
281279
)
282280
copy_attrs([attr_from], [name], attr_exclude)
283281

284-
print("[STATUS] DataSource entry created for: {}".format(obj.name))
282+
logger.info("[STATUS] DataSource entry created for: {}".format(obj.name))
285283
except Exception as e:
286284
msg = "DataSource {} may not have been added!".format(obj.name if obj else 'Unknown')
287-
ERRORS_SEEN.append(msg)
285+
clarifier = "Attributes are copied from a DataSource ORM object matched on the name, NOT from BigQuery directly! Check to make sure you have the correct attribute source name in the ETL config file."
286+
ERRORS_SEEN.append(msg + "\n" + clarifier)
288287
logger.error("[ERROR] {}".format(msg))
289288
logger.exception(e)
289+
logger.error("[ERROR] {}".format(clarifier))
290290

291291

292292
def add_source_joins(froms, from_col, tos=None, to_col=None):
@@ -322,26 +322,27 @@ def add_source_joins(froms, from_col, tos=None, to_col=None):
322322
def load_citations(filename):
323323
try:
324324
cites_file = open(filename,"r")
325-
current_cites = [x.doi for x in Citation.objects.all()]
325+
current_cites = [x.doi.lower() for x in Citation.objects.all()]
326326
new_cites = []
327327
updated_cites = {}
328328
for line in csv_reader(cites_file):
329-
if "doi, citation" in line:
330-
print("[STATUS] Saw header line during citation load - skipping!")
329+
if "source_doi" in line:
330+
logger.info("[STATUS] Saw header line during citation load - skipping!")
331331
continue
332-
if line[0] in current_cites:
333-
updated_cites[line[0]] = line[1]
332+
if line[0].lower() in current_cites:
333+
updated_cites[line[0].lower()] = {"doi": line[0], "cite":line[1]}
334334
else:
335335
new_cites.append(Citation(doi=line[0], cite=line[1]))
336336
if len(new_cites):
337337
Citation.objects.bulk_create(new_cites)
338-
print("[STATUS] The following {} DOI citations were added: {}".format(len(new_cites), " ".join([x.doi for x in new_cites])))
338+
logger.info("[STATUS] The following {} DOI citations were added: {}".format(len(new_cites), " ".join([x.doi for x in new_cites])))
339339
if len(updated_cites):
340340
to_update = Citation.objects.filter(doi__in=updated_cites.keys())
341341
for upd in to_update:
342-
upd.cite = updated_cites[upd.doi]
343-
Citation.objects.bulk_update(to_update, ["cite"])
344-
print("[STATUS] {} DOI citations were updated.".format(len(updated_cites)))
342+
upd.cite = updated_cites[upd.doi.lower()]["cite"]
343+
upd.doi = updated_cites[upd.doi.lower()]["doi"]
344+
Citation.objects.bulk_update(to_update, ["doi", "cite"])
345+
logger.info("[STATUS] {} DOI citations were updated.".format(len(updated_cites)))
345346
except Exception as e:
346347
ERRORS_SEEN.append("Error seen while loading citations, check the logs!")
347348
logger.error("[ERROR] While trying to load citations: ")
@@ -356,11 +357,11 @@ def load_collections(filename, data_version="8.0"):
356357
exact_collection_fields = [
357358
"collection_id", "collection_uuid", "name", "collections", "image_types", "supporting_data", "subject_count", "doi",
358359
"source_url", "cancer_type", "species", "location", "analysis_artifacts", "description", "collection_type",
359-
"access", "date_updated", "active", "total_size", "total_size_with_ar"]
360+
"access", "date_updated", "active","total_size", "total_size_with_ar"]
360361
field_map = FIELD_MAP
361362
for line in csv_reader(collection_file):
362363
if COLLECTION_HEADER_CHK in line:
363-
print("[STATUS] Header found - mappping attributes.")
364+
logger.info("[STATUS] Header found - mappping attributes.")
364365
i = 0
365366
field_map = {}
366367
for field in line:
@@ -371,6 +372,7 @@ def load_collections(filename, data_version="8.0"):
371372
'data': { x: line[field_map[x]] for x in exact_collection_fields },
372373
"data_versions": [{"ver": data_version, "name": "TCIA Image Data"}]
373374
}
375+
collex['data']['license'] = line[field_map["license_short_name"]]
374376
collex['data']['nbia_collection_id'] = line[field_map['tcia_wiki_collection_id']]
375377
collex['data']['tcia_collection_id'] = line[field_map['tcia_wiki_collection_id']]
376378
collex['data']['active'] = bool((line[field_map['active']]).lower() == "true")
@@ -612,7 +614,7 @@ def copy_attrs(from_data_sources, to_data_sources, attr_excludes):
612614

613615
for fds in from_sources:
614616
from_source_attrs = fds.attribute_set.exclude(id__in=to_sources_attrs['ids']).exclude(name__in=attr_excludes)
615-
print("Copying {} attributes from {} to: {}.".format(
617+
logger.info("[STATUS] Copying {} attributes from {} to: {}.".format(
616618
len(from_source_attrs.values_list('name',flat=True)),
617619
fds.name, "; ".join(to_data_sources),
618620

@@ -817,20 +819,19 @@ def parse_args():
817819
parser.add_argument('-s', '--solr-files-only', type=str, default='', help=solr_msg)
818820
return parser.parse_args()
819821

820-
821822
def main():
822823

823824
try:
824825
if len(sys.argv) <= 1:
825-
print("Use -h to access the help description.")
826+
logger.info("Use -h to access the help description.")
826827
exit(0)
827828

828829
args = parse_args()
829830

830831
# Load the configuration file into ETL_CONFIG and run data version and data source creation
831832
# This will copy over any attributes from prior versions indicated in the JSON config
832833
# Note that the config file is only required for 'full ETL' i.e. creation of new versions and
833-
# deprecation of prior ones; it can be omitted to perform piecemeal updates eg. to collections
834+
# deprecation of prior ones and running BQ queries. It can be omitted to perform piecemeal updates, e.g. to
834835
# metadata
835836
len(args.config_file) and update_data_versions(args.config_file)
836837

@@ -860,7 +861,7 @@ def main():
860861
attr_obj = Attribute.objects.get(name=attr)
861862
update_display_values(attr_obj, dvals[attr]['vals'])
862863
except ObjectDoesNotExist as e:
863-
print("[WARNING] Attr {} not found - display values will not be updated! Rerun ETL if this is not expected.".format(attr))
864+
logger.warning("[WARNING] Attr {} not found - display values will not be updated! Rerun ETL if this is not expected.".format(attr))
864865

865866
# Solr commands are automatically output for full ETL; the step below is for outside-of-ETL executions
866867
if len(ETL_CONFIG):
@@ -886,7 +887,7 @@ def main():
886887
logger.exception(e)
887888
if len(ERRORS_SEEN):
888889
for err in ERRORS_SEEN:
889-
print("-> {}".format(err))
890+
logger.error("-> {}".format(err))
890891

891892

892893
if __name__ == "__main__":

etl/etl_bq.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,43 @@
4141
from google_helpers.bigquery.bq_support import BigQuerySupport
4242

4343

44+
def fetch_display_vals(dataset_version):
    """Placeholder for fetching display values; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None
47+
48+
4449
def fetch_collex_metadata(dataset_version):
    """Placeholder for fetching collection metadata; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None
52+
4553

54+
def fetch_citations(dataset_version):
    """Placeholder for fetching citations; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None
57+
4758

59+
def build_derived_pivots(dataset_version):
    """Placeholder for building derived pivots; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``. The planned steps are kept as
    notes below.
    """
    # check for new attributes
    # run queries
    return None
4964

65+
66+
def build_derived_all(dataset_version):
    """Placeholder for building all derived data; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None
69+
70+
71+
def build_and_export_indicies(dataset_version):
    """Placeholder for building and exporting indices; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None
74+
75+
76+
def create_pivot(dataset_version):
    """Placeholder for creating a pivot; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None
79+
80+
81+
def fetch_data_summary(dataset_version):
    """Placeholder for fetching a data summary; not implemented yet.

    ``dataset_version`` is accepted but currently unused. The function
    performs no work and returns ``None``.
    """
    return None

idc/models.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,22 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16+
import uuid
17+
from datetime import timedelta
1618

1719
from django.db import models
1820
from django.contrib.auth.models import User
19-
21+
import uuid
22+
import datetime
23+
import base64
2024
import logging
2125

2226
logger = logging.getLogger(__name__)
2327

28+
def cart_keygen():
    """Generate a random cart key.

    The key is the standard base64 encoding of a fresh UUID4's 16 raw
    bytes with the trailing ``=`` padding removed, giving a 22-character
    ``str``.

    Returns:
        str: the padding-free base64 text of a random UUID.
    """
    # b64encode() returns bytes on Python 3; decode to text first, because
    # calling bytes.replace() with str arguments raises TypeError.
    encoded = base64.b64encode(uuid.uuid4().bytes).decode("ascii")
    return encoded.replace("=", "")
30+
31+
CART_EXPIRATION = datetime.timedelta(days=90)
2432

2533
class AppInfo(models.Model):
2634
id = models.AutoField(primary_key=True, null=False, blank=False)
@@ -29,9 +37,7 @@ class AppInfo(models.Model):
2937
app_date = models.DateField(auto_now_add=True, null=False, blank=False)
3038
active = models.BooleanField(default=True, null=False, blank=False)
3139

32-
3340
class User_Data(models.Model):
3441
id = models.AutoField(primary_key=True, null=False, blank=False)
3542
user = models.ForeignKey(User, null=False, blank=True, on_delete=models.CASCADE)
3643
history = models.CharField(max_length=2000, blank=False, null=False, default='')
37-

idc/templatetags/custom_tags.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102

103103
ORIG_ATTR_FIRST = [
104104
"tcia_tumorLocation",
105+
"license_short_name",
105106
"CancerType",
106107
"BodyPartExamined",
107108
"Modality"
@@ -121,6 +122,11 @@ def quick_js_bracket_replace(matchobj):
121122
return '\u003E'
122123

123124

125+
@register.filter
def size_check(collex_size):
    """Template filter: tell whether ``collex_size`` lies in the range (1, 3].

    Returns ``True`` when ``collex_size`` is strictly greater than 1 and
    no greater than 3, otherwise ``False``.
    """
    within_bounds = 1 < collex_size <= 3
    return within_bounds
128+
129+
124130
@register.filter
125131
def parse_cooloff(timedelta):
126132
return str(datetime.utcnow() + timedelta)

idc/urls.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@
5151
re_path(r'^explore/manifest/$', views.explorer_manifest, name='get_explore_manifest'),
5252
re_path(r'^explore/manifest/series/$', views.explorer_manifest, name='get_series_ids_manifest'),
5353
re_path(r'^tables/', views.populate_tables, name='populate_tables'),
54-
re_path(r'^warning/', views.warn_page, name='warn'),
54+
re_path(r'^warning/$', views.warn_page, name='warn'),
5555
re_path(r'^about/$', views.about_page, name='about_page'),
56-
re_path(r'^dashboard/', views.dashboard_page, name='dashboard'),
56+
re_path(r'^dashboard/$', views.dashboard_page, name='dashboard'),
5757
re_path(r'^extended_login/$', views.extended_login_view, name='extended_login'),
5858
re_path(r'^privacy/$', views.privacy_policy, name='privacy'),
5959
re_path(r'^news/$', views.news_page, name='news'),
@@ -67,6 +67,7 @@
6767
re_path(r'^series_ids/(?P<collection_id>[A-Za-z0-9\.\-_]+)/(?P<patient_id>[A-Za-z0-9\.\-_]+)/(?P<study_uid>[0-9\.]+)/$', views.get_series, name='get_series'),
6868
re_path(r'^collaborators/$', views.collaborators, name='collaborators'),
6969
re_path(r'^collections/', include('idc_collections.urls')),
70+
re_path(r'^analysis_results/(?P<analysis_result_id>[A-Za-z\d\-\_]+)/$', views.analysis_results_details, name='analysis_results'),
7071
re_path(r'^citations/', views.get_citations, name='get_citations'),
7172
# re_path(r'^share/', include('sharing.urls')),
7273
]

idc/views.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from cohorts.models import Cohort, Cohort_Perms
4040

4141
from idc_collections.models import Program, DataSource, Collection, ImagingDataCommonsVersion, Attribute, Attribute_Tooltips, DataSetType, Citation
42+
from idc_collections.views import collection_details
4243
from idc_collections.collex_metadata_utils import (build_explorer_context, get_collex_metadata, create_file_manifest,
4344
get_cart_data_serieslvl, get_cart_data_studylvl,
4445
get_table_data_with_cart_data, cart_manifest)
@@ -58,6 +59,10 @@
5859

5960
BQ_ATTEMPT_MAX = 10
6061
WEBAPP_LOGIN_LOG_NAME = settings.WEBAPP_LOGIN_LOG_NAME
62+
SERIES_COLLEX_ID_TYPE = {
63+
"collection": "collection_id",
64+
"analysis_result": "analysis_results_id"
65+
}
6166

6267

6368
# The site's homepage
@@ -290,6 +295,8 @@ def populate_tables(request):
290295

291296
return JsonResponse(response, status=status)
292297

298+
def analysis_results_details(request, analysis_result_id):
    """View for a single analysis result's detail page.

    Delegates to ``collection_details``, passing ``analysis_result_id`` as
    its second positional argument, and returns whatever that view returns.
    """
    details_response = collection_details(request, analysis_result_id)
    return details_response
293300

294301
def get_citations(request):
295302
resp = { 'message': 'error', 'citations': None}
@@ -524,7 +531,7 @@ def parse_explore_filters(request):
524531

525532
# Callback for recording the user's agreement to the warning popup
526533
def warn_page(request):
527-
request.session['seenWarning'] = True;
534+
request.session['seenWarning'] = True
528535
return JsonResponse({'warning_status': 'SEEN'}, status=200)
529536

530537

@@ -545,11 +552,13 @@ def cart_page(request):
545552
carthist = json.loads(req.get('carthist', '{}'))
546553
mxseries = req.get('mxseries',0)
547554
mxstudies = req.get('mxstudies',0)
555+
cart_disk_size = req.get('cart_disk_size', 0)
548556
stats = req.get('stats', '')
549557

550558
context['carthist'] = carthist
551559
context['mxseries'] = mxseries
552560
context['mxstudies'] = mxstudies
561+
context['cart_disk_size'] = cart_disk_size
553562
context['stats'] = stats
554563

555564
except Exception as e:
@@ -594,12 +603,12 @@ def cart_data(request):
594603
filtergrp_list, partitions, field_list if (not doi_or_size_only) else None, limit, offset,
595604
with_records=(not doi_or_size_only), dois_only=dois_only, size_only=size_only
596605
)
597-
print("response: {}".format(response))
598606
if dois_only:
599607
response = {'dois': response['dois']}
600608
if size_only:
601609
response = {
602-
"display_size": convert_disk_size(response['total_size'])
610+
"display_size": convert_disk_size(response['total_size']),
611+
"total_size": response['total_size']
603612
}
604613
except Exception as e:
605614
logger.error("[ERROR] While loading cart:")
@@ -612,12 +621,13 @@ def cart_data(request):
612621
def get_series(request, collection_id=None, patient_id=None, study_uid=None):
613622
status = 200
614623
response = {"result": []}
624+
req = request.GET if request.method == 'GET' else request.POST
615625
try:
616626
fields = ["collection_id", "PatientID", "StudyInstanceUID", "Modality", "crdc_series_uuid", "SeriesInstanceUID", "aws_bucket", "instance_size"]
617627
result = {}
618628
if not collection_id:
619629
# This is a request for filters and/or a cart
620-
body_unicode = request.body.decode('utf-8')
630+
body_unicode = req.body.decode('utf-8')
621631
body = json.loads(body_unicode)
622632
partitions = body.get("partitions", {})
623633
filtergrp_list = body.get("filtergrp_list", {})
@@ -651,8 +661,9 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
651661
active=True, source_type=DataSource.SOLR,
652662
aggregate_level="SeriesInstanceUID"
653663
).first()
664+
id_type = SERIES_COLLEX_ID_TYPE[req.get("type","collection")]
654665
filters = {
655-
"collection_id": [collection_id]
666+
id_type: [collection_id]
656667
}
657668
if patient_id:
658669
filters['PatientID'] = [patient_id]
@@ -673,7 +684,9 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
673684
}
674685
)
675686

687+
test_series = {}
676688
for doc in result['docs']:
689+
collection_id = doc['collection_id'][0] if isinstance(doc['collection_id'], list) else doc['collection_id']
677690
response['result'].append({
678691
"series_id": doc['SeriesInstanceUID'],
679692
"crdc_series_id": doc['crdc_series_uuid'],
@@ -682,8 +695,15 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
682695
"modality": doc['Modality'][0] if isinstance(doc['Modality'], list) else doc['Modality'],
683696
"study_id": doc['StudyInstanceUID'],
684697
"patient_id": doc["PatientID"],
685-
"collection_id": doc['collection_id'][0] if isinstance(doc['collection_id'], list) else doc['collection_id']
698+
"collection_id": collection_id
686699
})
700+
if not collection_id in test_series:
701+
test_series[collection_id] = {
702+
"series_id": doc['SeriesInstanceUID'],
703+
"study_id": doc['StudyInstanceUID'],
704+
"patient_id": doc["PatientID"],
705+
"collection_id": collection_id
706+
}
687707
if 'facets' in result:
688708
response['download_stats'] = {
689709
'series_count': result['facets']['total_SeriesInstanceUID'],
@@ -692,6 +712,7 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
692712
'case_count': result['facets']['total_PatientID'],
693713
'queue_byte_size': result['facets']['instance_size'],
694714
}
715+
response['test_series'] = list(test_series.values())
695716

696717
except Exception as e:
697718
logger.error("[ERROR] While fetching series per study ID:")

0 commit comments

Comments
 (0)