ImagingDataCommons
diff --git a/‎etl/etl.py‎
Lines changed: 24 additions & 23 deletions b/‎etl/etl.py‎
Lines changed: 24 additions & 23 deletions
diff --git a/‎etl/etl_bq.py‎
Lines changed: 34 additions & 0 deletions b/‎etl/etl_bq.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎idc/models.py‎
Lines changed: 9 additions & 3 deletions b/‎idc/models.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎idc/templatetags/custom_tags.py‎
Lines changed: 6 additions & 0 deletions b/‎idc/templatetags/custom_tags.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎idc/urls.py‎
Lines changed: 3 additions & 2 deletions b/‎idc/urls.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎idc/views.py‎
Lines changed: 27 additions & 6 deletions b/‎idc/views.py‎
Lines changed: 27 additions & 6 deletions
@@ -173,8 +173,7 @@ def add_data_sets(sets_set):
         try:
             obj, created = DataSetType.objects.update_or_create(name=dss['name'], data_type=dss['data_type'], set_type=dss['set_type'])
 
-            print("[STATUS] Data Set Type created:")
-            print(obj)
+            logger.info("[STATUS] Data Set Type created: {}".format(obj))
         except Exception as e:
             msg = "Data Version {} may not have been added!".format(dss['name'])
             ERRORS_SEEN.append(msg)
@@ -219,8 +218,7 @@ def add_programs(program_set):
                 short_name=prog['short_name'], name=prog['full_name'], is_public=prog['public'],
                 owner=User.objects.get(email=prog['owner']) if 'owner' in prog else idc_superuser)
 
-            print("Program created:")
-            print(obj)
+            logger.info("[STATUS] Program created: {}".format(obj))
 
             results[obj.short_name] = obj
 
@@ -281,12 +279,14 @@ def add_data_source(name, count_col, source_type, versions, programs, aggregate_
             )
         copy_attrs([attr_from], [name], attr_exclude)
 
-        print("[STATUS] DataSource entry created for: {}".format(obj.name))
+        logger.info("[STATUS] DataSource entry created for: {}".format(obj.name))
     except Exception as e:
         msg = "DataSource {} may not have been added!".format(obj.name if obj else 'Unknown')
-        ERRORS_SEEN.append(msg)
+        clarifier = "Attributes are copied from a DataSource ORM object matched on the name, NOT from BigQuery directly! Check to make sure you have the correct attribute source name in the ETL config file."
+        ERRORS_SEEN.append(msg + "\n" + clarifier)
         logger.error("[ERROR] {}".format(msg))
         logger.exception(e)
+        logger.error("[ERROR] {}".format(clarifier))
 
 
 def add_source_joins(froms, from_col, tos=None, to_col=None):
@@ -322,26 +322,27 @@ def add_source_joins(froms, from_col, tos=None, to_col=None):
 def load_citations(filename):
     try:
         cites_file = open(filename,"r")
-        current_cites = [x.doi for x in Citation.objects.all()]
+        current_cites = [x.doi.lower() for x in Citation.objects.all()]
         new_cites = []
         updated_cites = {}
         for line in csv_reader(cites_file):
-            if "doi, citation" in line:
-                print("[STATUS] Saw header line during citation load - skipping!")
+            if "source_doi" in line:
+                logger.info("[STATUS] Saw header line during citation load - skipping!")
                 continue
-            if line[0] in current_cites:
-                updated_cites[line[0]] = line[1]
+            if line[0].lower() in current_cites:
+                updated_cites[line[0].lower()] = {"doi": line[0], "cite":line[1]}
             else:
                 new_cites.append(Citation(doi=line[0], cite=line[1]))
         if len(new_cites):
             Citation.objects.bulk_create(new_cites)
-            print("[STATUS] The following {} DOI citations were added: {}".format(len(new_cites), "  ".join([x.doi for x in new_cites])))
+            logger.info("[STATUS] The following {} DOI citations were added: {}".format(len(new_cites), "  ".join([x.doi for x in new_cites])))
         if len(updated_cites):
             to_update = Citation.objects.filter(doi__in=updated_cites.keys())
             for upd in to_update:
-                upd.cite = updated_cites[upd.doi]
-            Citation.objects.bulk_update(to_update, ["cite"])
-            print("[STATUS] {} DOI citations were updated.".format(len(updated_cites)))
+                upd.cite = updated_cites[upd.doi.lower()]["cite"]
+                upd.doi = updated_cites[upd.doi.lower()]["doi"]
+            Citation.objects.bulk_update(to_update, ["doi", "cite"])
+            logger.info("[STATUS] {} DOI citations were updated.".format(len(updated_cites)))
     except Exception as e:
         ERRORS_SEEN.append("Error seen while loading citations, check the logs!")
         logger.error("[ERROR] While trying to load citations: ")
@@ -356,11 +357,11 @@ def load_collections(filename, data_version="8.0"):
         exact_collection_fields = [
             "collection_id", "collection_uuid", "name", "collections", "image_types", "supporting_data", "subject_count", "doi",
             "source_url", "cancer_type", "species", "location", "analysis_artifacts", "description", "collection_type",
-            "access", "date_updated", "active", "total_size", "total_size_with_ar"]
+            "access", "date_updated", "active","total_size", "total_size_with_ar"]
         field_map = FIELD_MAP
         for line in csv_reader(collection_file):
             if COLLECTION_HEADER_CHK in line:
-                print("[STATUS] Header found - mappping attributes.")
+                logger.info("[STATUS] Header found - mappping attributes.")
                 i = 0
                 field_map = {}
                 for field in line:
@@ -371,6 +372,7 @@ def load_collections(filename, data_version="8.0"):
                 'data': { x: line[field_map[x]] for x in exact_collection_fields },
                 "data_versions": [{"ver": data_version, "name": "TCIA Image Data"}]
             }
+            collex['data']['license'] = line[field_map["license_short_name"]]
             collex['data']['nbia_collection_id'] = line[field_map['tcia_wiki_collection_id']]
             collex['data']['tcia_collection_id'] = line[field_map['tcia_wiki_collection_id']]
             collex['data']['active'] = bool((line[field_map['active']]).lower() == "true")
@@ -612,7 +614,7 @@ def copy_attrs(from_data_sources, to_data_sources, attr_excludes):
 
     for fds in from_sources:
         from_source_attrs = fds.attribute_set.exclude(id__in=to_sources_attrs['ids']).exclude(name__in=attr_excludes)
-        print("Copying {} attributes from {} to: {}.".format(
+        logger.info("[STATUS] Copying {} attributes from {} to: {}.".format(
             len(from_source_attrs.values_list('name',flat=True)),
             fds.name, "; ".join(to_data_sources),
 
@@ -817,20 +819,19 @@ def parse_args():
     parser.add_argument('-s', '--solr-files-only', type=str, default='', help=solr_msg)
     return parser.parse_args()
 
-
 def main():
 
     try:
         if len(sys.argv) <= 1:
-            print("Use -h to access the help description.")
+            logger.info("Use -h to access the help description.")
             exit(0)
 
         args = parse_args()
 
         # Load the configuration file into ETL_CONFIG and run data version and data source creation
         # This will copy over any attributes from prior versions indicated in the JSON config
         # Note that the config file is only required for 'full ETL' i.e. creation of new versions and
-        # deprecation of prior ones; it can be omitted to perform piecemeal updates eg. to collections
+        # deprecation of prior ones and running BQ queries. It can be omitted to perform piecemeal updates eg. to
         # metadata
         len(args.config_file) and update_data_versions(args.config_file)
 
@@ -860,7 +861,7 @@ def main():
                     attr_obj = Attribute.objects.get(name=attr)
                     update_display_values(attr_obj, dvals[attr]['vals'])
                 except ObjectDoesNotExist as e:
-                    print("[WARNING] Attr {} not found - display values will not be updated! Rerun ETL if this is not expected.".format(attr))
+                    logger.warning("[WARNING] Attr {} not found - display values will not be updated! Rerun ETL if this is not expected.".format(attr))
 
         # Solr commands are automatically output for full ETL; the step below is for outside-of-ETL executions
         if len(ETL_CONFIG):
@@ -886,7 +887,7 @@ def main():
         logger.exception(e)
         if len(ERRORS_SEEN):
             for err in ERRORS_SEEN:
-                print("-> {}".format(err))
+                logger.error("-> {}".format(err))
 
 
 if __name__ == "__main__":
 
@@ -41,9 +41,43 @@
 from google_helpers.bigquery.bq_support import BigQuerySupport
 
 
+def fetch_display_vals(dataset_version):
+    pass
+    return
+
+
 def fetch_collex_metadata(dataset_version):
+    pass
+    return
+
 
+def fetch_citations(dataset_version):
     pass
+    return
+
 
+def build_derived_pivots(dataset_version):
+    # check for new attributes
+    # run queries
+    pass
     return
 
+
+def build_derived_all(dataset_version):
+    pass
+    return
+
+
+def build_and_export_indicies(dataset_version):
+    pass
+    return
+
+
+def create_pivot(dataset_version):
+    pass
+    return
+
+
+def fetch_data_summary(dataset_version):
+    pass
+    return
@@ -13,14 +13,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import uuid
+from datetime import timedelta
 
 from django.db import models
 from django.contrib.auth.models import User
-
+import uuid
+import datetime
+import base64
 import logging
 
 logger = logging.getLogger(__name__)
 
+def cart_keygen():  # Build a cart key: a random UUID4 (16 bytes) rendered as a 22-character unpadded base64 string.
+    return base64.b64encode(uuid.uuid4().bytes).decode('ascii').replace("=", "")  # b64encode returns bytes on Python 3; decode before stripping the '=' padding
+
+CART_EXPIRATION = datetime.timedelta(days=90)
 
 class AppInfo(models.Model):
     id = models.AutoField(primary_key=True, null=False, blank=False)
@@ -29,9 +37,7 @@ class AppInfo(models.Model):
     app_date = models.DateField(auto_now_add=True, null=False, blank=False)
     active = models.BooleanField(default=True, null=False, blank=False)
 
-
 class User_Data(models.Model):
     id = models.AutoField(primary_key=True, null=False, blank=False)
     user = models.ForeignKey(User, null=False, blank=True, on_delete=models.CASCADE)
     history = models.CharField(max_length=2000, blank=False, null=False, default='')
-
 
@@ -102,6 +102,7 @@
 
 ORIG_ATTR_FIRST = [
     "tcia_tumorLocation",
+    "license_short_name",
     "CancerType",
     "BodyPartExamined",
     "Modality"
@@ -121,6 +122,11 @@ def quick_js_bracket_replace(matchobj):
         return '\u003E'
 
 
+@register.filter
+def size_check(collex_size):
+    return 1 < collex_size <= 3
+
+
 @register.filter
 def parse_cooloff(timedelta):
     return str(datetime.utcnow() + timedelta)
 
@@ -51,9 +51,9 @@
     re_path(r'^explore/manifest/$', views.explorer_manifest, name='get_explore_manifest'),
     re_path(r'^explore/manifest/series/$', views.explorer_manifest, name='get_series_ids_manifest'),
     re_path(r'^tables/', views.populate_tables, name='populate_tables'),
-    re_path(r'^warning/', views.warn_page, name='warn'),
+    re_path(r'^warning/$', views.warn_page, name='warn'),
     re_path(r'^about/$', views.about_page, name='about_page'),
-    re_path(r'^dashboard/', views.dashboard_page, name='dashboard'),
+    re_path(r'^dashboard/$', views.dashboard_page, name='dashboard'),
     re_path(r'^extended_login/$', views.extended_login_view, name='extended_login'),
     re_path(r'^privacy/$', views.privacy_policy, name='privacy'),
     re_path(r'^news/$', views.news_page, name='news'),
@@ -67,6 +67,7 @@
     re_path(r'^series_ids/(?P<collection_id>[A-Za-z0-9\.\-_]+)/(?P<patient_id>[A-Za-z0-9\.\-_]+)/(?P<study_uid>[0-9\.]+)/$', views.get_series, name='get_series'),
     re_path(r'^collaborators/$', views.collaborators, name='collaborators'),
     re_path(r'^collections/', include('idc_collections.urls')),
+    re_path(r'^analysis_results/(?P<analysis_result_id>[A-Za-z\d\-\_]+)/$', views.analysis_results_details, name='analysis_results'),
     re_path(r'^citations/', views.get_citations, name='get_citations'),
     # re_path(r'^share/', include('sharing.urls')),
 ]
 
@@ -39,6 +39,7 @@
 from cohorts.models import Cohort, Cohort_Perms
 
 from idc_collections.models import Program, DataSource, Collection, ImagingDataCommonsVersion, Attribute, Attribute_Tooltips, DataSetType, Citation
+from idc_collections.views import collection_details
 from idc_collections.collex_metadata_utils import (build_explorer_context, get_collex_metadata, create_file_manifest,
                                                    get_cart_data_serieslvl, get_cart_data_studylvl,
                                                    get_table_data_with_cart_data, cart_manifest)
@@ -58,6 +59,10 @@
 
 BQ_ATTEMPT_MAX = 10
 WEBAPP_LOGIN_LOG_NAME = settings.WEBAPP_LOGIN_LOG_NAME
+SERIES_COLLEX_ID_TYPE = {
+    "collection": "collection_id",
+    "analysis_result": "analysis_results_id"
+}
 
 
 # The site's homepage
@@ -290,6 +295,8 @@ def populate_tables(request):
 
     return JsonResponse(response, status=status)
 
+def analysis_results_details(request, analysis_result_id):
+    return collection_details(request, analysis_result_id)
 
 def get_citations(request):
     resp = { 'message': 'error', 'citations': None}
@@ -524,7 +531,7 @@ def parse_explore_filters(request):
 
 # Callback for recording the user's agreement to the warning popup
 def warn_page(request):
-    request.session['seenWarning'] = True;
+    request.session['seenWarning'] = True
     return JsonResponse({'warning_status': 'SEEN'}, status=200)
 
 
@@ -545,11 +552,13 @@ def cart_page(request):
         carthist = json.loads(req.get('carthist', '{}'))
         mxseries = req.get('mxseries',0)
         mxstudies = req.get('mxstudies',0)
+        cart_disk_size = req.get('cart_disk_size', 0)
         stats = req.get('stats', '')
 
         context['carthist'] = carthist
         context['mxseries'] = mxseries
         context['mxstudies'] = mxstudies
+        context['cart_disk_size'] = cart_disk_size
         context['stats'] = stats
 
     except Exception as e:
@@ -594,12 +603,12 @@ def cart_data(request):
                     filtergrp_list, partitions, field_list if (not doi_or_size_only) else None, limit, offset,
                     with_records=(not doi_or_size_only), dois_only=dois_only, size_only=size_only
                 )
-        print("response: {}".format(response))
         if dois_only:
             response = {'dois': response['dois']}
         if size_only:
             response = {
-                "display_size": convert_disk_size(response['total_size'])
+                "display_size": convert_disk_size(response['total_size']),
+                "total_size": response['total_size']
             }
     except Exception as e:
         logger.error("[ERROR] While loading cart:")
@@ -612,12 +621,13 @@ def cart_data(request):
 def get_series(request, collection_id=None, patient_id=None, study_uid=None):
     status = 200
     response = {"result": []}
+    req = request.GET if request.method == 'GET' else request.POST  # parsed parameters only (a QueryDict); the raw payload is still read from request.body
     try:
         fields = ["collection_id", "PatientID", "StudyInstanceUID", "Modality", "crdc_series_uuid", "SeriesInstanceUID", "aws_bucket", "instance_size"]
         result = {}
         if not collection_id:
             # This is a request for filters and/or a cart
             body_unicode = request.body.decode('utf-8')
             body = json.loads(body_unicode)
             partitions = body.get("partitions", {})
             filtergrp_list = body.get("filtergrp_list", {})
@@ -651,8 +661,9 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
                 active=True, source_type=DataSource.SOLR,
                 aggregate_level="SeriesInstanceUID"
             ).first()
+            id_type = SERIES_COLLEX_ID_TYPE[req.get("type","collection")]
             filters = {
-                "collection_id": [collection_id]
+                id_type: [collection_id]
             }
             if patient_id:
                 filters['PatientID'] = [patient_id]
@@ -673,7 +684,9 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
                 }
             )
 
+        test_series = {}
         for doc in result['docs']:
+            collection_id = doc['collection_id'][0] if isinstance(doc['collection_id'], list) else doc['collection_id']
             response['result'].append({
                 "series_id": doc['SeriesInstanceUID'],
                 "crdc_series_id": doc['crdc_series_uuid'],
@@ -682,8 +695,15 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
                 "modality": doc['Modality'][0] if isinstance(doc['Modality'], list) else doc['Modality'],
                 "study_id": doc['StudyInstanceUID'],
                 "patient_id": doc["PatientID"],
-                "collection_id": doc['collection_id'][0] if isinstance(doc['collection_id'], list) else doc['collection_id']
+                "collection_id": collection_id
             })
+            if not collection_id in test_series:
+                test_series[collection_id] = {
+                    "series_id": doc['SeriesInstanceUID'],
+                    "study_id": doc['StudyInstanceUID'],
+                    "patient_id": doc["PatientID"],
+                    "collection_id": collection_id
+                }
         if 'facets' in result:
             response['download_stats'] = {
                 'series_count': result['facets']['total_SeriesInstanceUID'],
@@ -692,6 +712,7 @@ def get_series(request, collection_id=None, patient_id=None, study_uid=None):
                 'case_count': result['facets']['total_PatientID'],
                 'queue_byte_size': result['facets']['instance_size'],
             }
+        response['test_series'] = list(test_series.values())
 
     except Exception as e:
         logger.error("[ERROR] While fetching series per study ID:")