6464 "program" , "access" , "date_updated" , "tcia_wiki_collection_id" , "license_short_name" , "active"
6565 ])}
6666
67+ TOKENIZED_FIELDS = ["PatientID" , "SeriesInstanceUID" , "StudyInstanceUID" ]
68+
6769ranges_needed = {
6870 'wbc_at_diagnosis' : 'by_200' ,
6971 'event_free_survival_time_in_days' : 'by_500' ,
@@ -424,7 +426,13 @@ def create_solr_params(schema_src, solr_src):
424426 schema = BigQuerySupport .get_table_schema (schema_src [0 ],schema_src [1 ],schema_src [2 ])
425427 solr_schema = []
426428 solr_index_strings = []
427- SCHEMA_BASE = '{"add-field": %s}'
429+ field_types = ''
430+ add_copy_field = ''
431+ SCHEMA_BASE = '{{field_types}add-field": {fields}{add_copy_field}'
432+ if len (TOKENIZED_FIELDS ):
433+ field_types = '"add-field-type": { "name":"tokenizedText", "class":"solr.TextField", "analyzer" : { "tokenizer": { "name":"nGram" }}}, '
434+ copy_fields = "," .join (['{"source":"{field}","dest":"{field{}_tokenized"}' .format (field ) for field in TOKENIZED_FIELDS ])
435+ add_copy_field = ', "add-copy-field": [{copy_fields}]' .format (copy_fields )
428436 CORE_CREATE_STRING = "sudo -u solr /opt/bitnami/solr/bin/solr create -c {solr_src} -s 2 -rf 2"
429437 SCHEMA_STRING = "curl -u {solr_user}:{solr_pwd} -X POST -H 'Content-type:application/json' --data-binary '{schema}' https://localhost:8983/solr/{solr_src}/schema --cacert solr-ssl.pem"
430438 INDEX_STRING = "curl -u {solr_user}:{solr_pwd} -X POST 'https://localhost:8983/solr/{solr_src}/update?commit=yes{params}' --data-binary @{file_name}.csv -H 'Content-type:application/csv' --cacert solr-ssl.pem"
@@ -441,11 +449,22 @@ def create_solr_params(schema_src, solr_src):
441449 "stored" : True
442450 }
443451 solr_schema .append (field_schema )
452+ if TOKENIZED_FIELDS and field ['name' ] in TOKENIZED_FIELDS :
453+ solr_schema .append ({
454+ "name" : "{}_tokenized" .format (field ["name" ]),
455+ "type" : "tokenizedText" ,
456+ "multiValued" : False if field ['name' ] in SOLR_SINGLE_VAL .get (solr_src .aggregate_level ,
457+ {}) else True ,
458+ "stored" : True
459+ })
444460 if field_schema ['multiValued' ]:
445461 solr_index_strings .append ("f.{}.split=true&f.{}.separator=|" .format (field ['name' ],field ['name' ]))
446462
447463 with open ("{}_solr_cmds.txt" .format (solr_src .name ), "w" ) as cmd_outfile :
448- schema_array = SCHEMA_BASE % solr_schema
464+ schema_array = SCHEMA_BASE .format (
465+ field_types = field_types ,
466+ add_copy_field = add_copy_field
467+ )
449468 params = "&{}" .format ("&" .join (solr_index_strings ))
450469 cmd_outfile .write (CORE_CREATE_STRING .format (solr_src = solr_src .name ))
451470 cmd_outfile .write ("\n \n " )
@@ -662,11 +681,16 @@ def update_display_values(attr, updates):
662681 logger .info ("[STATUS] Added {} display values." .format (str (len (new_vals ))))
663682
664683
665- def load_tooltips (source_objs , attr_name , source_tooltip , obj_attr = None ):
684+ def load_tooltips (source_objs , attr_name , source_tooltip , obj_id_col = None ):
666685 try :
667686 attr = Attribute .objects .get (name = attr_name , active = True )
668- if not obj_attr :
669- obj_attr = attr_name
687+ # In some cases, the data sourcing the tooltip does not have an ID column with a name which matches
688+ # the attribute name (eg. in Collections, analysis results and collections both have a collection_id,
689+ # but in Attributes, analysis_result_id and collection_id are distinct attributes).
690 # Use obj_id_col to specify the column holding the ID of the value to associate with the tooltip source in
691 # the case where the attribute name is different from the source object's ID column name
692+ if not obj_id_col :
693+ obj_id_col = attr_name
670694
671695 tips = Attribute_Tooltips .objects .select_related ('attribute' ).filter (attribute = attr )
672696
@@ -677,7 +701,7 @@ def load_tooltips(source_objs, attr_name, source_tooltip, obj_attr=None):
677701 extent_tooltips [tip .attribute .id ] = []
678702 extent_tooltips [tip .attribute .id ].append (tip .tooltip_id )
679703
680- tooltips_by_val = {x [obj_attr ]: {'tip' : x [source_tooltip ]} for x in source_objs .values () if x [obj_attr ] != '' and x [obj_attr ] is not None }
704+ tooltips_by_val = {x [obj_id_col ]: {'tip' : x [source_tooltip ]} for x in source_objs .values () if x [obj_id_col ] != '' and x [obj_id_col ] is not None }
681705
682706 new_tooltips = []
683707 updated_tooltips = []
@@ -715,7 +739,7 @@ def load_display_vals(filename):
715739 attr_vals_file = open (filename , "r" )
716740
717741 for line in csv_reader (attr_vals_file ):
718- if 'display_value ' in line :
742+ if 'Raw ' in line :
719743 continue
720744 if line [0 ] not in display_vals :
721745 display_vals [line [0 ]] = {
@@ -800,7 +824,11 @@ def main():
800824 if len (args .display_vals ):
801825 dvals = load_display_vals (args .display_vals )
802826 for attr in dvals :
803- update_display_values (Attribute .objects .get (name = attr ), dvals [attr ]['vals' ])
827+ try :
828+ attr_obj = Attribute .objects .get (name = attr )
829+ update_display_values (attr_obj , dvals [attr ]['vals' ])
830+ except ObjectDoesNotExist as e :
831+ print ("[WARNING] Attr {} not found - display values will not be updated! Rerun ETL if this is not expected." .format (attr ))
804832
805833 # Solr commands are automatically output for full ETL; the step below is for outside-of-ETL executions
806834 if len (ETL_CONFIG ):
0 commit comments