diff --git a/import-scripts/expand-clinical-data.py b/import-scripts/expand-clinical-data.py
index 55a2154e1..2f9d26c68 100644
--- a/import-scripts/expand-clinical-data.py
+++ b/import-scripts/expand-clinical-data.py
@@ -30,9 +30,9 @@ def expand_clinical_data_main(clinical_filename, fields, impact_data_only, ident
 			continue
 		# update line with supplemental sample clinical data and format data as string for output file
 		line.update(SUPPLEMENTAL_CLINICAL_DATA.get(line[identifier_column_name].strip(), {}))
-		data = map(lambda v: line.get(v,''), header)
+		data = map(lambda v: line.get(v,'Unknown'), header)
 		output_data.append('\t'.join(data))
-	data_file.close()	
+	data_file.close()
 
 	# write data to output file
 	output_file = open(clinical_filename, 'w')
@@ -49,6 +49,9 @@ def load_supplemental_clinical_data(supplemental_clinical_filename, supplemental
 		line = dict(zip(header, map(str.strip, line.split('\t'))))
 		if study_id == 'genie' and identifier_column_name == 'SAMPLE_ID':
 			normalize_genie_sample_type(line)
+			add_gene_panel_prefix(line)
+		elif study_id == 'genie' and identifier_column_name == 'PATIENT_ID':
+			line = dict((k,('Unknown' if v=='NA' else v)) for k,v in line.items())
 		SUPPLEMENTAL_CLINICAL_DATA[line[identifier_column_name].strip()] = dict({(k,v) for k,v in line.items() if k in supplemental_fields})
 	data_file.close()
 
@@ -61,6 +64,13 @@ def normalize_genie_sample_type(data):
 	except KeyError:
 		print >> ERROR_FILE, "No SAMPLE_TYPE column detected, cannot normalize genie sample type"
 	return data
+	
+def add_gene_panel_prefix(data):
+	try:
+		data['GENE_PANEL'] = 'MSK-'+data['GENE_PANEL']
+	except KeyError:
+		print >> ERROR_FILE, "No GENE_PANEL column detected, cannot add prefix to genie gene panel"
+	return data
 
 def is_impact_sample_or_patient(case_identifier):
 	""" Determine whether sample id is from IMPACT """
diff --git a/import-scripts/generate-clinical-subset.py b/import-scripts/generate-clinical-subset.py
index 7286bdd9e..033b3ee3c 100755
--- a/import-scripts/generate-clinical-subset.py
+++ b/import-scripts/generate-clinical-subset.py
@@ -121,11 +121,20 @@ def filter_patient_clinical_data(clin_patient_file, study_id):
 	data_file = open(clin_patient_file, 'rU')
 	data_reader = [line for line in data_file.readlines() if not line.startswith('#')][1:]
 	output_data = ['\t'.join(header)]
+	
+	SUPPLEMENTAL_PATIENT_CLINICAL_DATA = {}
 	for line in data_reader:
 		data = dict(zip(header, map(str.strip, line.split('\t'))))
-		if not data['PATIENT_ID'] in FILTERED_PATIENT_IDS:
-			continue
-		formatted_data = map(lambda v: data.get(v,''), header)
+		if data['PATIENT_ID'] in FILTERED_PATIENT_IDS: SUPPLEMENTAL_PATIENT_CLINICAL_DATA[data['PATIENT_ID']] = data
+
+	for patient_id in FILTERED_PATIENT_IDS:
+		if not patient_id in SUPPLEMENTAL_PATIENT_CLINICAL_DATA:
+			SUPPLEMENTAL_PATIENT_CLINICAL_DATA[patient_id] = dict(zip(header, map(lambda v: (patient_id if v=='PATIENT_ID' else 'NA'), header)))
+	
+	normalized_supplemental_patient_data = normalize_genie_patient_attributes(SUPPLEMENTAL_PATIENT_CLINICAL_DATA)
+	
+	for patient in normalized_supplemental_patient_data:
+		formatted_data = map(lambda v: normalized_supplemental_patient_data[patient].get(v,''), header)
 		output_data.append('\t'.join(formatted_data))
 	data_file.close()
 
@@ -135,6 +144,23 @@ def filter_patient_clinical_data(clin_patient_file, study_id):
 	output_file.close()
 	print >> OUTPUT_FILE, 'Input patient clinical data filtered by patient id for study: ' + study_id
 
+def normalize_genie_patient_attributes(data):
+	for patient_id in data:
+		if 'NAACCR_SEX_CODE' in data[patient_id] and data[patient_id]['NAACCR_SEX_CODE'].strip() == 'NA':
+			data[patient_id]['NAACCR_SEX_CODE'] = '9'
+		if 'NAACCR_RACE_CODE_PRIMARY' in data[patient_id] and data[patient_id]['NAACCR_RACE_CODE_PRIMARY'].strip() == 'NA':
+			data[patient_id]['NAACCR_RACE_CODE_PRIMARY'] = '99'
+		if 'NAACCR_RACE_CODE_SECONDARY' in data[patient_id] and data[patient_id]['NAACCR_RACE_CODE_SECONDARY'].strip() == 'NA':
+			data[patient_id]['NAACCR_RACE_CODE_SECONDARY'] = '99'
+		if 'NAACCR_RACE_CODE_TERTIARY' in data[patient_id] and data[patient_id]['NAACCR_RACE_CODE_TERTIARY'].strip() == 'NA':
+			data[patient_id]['NAACCR_RACE_CODE_TERTIARY'] = '99'
+		if 'NAACCR_ETHNICITY_CODE' in data[patient_id] and data[patient_id]['NAACCR_ETHNICITY_CODE'].strip() == 'NA':
+			data[patient_id]['NAACCR_ETHNICITY_CODE'] = '9'
+		if 'BIRTH_YEAR' in data[patient_id] and data[patient_id]['BIRTH_YEAR'].strip() == 'NA':
+			data[patient_id]['BIRTH_YEAR'] = 'Unknown'
+	return data
+
+
 def generate_sample_subset_file(subset_filename):
 	""" Writes subset of sample ids to output directory. """
 	output_file = open(subset_filename, 'w')
diff --git a/import-scripts/merge-cna-records.py b/import-scripts/merge-cna-records.py
new file mode 100644
index 000000000..e3a8c2cbe
--- /dev/null
+++ b/import-scripts/merge-cna-records.py
@@ -0,0 +1,100 @@
+import sys
+import os
+import optparse
+
+ERROR_FILE = sys.stderr
+OUTPUT_FILE = sys.stdout
+
+HEADER_KEYWORDS = ['hugo_symbol','entrez_gene_id']
+GENE_MERGE_LIST = {'CDKN2Ap16INK4A': 'CDKN2A',
+'CDKN2Ap14ARF': 'CDKN2A',
+'MLL': 'KMT2A',
+'MLL2': 'KMT2D',
+'MLL3': 'KMT2C',
+'MLL4': 'KMT2B',
+'FAM123B': 'AMER1',
+'MYCL1': 'MYCL'}
+
+def get_file_header(filename):
+	""" Returns the file header. """
+	data_file = open(filename, 'rU')
+	filedata = [x for x in data_file.readlines() if not x.startswith('#')]
+	header = map(str.strip, filedata[0].split('\t'))
+	data_file.close()
+	return header
+
+def merge_duplicate_cna_records(data):
+	for gene in data:
+		if len(data[gene]) > 1:
+			merge_status = 0
+			cna_data = map(lambda v: set(v), zip(*data[gene]))
+			merged_cna_data = []
+			for value in cna_data:
+				if len(value) > 1:
+					value = value - set(['NA'])
+					if len(value) > 1: value = value - set([''])
+					if len(value) > 1: value = value - set(['0'])
+					if len(value) > 1:
+						if len(value) == 2 and '-1.5' in value and '-2' in value: value.remove('-1.5')
+						else: merge_status = 1; break
+					merged_cna_data.append(map(str,value))
+				else:
+					merged_cna_data.append(map(str,value))
+			if merge_status == 1:
+				print >> ERROR_FILE, "The copy number values for gene", gene, "cannot be merged"
+			else:
+				merged_cna_data = [value[0] for value in merged_cna_data]
+				data[gene] = [merged_cna_data]
+	return(data)			
+
+def write_merged_cna_data(data,header,out_cna_filepath):
+	unmerged_data = ""
+	merged_data = ""
+	
+	for gene_symbol in data:
+		if len(data[gene_symbol]) > 1:
+			for record in data[gene_symbol]:
+				unmerged_data += gene_symbol+'\t'+'\t'.join(record)+'\n'
+		else:
+			merged_data += gene_symbol+'\t'+'\t'.join(data[gene_symbol][0])+'\n'
+	
+	if unmerged_data != "":
+		unmerged_file = open(out_cna_filepath+'data_CNA_unmerged.txt','w')
+		unmerged_file.write('\t'.join(header)+'\n')
+		unmerged_file.write(unmerged_data)
+		print >> OUTPUT_FILE, "The unmerged CNA records are written to :", out_cna_filepath+'data_CNA_unmerged.txt'
+	if merged_data != "":
+		merged_file = open(out_cna_filepath+'data_CNA_merged.txt','w')
+		merged_file.write('\t'.join(header)+'\n')
+		merged_file.write(merged_data)
+		print >> OUTPUT_FILE, "The merged CNA records are written to :", out_cna_filepath+'data_CNA_merged.txt'
+
+def main():
+	# get command line arguments
+	parser = optparse.OptionParser()
+	parser.add_option('-i', '--input-cnafile', action = 'store', dest = 'cnafile')
+	parser.add_option('-o', '--output-cna-filepath', action = 'store', dest = 'out_cna_filepath')
+
+	(options, args) = parser.parse_args()
+	cna_filename = options.cnafile
+	out_cna_filepath = options.out_cna_filepath
+	
+	header = get_file_header(cna_filename)
+
+	# load data from clinical_filename and write data to output directory
+	data_file = open(cna_filename, 'rU')
+	data_reader = [line for line in data_file.readlines() if not line.startswith('#')][1:]
+	
+	COPY_NUMBER_DATA = {}
+	for line in data_reader:
+		line = line.strip('\n').split('\t')
+		if line[0] in GENE_MERGE_LIST: line[0] = GENE_MERGE_LIST[line[0]]
+		if line[0] not in COPY_NUMBER_DATA: COPY_NUMBER_DATA[line[0]] = [line[1:]]
+		else: COPY_NUMBER_DATA[line[0]].append(line[1:])
+	
+	data = merge_duplicate_cna_records(COPY_NUMBER_DATA)
+	
+	write_merged_cna_data(data,header,out_cna_filepath)
+	
+if __name__ == '__main__':
+	main()
\ No newline at end of file
diff --git a/import-scripts/remove-duplicate-maf-variants.py b/import-scripts/remove-duplicate-maf-variants.py
new file mode 100644
index 000000000..4328ce46d
--- /dev/null
+++ b/import-scripts/remove-duplicate-maf-variants.py
@@ -0,0 +1,84 @@
+import sys
+import os
+import optparse
+
+# Script to remove duplicate maf records based on the 8 key columns.
+# Calculates VAF for each record and picks the record with high VAF
+# Formula for VAF = t_alt_count / (t_ref_count + t_alt_count) 
+
+ERROR_FILE = sys.stderr
+OUTPUT_FILE = sys.stdout
+
+KEY_COLUMNS_INDEX = []
+KEY_COLUMNS = ['Entrez_Gene_Id','Chromosome','Start_Position','End_Position','Variant_Classification','Tumor_Seq_Allele2','Tumor_Sample_Barcode','HGVSp_Short']
+MAF_DATA = {}
+
+def remove_duplicate_variants(maf_filename, comments, header, t_refc_index, t_altc_index):
+	outfile = []
+	outfile.append(comments)
+	outfile.append(header)
+	for key in MAF_DATA:
+		if len(MAF_DATA[key]) > 1:
+			vaf_ind = 0
+			vaf_value = 0
+			for val in MAF_DATA[key]:
+				#calculate VAF for each duplicate record.
+				columns = val.rstrip('\n').split('\t')
+				try:
+					VAF = int(columns[t_altc_index])/(int(columns[t_altc_index])+int(columns[t_refc_index]))
+					if VAF > vaf_value:
+						vaf_value = VAF
+						vaf_ind = MAF_DATA[key].index(val)
+						outfile.append(MAF_DATA[key][vaf_ind])
+				except:
+					print >> ERROR_FILE, 'ERROR: VAF cannot be calculated for the variant : ' + key
+					print >> ERROR_FILE, 'The t_ref_count is: '+ columns[t_refc_index]+ ' and t_alt_count is: '+ columns[t_altc_index]
+					outfile.append(val)
+		else:
+			outfile.append(MAF_DATA[key][0])
+			
+	datafile = open(maf_filename, 'w')
+	for line in outfile:
+		datafile.write(line)
+	datafile.close()
+	print >> OUTPUT_FILE, 'MAF file with duplicate variants removed is written to: ' + maf_filename
+		
+
+def main():
+	# get command line arguments
+	parser = optparse.OptionParser()
+	parser.add_option('-i', '--input-maf-file', action = 'store', dest = 'maf_file')
+
+	(options, args) = parser.parse_args()
+	maf_filename = options.maf_file
+
+	comments = ""
+	header = ""
+	
+	with open(maf_filename,'r') as maf_file:
+		for line in maf_file:
+			if line.startswith('#'):
+				comments += line
+			elif line.startswith('Hugo_Symbol'):
+				header += line
+				header_cols = line.rstrip('\n').split('\t')
+				#get the positions of the 8 key maf columns
+				for value in KEY_COLUMNS:
+					KEY_COLUMNS_INDEX.append(header_cols.index(value))
+				t_refc_index = header_cols.index('t_ref_count')
+				t_altc_index = header_cols.index('t_alt_count')
+			else:
+				reference_key = ""
+				data = line.rstrip('\n').split('\t')
+				for index in KEY_COLUMNS_INDEX:
+					reference_key += data[index]+'\t'
+				reference_key = reference_key.rstrip('\t')
+				if reference_key not in MAF_DATA:
+					MAF_DATA[reference_key] = [line]
+				else:
+					MAF_DATA[reference_key].append(line)
+	
+	remove_duplicate_variants(maf_filename, comments, header, t_refc_index, t_altc_index)
+
+if __name__ == '__main__':
+	main()
diff --git a/import-scripts/subset-impact-data.sh b/import-scripts/subset-impact-data.sh
index ebe168528..1f5c71934 100755
--- a/import-scripts/subset-impact-data.sh
+++ b/import-scripts/subset-impact-data.sh
@@ -87,8 +87,7 @@ if [ $STUDY_ID == "genie" ]; then
     else
         # starting in Nov 2018 releases, all vital status information will be removed from patient file and placed in a separate file:
         # get the patients from the filtered set of patients from the generate-clinical-subset.py call and expand file with the vital status columns
-        cut -f1 $OUTPUT_DIRECTORY/data_clinical_supp_patient.txt > $OUTPUT_DIRECTORY/vital_status.txt
-        $PYTHON_BINARY $PORTAL_SCRIPTS_DIRECTORY/expand-clinical-data.py --study-id="genie" --clinical-file="$OUTPUT_DIRECTORY/vital_status.txt" --clinical-supp-file="$INPUT_DIRECTORY/ddp/ddp_vital_status.txt" --fields="YEAR_CONTACT,YEAR_DEATH,INT_CONTACT,INT_DOD,DEAD" --identifier-column-name="PATIENT_ID"
+        $PYTHON_BINARY $PORTAL_SCRIPTS_DIRECTORY/expand-clinical-data.py --study-id="genie" --clinical-file="$OUTPUT_DIRECTORY/data_clinical_supp_patient.txt" --clinical-supp-file="$INPUT_DIRECTORY/ddp/ddp_vital_status.txt" --fields="YEAR_CONTACT,YEAR_DEATH,INT_CONTACT,INT_DOD,DEAD" --identifier-column-name="PATIENT_ID"
         if [ $? -gt 0 ] ; then
             echo "Failed to expand $OUTPUT_DIRECTORY/vital_status.txt with YEAR_CONTACT, YEAR_DEATH, INT_CONTACT, INT_DOD, DEAD from $INPUT_DIRECTORY/ddp/ddp_vital_status.txt. Exiting..."
             exit 2
@@ -106,7 +105,7 @@ if [ $STUDY_ID == "genie" ]; then
         $PYTHON_BINARY $PORTAL_SCRIPTS_DIRECTORY/add-age-at-seq-report.py --clinical-file="$OUTPUT_DIRECTORY/data_clinical_supp_sample.txt" --seq-date-file="$INPUT_DIRECTORY/cvr/seq_date.txt" --age-file="$INPUT_DIRECTORY/ddp/ddp_age.txt" --convert-to-days="true"
         if [ $? -gt 0 ] ; then
             echo "Failed to add AGE_AT_SEQ_REPORT to $OUTPUT_DIRECTORY/data_clinical_supp_sample.txt using $INPUT_DIRECTORY/cvr/seq_date.txt. Exiting..."
-            exit 2l
+            exit 2
         fi
 
         # rename GENE_PANEL to SEQ_ASSAY_ID in data_clinical_supp_sample.txt
@@ -123,6 +122,20 @@ if [ $STUDY_ID == "genie" ]; then
             # remove germline mutations from maf
             grep -v 'GERMLINE' $OUTPUT_DIRECTORY/data_mutations_extended.txt > $OUTPUT_DIRECTORY/data_mutations_extended.txt.tmp
             mv $OUTPUT_DIRECTORY/data_mutations_extended.txt.tmp $OUTPUT_DIRECTORY/data_mutations_extended.txt
+        fi
+		
+		# merge duplicate variants from maf
+        $PYTHON_BINARY $PORTAL_SCRIPTS_DIRECTORY/remove-duplicate-maf-variants.py --input-maf-file="$OUTPUT_DIRECTORY/data_mutations_extended.txt"
+        if [ $? -gt 0 ] ; then
+            echo "Failed to merge duplicate CNA records in $OUTPUT_DIRECTORY/data_CNA.txt. Exiting.."
+            exit 2
+        fi
+		
+		# merge CNA records for certain gene duplicates    
+        $PYTHON_BINARY $PORTAL_SCRIPTS_DIRECTORY/merge-cna-records.py --input-cnafile="$OUTPUT_DIRECTORY/data_CNA.txt" --output-cna-filepath="$OUTPUT_DIRECTORY/"
+        if [ $? -gt 0 ] ; then
+            echo "Failed to merge duplicate CNA records in $OUTPUT_DIRECTORY/data_CNA.txt. Exiting.."
+            exit 2
         fi
     fi
 else