@@ -5,13 +5,14 @@ Builds a fresh data repo from source data
55"""
66import argparse
77import json
8+ import time
89from collections import defaultdict
910from copy import deepcopy
1011from os import getcwd
1112from os .path import dirname , realpath , join , relpath , isfile
1213
1314from lib .changelog import changelog_prepare , changelog_get_unreleased_section
14- from lib .container import dict_get , dict_get_required , dict_set , unique , find_index_by , first , format_list , \
15+ from lib .container import dict_get , dict_get_required , dict_set , find_index_by , first , format_list , \
1516 dict_remove_many , find_duplicates , dict_cleanup
1617from lib .date import now_iso , iso_to_iso_safe
1718from lib .fasta import fasta_read_exactly_one_seq
@@ -117,14 +118,18 @@ def get_new_dataset_order(datasets, dataset_order):
117118 return dataset_order
118119
119120
def sort_collections(collections, collections_order):
    """Return the collections arranged in the order given by ``collections_order``.

    Args:
        collections: Iterable of collection dicts; each one is read at
            ``collection["meta"]["id"]``.
        collections_order: Iterable of collection ids in the desired order.

    Returns:
        A new list. Collections whose id is not listed in
        ``collections_order`` are omitted, collections sharing an id keep
        their input order, and an id listed more than once yields its
        collections once per occurrence.
    """
    # Group once by id so each requested id is a direct lookup instead of a
    # rescan of the whole collection list.
    by_id = {}
    for collection in collections:
        by_id.setdefault(collection["meta"]["id"], []).append(collection)
    return [collection for x in collections_order for collection in by_id.get(x, ())]
122123
123124
def sort_datasets(datasets, dataset_order):
    """Return the datasets arranged in the order given by ``dataset_order``.

    Args:
        datasets: Iterable of dataset dicts; each one is read at
            ``dataset["path"]``.
        dataset_order: Iterable of dataset paths in the desired order.

    Returns:
        A new list. Datasets whose path is not listed in ``dataset_order``
        are omitted, datasets sharing a path keep their input order, and a
        path listed more than once yields its datasets once per occurrence.
    """
    # Group once by path so each requested path is a direct lookup instead of
    # a rescan of the whole dataset list.
    by_path = {}
    for dataset in datasets:
        by_path.setdefault(dataset["path"], []).append(dataset)
    return [dataset for x in dataset_order for dataset in by_path.get(x, ())]
126127
127128
def sort_release_infos(release_infos, dataset_order):
    """Return the release infos arranged in the order given by ``dataset_order``.

    Args:
        release_infos: Iterable of release-info dicts; each one is read at
            ``release_info["dataset"]["path"]``.
        dataset_order: Iterable of dataset paths in the desired order.

    Returns:
        A new list. Release infos whose dataset path is not listed in
        ``dataset_order`` are omitted, entries sharing a path keep their
        input order, and a path listed more than once yields its entries
        once per occurrence.
    """
    # Group once by dataset path so each requested path is a direct lookup
    # instead of a rescan of the whole release-info list.
    by_path = {}
    for release_info in release_infos:
        by_path.setdefault(release_info["dataset"]["path"], []).append(release_info)
    return [release_info for x in dataset_order for release_info in by_path.get(x, ())]
131+
132+
128133def parse_args ():
129134 parser = argparse .ArgumentParser (formatter_class = argparse .ArgumentDefaultsHelpFormatter )
130135
@@ -209,15 +214,15 @@ def main():
209214 tag = iso_to_iso_safe (updated_at ) if args .release else "unreleased"
210215 git_check_tag (tag )
211216
212- collection_json_paths = find_files ("collection.json" , args .input_dir )
217+ collection_json_paths = list ( find_files ("collection.json" , args .input_dir ) )
213218
214219 collections = []
215220 release_infos = []
216221 all_refs = {}
217222 for collection_json_path in collection_json_paths :
218- collection , release_infos , refs = process_one_collection (collection_json_path , args , tag , updated_at )
223+ collection , release_infos_for_dataset , refs = process_one_collection (collection_json_path , args , tag , updated_at )
219224 collections .append (collection )
220- release_infos .extend (release_infos )
225+ release_infos .extend (release_infos_for_dataset )
221226 all_refs .update (refs )
222227
223228 collections = sort_collections (collections , ["nextstrain" , "community" ])
@@ -254,10 +259,14 @@ def main():
254259 commit_hash = commit_changes (args , tag , release_infos )
255260
256261 if args .push :
262+ l .info ("Pushing commited changes to GitHub" )
257263 git_push ()
258264
259265 if args .release :
266+ l .info ("Releasing to GitHub" )
260267 release_notes = aggregate_release_notes (release_infos )
268+ l .info (f"Release notes:\n -------\n { release_notes } \n -------\n End of release notes\n " )
269+ time .sleep (5 )
261270 publish_to_github_releases (args , tag , commit_hash , release_notes )
262271
263272
@@ -285,6 +294,7 @@ def process_one_collection(collection_json_path, args, tag, updated_at):
285294 json_write (collection_json , collection_json_path , no_sort_keys = True )
286295
287296 release_infos = prepare_dataset_release_infos (args , datasets , collection_dir , tag , updated_at )
297+ release_infos = sort_release_infos (release_infos , dataset_order )
288298
289299 collection_info = deepcopy (collection_json )
290300
@@ -351,26 +361,33 @@ def prepare_dataset_release_infos(args, datasets, collection_dir, tag, updated_a
351361
352362
def aggregate_release_notes(release_infos):
    """Build the combined release notes text for all given release infos.

    The text starts with a header listing the datasets (as rendered by
    ``format_dataset_list``), followed by each entry's ``"release_notes"``
    value in the order the entries are given.

    Args:
        release_infos: Sequence of release-info dicts; each one is read at
            ``release_info["release_notes"]``. It is iterated more than
            once, so it must not be a one-shot iterator.

    Returns:
        The aggregated release notes as a single string.
    """
    dataset_list = format_dataset_list(release_infos)
    header = f"This release contains changes for datasets:\n\n{dataset_list}\n\n\n"
    # Join the per-dataset sections in one pass rather than growing the
    # string with repeated concatenation.
    sections = [f'\n{release_info["release_notes"]}\n\n' for release_info in release_infos]
    return header + "".join(sections)
362369
363370
def format_dataset_list(release_infos):
    """Render the datasets of the given release infos as a list, one per line.

    Each release info becomes one entry via ``format_dataset_list_entry``;
    the entries are then passed to ``format_list`` with a newline separator,
    a ``"- "`` marker and quoting disabled.

    Args:
        release_infos: Iterable of release-info dicts.

    Returns:
        Whatever ``format_list`` produces for the entries (used by callers
        as text embedded into release notes and commit messages).
    """
    entries = [format_dataset_list_entry(release_info) for release_info in release_infos]
    return format_list(entries, sep="\n", marker="- ", quote=False)
374+
375+
def format_dataset_list_entry(release_info):
    """Format one release info as ``"<path> (<name>)"``.

    Args:
        release_info: Release-info dict; its ``["dataset"]["path"]`` is
            looked up with ``dict_get_required`` and its ``"dataset"`` entry
            is passed to ``get_dataset_name_friendly``.

    Returns:
        A string combining the dataset path and its friendly name.
    """
    path = dict_get_required(release_info, ["dataset", "path"])
    name = get_dataset_name_friendly(release_info["dataset"])
    return f"{path} ({name})"
380+
381+
def commit_changes(args, tag, release_infos):
    """Commit all changes, using a release-specific message when releasing.

    Args:
        args: Parsed command-line arguments; only ``args.release`` is read.
        tag: Tag name embedded in the log line and in the release commit
            message.
        release_infos: Release-info dicts, used to list the updated datasets
            in the release commit message.

    Returns:
        The result of ``git_commit_all`` for the chosen commit message.
    """
    l.info(f"Committing changes for '{tag}'")

    # Non-release rebuilds use a fixed message; releases list the datasets.
    commit_message = "chore: rebuild [skip ci]"
    if args.release:
        dataset_list = format_dataset_list(release_infos)
        commit_message = f"chore: release '{tag}'\n\nUpdated datasets:\n\n{dataset_list}"

    # NOTE(review): the message is logged for both release and non-release
    # commits -- confirm this matches the intended placement.
    l.info(f"Commit message:\n--------\n{commit_message}\n--------\nEnd of commit message\n\n")

    return git_commit_all(commit_message)
376393
@@ -438,12 +455,8 @@ def create_dataset_package(args, dataset, pathogen_json, tag, dataset_dir):
438455 file_write ("User-agent: *\n Disallow: /\n " , join (args .output_dir , "robots.txt" ))
439456
440457
441- def get_dataset_name (dataset ):
442- return dict_get_required (dataset , ["attributes" , "name" , "value" ])
443-
444-
def get_dataset_name_friendly(dataset):
    """Return the value stored at ``["attributes"]["name"]`` of a dataset.

    Args:
        dataset: Dataset dict.

    Returns:
        The value found by ``dict_get_required`` at the ``attributes`` ->
        ``name`` path. Handling of a missing key is delegated to
        ``dict_get_required`` (presumably it raises -- confirm in
        ``lib.container``).
    """
    return dict_get_required(dataset, ["attributes", "name"])
447460
448461
449462if __name__ == '__main__' :
0 commit comments