Skip to content

Commit e604d24

Browse files
chore(ci): fix release script
1 parent 76cdeb8 commit e604d24

4 files changed

Lines changed: 60 additions & 58 deletions

File tree

.github/workflows/build-and-deploy.yml

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@ defaults:
1919
shell: bash -euxo pipefail {0}
2020

2121
env:
22-
GITHUB_REPOSITORY_URL: ${{ github.server_url }}/${{ github.repository }}
2322
VERBOSE: 1
2423

2524
jobs:
2625
build-and-deploy-datasets:
27-
runs-on: ubuntu-22.04
26+
runs-on: ubuntu-20.04
2827

2928
environment:
3029
name: ${{ github.ref }}
@@ -37,6 +36,7 @@ jobs:
3736
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
3837
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
3938
AWS_DEFAULT_REGION: us-east-2
39+
GH_TOKEN: ${{ github.token }}
4040

4141
steps:
4242
- name: "Checkout code"
@@ -45,31 +45,6 @@ jobs:
4545
fetch-depth: 0
4646
submodules: true
4747

48-
- name: "Authenticate git"
49-
run: |
50-
export GITHUB_TOKEN="${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }}"
51-
export BOT_GIT_USER_EMAIL="${{ secrets.BOT_GIT_USER_EMAIL }}"
52-
export BOT_GIT_USER_NAME="${{ secrets.BOT_GIT_USER_NAME }}"
53-
54-
: ${GITHUB_TOKEN?"The env var GITHUB_TOKEN is required"}
55-
: ${BOT_GIT_USER_EMAIL?"The env var BOT_GIT_USER_EMAIL is required"}
56-
: ${BOT_GIT_USER_NAME?"The env var BOT_GIT_USER_NAME is required"}
57-
58-
git config --global user.email "${BOT_GIT_USER_EMAIL}"
59-
git config --global user.name "${BOT_GIT_USER_NAME}"
60-
61-
gh auth setup-git >/dev/null
62-
63-
- name: "Extract branch name"
64-
run: |
65-
echo "current_branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
66-
id: branch-name
67-
68-
- name: "Checkout branch"
69-
run: |
70-
git fetch --all
71-
git switch ${{ steps.branch-name.outputs.current_branch }}
72-
7348
- name: "Install system dependencies"
7449
run: |
7550
sudo apt-get install brotli pigz parallel python3 rename --yes -qq >/dev/null
@@ -90,11 +65,17 @@ jobs:
9065
- name: "Rebuild, commit and push datasets"
9166
if: github.ref != 'refs/heads/release'
9267
run: |
68+
git config --global user.email "${{ secrets.BOT_GIT_USER_EMAIL }}"
69+
git config --global user.name "${{ secrets.BOT_GIT_USER_NAME }}"
70+
9371
./scripts/rebuild --input-dir 'data/' --output-dir 'data_output/' --push --repo="${GITHUB_REPOSITORY}"
9472
9573
- name: "Rebuild, commit, push and make a release"
9674
if: github.ref == 'refs/heads/release'
9775
run: |
76+
git config --global user.email "${{ secrets.BOT_GIT_USER_EMAIL }}"
77+
git config --global user.name "${{ secrets.BOT_GIT_USER_NAME }}"
78+
9879
./scripts/rebuild --input-dir 'data/' --output-dir 'data_output/' --release --repo="${GITHUB_REPOSITORY}"
9980
10081
- name: "Deploy dataset server"

scripts/lib/changelog.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44

55
def changelog_prepare(dataset, updated_at, changelog_path):
6-
path = dataset["path"]
6+
path = dict_get(dataset, ["path"])
7+
name = dict_get(dataset, ["attributes", "name"])
78
release_notes = changelog_get_unreleased_section(changelog_path)
89
if len(release_notes) == 0:
910
raise ValueError(
@@ -13,10 +14,10 @@ def changelog_prepare(dataset, updated_at, changelog_path):
1314
full_changelog = file_read(changelog_path).replace(f"## Unreleased", f"## {updated_at}")
1415
file_write(full_changelog, changelog_path)
1516

16-
attr_table = format_dataset_attributes_md_table(dict_get_required(dataset, ["attributes"]))
17+
# attr_table = format_dataset_attributes_md_table(dict_get_required(dataset, ["attributes"]))
1718
release_notes = release_notes.replace(
1819
"## Unreleased",
19-
f"""### {path}\n\n{attr_table}""".strip("\n ")
20+
f"""## {name} ({path})""".strip("\n ")
2021
)
2122

2223
return release_notes
@@ -39,10 +40,8 @@ def changelog_get_unreleased_section(changelog_path: str):
3940

4041

4142
def format_dataset_attributes_md_table(attributes):
    """Render dataset attributes as a two-column Markdown table.

    Args:
        attributes: Mapping of attribute name to attribute value. Values may
            be of any type; they are converted with ``str()`` before being
            padded.

    Returns:
        The table as a string: a header row, a separator row and one row per
        attribute, each terminated by a newline. The name column is padded
        to 20 characters and the value column to 40.
    """
    rows = [
        f"| {'attribute':20} | {'value':40} |",
        f"| {'-' * 20} | {'-' * 40} |",
    ]
    # Convert to str before applying the width: a bare `{value:40}` raises
    # TypeError for None/list/dict values and right-aligns numbers and bools.
    rows.extend(
        f"| {str(attr_name):20} | {str(attr_val):40} |"
        for attr_name, attr_val in attributes.items()
    )
    return "".join(f"{row}\n" for row in rows)

scripts/lib/container.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections import namedtuple, Counter
22
from functools import reduce
3-
from typing import List, Iterable, TypeVar, Callable, Union, Dict, Any
3+
from typing import List, Iterable, TypeVar, Callable, Union, Dict, Any, Hashable, Optional, Sequence
44

55
T = TypeVar('T')
66

@@ -100,6 +100,15 @@ def unique(it: Iterable[T]):
100100
return iter(set(it))
101101

102102

103+
# https://stackoverflow.com/a/49168973
104+
def unique_by(values: Iterable["T"], key: Optional[Callable[["T"], Hashable]] = None) -> List["T"]:
    """Return the items of ``values`` with duplicates removed, in input order.

    Args:
        values: Items to deduplicate. When ``key`` is None the items
            themselves must be hashable.
        key: Optional function computing the identity of an item. Two items
            with equal keys are considered duplicates.

    Returns:
        A list holding the first occurrence of every distinct item (or of
        every distinct key), ordered by first appearance in ``values``.
    """
    if key is None:
        return list(dict.fromkeys(values))

    first_by_key = {}
    for value in values:
        # setdefault keeps the first item seen for a key and, because dicts
        # preserve insertion order, keeps the result in input order. Building
        # the dict from reversed(values) kept the same items but returned
        # them in the wrong order.
        first_by_key.setdefault(key(value), value)
    return list(first_by_key.values())
110+
111+
103112
def find_duplicates(it: Iterable[T]) -> List[T]:
104113
return [x for x, occurrences in Counter(it).items() if occurrences > 1]
105114

scripts/rebuild

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@ Builds a fresh data repo from source data
55
"""
66
import argparse
77
import json
8+
import time
89
from collections import defaultdict
910
from copy import deepcopy
1011
from os import getcwd
1112
from os.path import dirname, realpath, join, relpath, isfile
1213

1314
from lib.changelog import changelog_prepare, changelog_get_unreleased_section
14-
from lib.container import dict_get, dict_get_required, dict_set, unique, find_index_by, first, format_list, \
15+
from lib.container import dict_get, dict_get_required, dict_set, find_index_by, first, format_list, \
1516
dict_remove_many, find_duplicates, dict_cleanup
1617
from lib.date import now_iso, iso_to_iso_safe
1718
from lib.fasta import fasta_read_exactly_one_seq
@@ -117,14 +118,18 @@ def get_new_dataset_order(datasets, dataset_order):
117118
return dataset_order
118119

119120

120-
def sort_collections(collections, dataset_order):
121-
return [collection for x in dataset_order for collection in collections if collection["meta"]["id"] == x]
121+
def sort_collections(collections, collections_order):
    """Return the collections arranged to follow ``collections_order``.

    For each id in ``collections_order``, in turn, every collection whose
    ``["meta"]["id"]`` equals that id is emitted. Collections whose id is not
    listed in ``collections_order`` are left out of the result.
    """
    ordered = []
    for wanted_id in collections_order:
        ordered.extend(entry for entry in collections if entry["meta"]["id"] == wanted_id)
    return ordered
122123

123124

124125
def sort_datasets(datasets, dataset_order):
    """Return the datasets arranged to follow ``dataset_order``.

    For each path in ``dataset_order``, in turn, every dataset whose
    ``"path"`` equals it is emitted. Datasets whose path is not listed in
    ``dataset_order`` are left out of the result.
    """
    ordered = []
    for wanted_path in dataset_order:
        for candidate in datasets:
            if candidate["path"] == wanted_path:
                ordered.append(candidate)
    return ordered
126127

127128

129+
def sort_release_infos(release_infos, dataset_order):
    """Return the release infos arranged to follow ``dataset_order``.

    For each path in ``dataset_order``, in turn, every release info whose
    ``["dataset"]["path"]`` equals it is emitted. Release infos whose dataset
    path is not listed in ``dataset_order`` are left out of the result.
    """
    ordered = []
    for wanted_path in dataset_order:
        ordered += [info for info in release_infos if info["dataset"]["path"] == wanted_path]
    return ordered
131+
132+
128133
def parse_args():
129134
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
130135

@@ -209,15 +214,15 @@ def main():
209214
tag = iso_to_iso_safe(updated_at) if args.release else "unreleased"
210215
git_check_tag(tag)
211216

212-
collection_json_paths = find_files("collection.json", args.input_dir)
217+
collection_json_paths = list(find_files("collection.json", args.input_dir))
213218

214219
collections = []
215220
release_infos = []
216221
all_refs = {}
217222
for collection_json_path in collection_json_paths:
218-
collection, release_infos, refs = process_one_collection(collection_json_path, args, tag, updated_at)
223+
collection, release_infos_for_dataset, refs = process_one_collection(collection_json_path, args, tag, updated_at)
219224
collections.append(collection)
220-
release_infos.extend(release_infos)
225+
release_infos.extend(release_infos_for_dataset)
221226
all_refs.update(refs)
222227

223228
collections = sort_collections(collections, ["nextstrain", "community"])
@@ -254,10 +259,14 @@ def main():
254259
commit_hash = commit_changes(args, tag, release_infos)
255260

256261
if args.push:
262+
l.info("Pushing commited changes to GitHub")
257263
git_push()
258264

259265
if args.release:
266+
l.info("Releasing to GitHub")
260267
release_notes = aggregate_release_notes(release_infos)
268+
l.info(f"Release notes:\n-------\n{release_notes}\n-------\nEnd of release notes\n")
269+
time.sleep(5)
261270
publish_to_github_releases(args, tag, commit_hash, release_notes)
262271

263272

@@ -285,6 +294,7 @@ def process_one_collection(collection_json_path, args, tag, updated_at):
285294
json_write(collection_json, collection_json_path, no_sort_keys=True)
286295

287296
release_infos = prepare_dataset_release_infos(args, datasets, collection_dir, tag, updated_at)
297+
release_infos = sort_release_infos(release_infos, dataset_order)
288298

289299
collection_info = deepcopy(collection_json)
290300

@@ -351,26 +361,33 @@ def prepare_dataset_release_infos(args, datasets, collection_dir, tag, updated_a
351361

352362

353363
def aggregate_release_notes(release_infos):
    """Build the combined release notes text for all released datasets.

    The text starts with an intro listing the datasets (as rendered by
    ``format_dataset_list``) and is followed by the ``"release_notes"`` entry
    of every release info, in the order given.
    """
    intro = f"This release contains changes for datasets:\n\n{format_dataset_list(release_infos)}\n\n\n"
    sections = [f'\n{info["release_notes"]}\n\n' for info in release_infos]
    return intro + "".join(sections)
362369

363370

371+
def format_dataset_list(release_infos):
    """Render one list entry per release info via ``format_list``.

    Each entry is produced by ``format_dataset_list_entry``; the entries are
    passed to ``format_list`` with a newline separator, a ``"- "`` marker and
    quoting disabled.
    """
    return format_list(
        list(map(format_dataset_list_entry, release_infos)),
        sep="\n",
        marker="- ",
        quote=False,
    )
374+
375+
376+
def format_dataset_list_entry(release_info):
    """Format a single release info as ``"<path> (<name>)"``.

    The path is read from ``release_info["dataset"]["path"]`` through
    ``dict_get_required`` and the name through ``get_dataset_name_friendly``.
    """
    dataset_path = dict_get_required(release_info, ['dataset', 'path'])
    friendly_name = get_dataset_name_friendly(release_info['dataset'])
    return "{} ({})".format(dataset_path, friendly_name)
380+
381+
364382
def commit_changes(args, tag, release_infos):
    """Commit all changes with a message that depends on the run mode.

    When ``args.release`` is truthy the message names the release tag and
    lists the updated datasets; otherwise a fixed rebuild message is used.
    The message is logged before committing.

    Returns:
        Whatever ``git_commit_all`` returns (callers in this file treat it as
        the commit hash).
    """
    l.info(f"Committing changes for '{tag}'")

    if args.release:
        commit_message = f"chore: release '{tag}'\n\nUpdated datasets:\n\n{format_dataset_list(release_infos)}"
    else:
        commit_message = "chore: rebuild [skip ci]"

    l.info(f"Commit message:\n--------\n{commit_message}\n--------\nEnd of commit message\n\n")

    return git_commit_all(commit_message)
376393

@@ -438,12 +455,8 @@ def create_dataset_package(args, dataset, pathogen_json, tag, dataset_dir):
438455
file_write("User-agent: *\nDisallow: /\n", join(args.output_dir, "robots.txt"))
439456

440457

441-
def get_dataset_name(dataset):
442-
return dict_get_required(dataset, ["attributes", "name", "value"])
443-
444-
445458
def get_dataset_name_friendly(dataset):
    """Return the dataset's name, looked up at ``attributes`` -> ``name``.

    The lookup goes through ``dict_get_required``; presumably that helper
    raises when the key path is missing (verify against its definition).
    """
    name_key_path = ["attributes", "name"]
    return dict_get_required(dataset, name_key_path)
447460

448461

449462
if __name__ == '__main__':

0 commit comments

Comments
 (0)