Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Pau Pascual Mas](https://github.com/PauPascualMas)
- [Alba Talavera](https://github.com/albatalavera)
- [Alejandro Bernabeu](https://github.com/aberdur)
- [Victor Lopez](https://github.com/victor5lm)

#### Added enhancements

Expand All @@ -27,6 +28,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix duplicated row in metadata form and clean some MEPRAM initial config [#879] (https://github.com/BU-ISCIII/relecov-tools/pull/879)
- Fix date validation in Excel metadata templates and add some schema validation checks [#886] (https://github.com/BU-ISCIII/relecov-tools/pull/886)
- Improve METADATA_LAB template formatting and duplicate sample detection [#887] (https://github.com/BU-ISCIII/relecov-tools/pull/887)
- Fix the surveillance script for proper variant data handling [#891] (https://github.com/BU-ISCIII/relecov-tools/pull/891)

#### Changed

Expand Down
26 changes: 25 additions & 1 deletion relecov_tools/assets/pipeline_utils/create_summary_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def process_json_files(
long_table_file=None,
output_dir="surveillance_files",
specified_week=None,
specified_season=None,
copy_fasta=False,
):
os.makedirs(output_dir, exist_ok=True)
Expand Down Expand Up @@ -110,6 +111,9 @@ def process_json_files(
if specified_week and week != specified_week:
continue

if specified_season and season != specified_season:
continue

analysis_date = sample.get("bioinformatics_analysis_date", "-")

organism = sample.get("organism", "").lower()
Expand Down Expand Up @@ -355,7 +359,21 @@ def process_json_files(
season_dir = os.path.join(sars_output_dir, f"season_{season}")
os.makedirs(season_dir, exist_ok=True)
variant_csv_path = os.path.join(season_dir, "variant_data.csv")
df_variants = pd.DataFrame(variant_rows)
df_variants_new = pd.DataFrame(variant_rows)

if os.path.exists(variant_csv_path):
df_variants_existing = pd.read_csv(variant_csv_path, dtype=str)

df_variants = pd.concat(
[df_variants_existing, df_variants_new], ignore_index=True
)
else:
df_variants = df_variants_new

df_variants = df_variants.drop_duplicates(
subset=["SAMPLE", "CHROM", "POS", "REF", "ALT"]
)

df_variants.to_csv(variant_csv_path, index=False)
print(
f"Written variant data for SARS season {season} to {variant_csv_path}"
Expand Down Expand Up @@ -441,6 +459,11 @@ def process_json_files(
"--week",
help="Filter for specific epidemiological week (format: YYYY-WW)",
)
parser.add_argument(
"-s",
"--season",
help="Filter for specific epidemiological season (format: YYYY_YYYY)",
)
parser.add_argument(
"-c",
"--copy-fasta",
Expand All @@ -467,5 +490,6 @@ def process_json_files(
args.long_table_file,
args.output,
args.week,
args.season,
args.copy_fasta,
)
Loading