Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

- Add background execution mode for wrapper [#880](https://github.com/BU-ISCIII/relecov-tools/pull/880)
- Add weekly SFTP upload report [#881](https://github.com/BU-ISCIII/relecov-tools/pull/881)
- Upload pipeline-manager samples to iSkyLIMS [#882](https://github.com/BU-ISCIII/relecov-tools/pull/882)

#### Fixes

Expand Down
13 changes: 12 additions & 1 deletion relecov_tools/conf/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
"analysis_folder": "ANALYSIS",
"sample_stored_folder": "RAW",
"sample_link_folder": "00-reads",
"sample_upload_platforms": [
"relecov",
"iskylims"
],
"organism_config": {}
},
"sftp_handle": {
Expand Down Expand Up @@ -248,7 +252,14 @@
},
"update_db": {
"required_conf": ["platform-params", "data_upload_types", "full_update_steps", "iskylims_fixed_values"],
"platform-params": {},
"platform-params": {
"iskylims": {
"display_name": "iSkyLIMS"
},
"relecov": {
"display_name": "relecov-platform"
}
},
"data_upload_types": [
"sample",
"bioinfodata",
Expand Down
5 changes: 5 additions & 0 deletions relecov_tools/conf/initial_config-EQA2026.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ update_db:
long_table: ''
platform-params:
iskylims:
display_name: iSkyLIMS
server_url: http://relecov-iskylims.isciiides.es
api_url: /wetlab/api/
store_samples: create-sample
Expand All @@ -211,6 +212,7 @@ update_db:
project_name: relecov
token: ''
relecov:
display_name: relecov-platform
server_url: http://relecov-platform.isciiides.es
api_url: /api/
store_samples: createSampleData
Expand Down Expand Up @@ -278,6 +280,9 @@ pipeline_manager:
folder_names: []
analysis_group: RLV
analysis_user: icasas_C
sample_upload_platforms:
- relecov
- iskylims
organism_config:
Severe acute respiratory syndrome coronavirus 2 [LOINC:LA31065-8]:
pipeline_template: viralrecon
Expand Down
5 changes: 5 additions & 0 deletions relecov_tools/conf/initial_config-MEPRAM.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ update_db:
long_table: ''
platform-params:
iskylims:
display_name: iSkyLIMS
server_url: http://relecov-iskylims.isciiides.es
api_url: /wetlab/api/
store_samples: create-sample
Expand All @@ -217,6 +218,7 @@ update_db:
project_name: relecov
token: ''
relecov:
display_name: relecov-platform
server_url: http://relecov-platform.isciiides.es
api_url: /api/
store_samples: createSampleData
Expand Down Expand Up @@ -284,6 +286,9 @@ pipeline_manager:
folder_names: []
analysis_group: RLV
analysis_user: icasas_C
sample_upload_platforms:
- relecov
- iskylims
organism_config:
Severe acute respiratory syndrome coronavirus 2 [LOINC:LA31065-8]:
pipeline_template: viralrecon
Expand Down
5 changes: 5 additions & 0 deletions relecov_tools/conf/initial_config-relecov.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ update_db:
long_table: ''
platform-params:
iskylims:
display_name: iSkyLIMS
server_url: http://relecov-iskylims.isciiides.es
api_url: /wetlab/api/
store_samples: create-sample
Expand All @@ -239,6 +240,7 @@ update_db:
project_name: relecov
token: '' # Fill with your platform token
relecov:
display_name: relecov-platform
server_url: http://relecov-platform.isciiides.es
api_url: /api/
store_samples: createSampleData
Expand Down Expand Up @@ -307,6 +309,9 @@ pipeline_manager:
folder_names: []
analysis_group: RLV
analysis_user: icasas_C
sample_upload_platforms:
- relecov
- iskylims
organism_config:
Severe acute respiratory syndrome coronavirus 2 [LOINC:LA31065-8]:
pipeline_template: viralrecon
Expand Down
121 changes: 103 additions & 18 deletions relecov_tools/pipeline_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,13 @@ def __init__(
self.linked_sample_folder = config_data["sample_link_folder"]
self.doc_folder = config_data["doc_folder"]
self.organism_config = config_data["organism_config"]
self.sample_upload_platforms = config_data.get("sample_upload_platforms", [])
self.not_provided_field = (
self.config.get_topic_data("generic", "not_provided_field")
or "Not Provided [SNOMED:434941000124101]"
)

req_conf = ["update_db"] * bool(self.skip_db_upload)
req_conf = ["update_db"] * (not bool(self.skip_db_upload))
missing = [
conf for conf in req_conf if self.config.get_configuration(conf) is None
]
Expand Down Expand Up @@ -775,27 +776,107 @@ def update_db_samples(self, json_data: list[dict]) -> list[dict]:
raise ValueError(
f"Missing required configuration for upload_db: {missing_conf}"
)
upload_db = relecov_tools.upload_database.UploadDatabase(
user=upload_db_conf["user"],
password=upload_db_conf["password"],
json=copy.deepcopy(json_data),
type="sample",
platform=upload_db_conf["platform"],
)
upload_db.start_api(upload_db_conf["platform"])
result = upload_db.store_data("sample", upload_db_conf["platform"])
platforms = self._get_sample_upload_platforms(upload_db_conf)
unique_id_result = []
uploaded_platforms = []

for platform in platforms:
upload_db = relecov_tools.upload_database.UploadDatabase(
user=upload_db_conf["user"],
password=upload_db_conf["password"],
json=copy.deepcopy(json_data),
type="sample",
platform=platform,
)
upload_db.start_api(platform)
platform_settings = upload_db_conf.get("platform-params", {})
platform_name = platform_settings.get(platform, {}).get(
"display_name", platform
)
self.log.info("Uploading sample data to %s", platform_name)
stderr.print(f"[blue]Uploading sample data to {platform_name}")
result = upload_db.store_data("sample", platform)

if not result:
self.log.error("No sample data was uploaded to %s", platform)
stderr.print(f"[red] No sample data was uploaded to {platform}")
raise ValueError(f"No sample data was uploaded to {platform}")

uploaded_platforms.append(platform)
if any(
"sample_fingerprint" in row and "sample_unique_id" in row
for row in result
):
unique_id_result = result
json_data = self.assign_unique_ids_by_fingerprint(
json_data, unique_id_result
)

if not result:
if not uploaded_platforms:
self.log.error("No data was uploaded to the database")
stderr.print("[red] No data was uploaded to the database")
raise ValueError("No data was uploaded to the database")
self.log.info("Database updated with %s samples", len(json_data))
stderr.print(f"[blue] Database updated with {len(json_data)} samples")
self.log.info(
"Database updated with %s samples in %s",
len(json_data),
", ".join(uploaded_platforms),
)
uploaded_platforms_text = ", ".join(uploaded_platforms)
stderr.print(
f"[blue] Database updated with {len(json_data)} samples "
f"in {uploaded_platforms_text}"
)

json_data = self.assign_unique_ids_by_fingerprint(json_data, result)
if unique_id_result:
missing_unique_ids = [
row.get("sequencing_sample_id", "unknown")
for row in json_data
if not row.get("unique_sample_id")
]
if missing_unique_ids:
logtxt = (
"Some samples did not receive unique_sample_id: "
f"{missing_unique_ids}"
)
self.log.warning(logtxt)
stderr.print(f"[yellow]{logtxt}")
else:
logtxt = (
"No upload response included sample_unique_id. "
"Pipeline sample names will not include unique IDs."
)
self.log.warning(logtxt)
stderr.print(f"[yellow]{logtxt}")

return json_data

def _get_sample_upload_platforms(self, upload_db_conf: dict) -> list[str]:
    """Return the ordered platforms where pipeline-manager should upload samples.

    Platforms are resolved from the first source that yields at least one
    entry:

    1. ``self.sample_upload_platforms`` (a list of names, or a single name
       given as a bare string).
    2. The ``platform`` of every dict entry in
       ``upload_db_conf["full_update_steps"]`` whose ``type`` is ``"sample"``.
    3. ``upload_db_conf["platform"]``.

    Args:
        upload_db_conf: Upload configuration; only ``full_update_steps`` and
            ``platform`` are read here.

    Returns:
        Platform names in upload order with duplicates removed (first
        occurrence wins).

    Raises:
        KeyError: If no platform could be resolved from sources 1 and 2 and
            ``upload_db_conf`` has no ``platform`` key.
        ValueError: If ``iskylims`` is requested without ``relecov`` being
            uploaded before it.
    """
    configured = self.sample_upload_platforms
    if not configured:
        # Covers None (e.g. a config key left empty), "" and [].
        configured = []
    elif isinstance(configured, str):
        # A bare string would otherwise be iterated character by character.
        configured = [configured]
    platforms = [str(platform) for platform in configured]

    if not platforms:
        full_update_steps = upload_db_conf.get("full_update_steps") or []
        platforms = [
            str(step["platform"])
            for step in full_update_steps
            if isinstance(step, dict)
            and step.get("type") == "sample"
            and step.get("platform")
        ]
    if not platforms:
        # Only require the single-platform setting when it is really needed.
        platforms = [upload_db_conf["platform"]]

    # dict.fromkeys de-duplicates while preserving the configured order.
    platforms = list(dict.fromkeys(platforms))
    if "iskylims" in platforms and (
        "relecov" not in platforms
        or platforms.index("relecov") > platforms.index("iskylims")
    ):
        raise ValueError(
            "pipeline_manager.sample_upload_platforms must upload to "
            "'relecov' before 'iskylims' so unique_sample_id is available."
        )
    return platforms

def pipeline_exc(self):
"""Prepare folder for analysis in HPC
Copies template selected as input
Expand Down Expand Up @@ -829,10 +910,14 @@ def pipeline_exc(self):
self.log.info("Batch ID set to %s", batch_id)
stderr.print(f"[blue]Batch ID set to {batch_id}")

self.log.info("Updating database with samples data")
stderr.print("[blue]Updating database with samples data")
# Update the database with the samples data
join_validate = self.update_db_samples(join_validate)
if self.skip_db_upload:
self.log.info("Skipping database upload with samples data")
stderr.print("[blue]Skipping database upload with samples data")
else:
self.log.info("Updating database with samples data")
stderr.print("[blue]Updating database with samples data")
# Update the database with the samples data
join_validate = self.update_db_samples(join_validate)

stderr.print("[blue]Collecting samples by organism")
self.log.info("Collecting samples by organism")
Expand Down
9 changes: 9 additions & 0 deletions relecov_tools/upload_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,15 @@ def map_iskylims_sample_fields_values(self, sample_fields, s_project_fields):
)
if project_name:
s_dict["sample_project"] = project_name
if s_dict.get("sample_name") in (None, "", "Not Provided"):
unique_sample_id = row.get("unique_sample_id")
if not unique_sample_id:
sid = row.get("sequencing_sample_id", row.get("sequence_file_R1"))
raise ValueError(
"Cannot upload sample to iSkyLIMS without unique_sample_id "
f"(sample: {sid})"
)
s_dict["sample_name"] = unique_sample_id
all_iskylims_fields = s_project_fields + s_fields
sid = row.get("sequencing_sample_id", row.get("sequence_file_R1"))
for missing in list(set(list(row.keys())) - set(all_iskylims_fields)):
Expand Down
Loading