Skip to content

Commit 79760dc

Browse files
authored
Sanitize dataset name (#274)
2 parents 9939358 + e64b1a0 commit 79760dc

2 files changed

Lines changed: 24 additions & 6 deletions

File tree

backend/archiver/scicat/scicat_tasks.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def create_job_result_object_task(dataset_ids: List[str]) -> JobResultObject:
216216

217217
job_results_object = JobResultObject(result=job_results)
218218

219-
script = create_download_script(job_results);
219+
script = create_download_script(job_results)
220220
job_results_object.downloadScript = base64.b64encode(bytes(script, 'utf-8'))
221221

222222
markdown = f"""Download script for all datablocks in this job\n```bash\n{script}\n```\n"""
@@ -226,28 +226,30 @@ def create_job_result_object_task(dataset_ids: List[str]) -> JobResultObject:
226226

227227
return job_results_object
228228

229+
229230
def create_download_script(job_result_entries: List[JobResultEntry]) -> str:
230231

231232
dataset_to_datablocks = {}
232233

233234
for result in job_result_entries:
234-
dataset_to_datablocks.setdefault(result.datasetId, []).append({"name" : Path(result.archiveId).name, "url" : result.url})
235+
dataset_to_datablocks.setdefault(result.datasetId, []).append({"name": Path(result.archiveId).name, "url": result.url})
235236

236237
return generate_download_script(dataset_to_datablocks)
237-
238-
238+
239239

240240
def create_presigned_url(client: S3Storage, datablock: DataBlock):
241241
url = client.get_presigned_url(Bucket.retrieval_bucket(), datablock.archiveId)
242242
return url
243243

244+
244245
def sanitize_name(name: str) -> str:
245246
invalid_chars = ["/", ".", "_"]
246-
sanitized_name = ""
247+
sanitized_name = name
247248
for c in invalid_chars:
248-
sanitized_name = name.replace(c, "-")
249+
sanitized_name = sanitized_name.replace(c, "-")
249250
return sanitized_name
250251

252+
251253
@log
252254
def create_job_result_entries(dataset_id: str, datablocks: List[DataBlock]) -> List[JobResultEntry]:
253255
s3_client = get_s3_client()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from scicat.scicat_tasks import sanitize_name
3+
4+
5+
def is_valid_string(input_string):
6+
pattern = r'^[a-z0-9-]+$' # Regex pattern
7+
return bool(re.match(pattern, input_string)) # Check for a match
8+
9+
10+
def test_sanitize_name():
11+
12+
dataset_id = "20.500.11935/71f11078-77bb-469d-90e6-dcb4a8fd7e93"
13+
14+
sanatized = sanitize_name(dataset_id)
15+
16+
assert is_valid_string(sanatized)

0 commit comments

Comments
 (0)