Skip to content

Commit 0caf4ef

Browse files
feat(ingest, backend): improve ingest curation warning - add diff (#6633)
resolves #3084 Adds the option to filter by AccessionVersion on the `get-unprocessed-metadata` endpoint. Have ingest call this endpoint to get all previous metadata (includes sequence hashes, so it will indirectly tell us if a sequence has been changed) when we get into the case where ingest would like to revise a curated sequence. Ingest will then add a diff of the curated metadata and the metadata it would like to revise to the notification so that curators can decide better to to handle the situation. ### PR Checklist - [x] All necessary documentation has been adapted for backend endpoints - [x] The backend endpoint updates are covered by unit tests - [x] Any manual testing that has been done is documented (i.e. what exactly was tested?) 1. Ingest sequences from the INSDC with initial hash 2. Curate a sequence from the INSDC as a superuser (add a revision, do not change hash) 3. Ingest ignores revision as the hash has not changed 4. Log into database and manually modify the hash of the curated entry (unprocessed data not original data) 5. Run the ingest cronjob and verify that an informative message is sent to the loculus notifications slack: <img width="2254" height="204" alt="image" src="https://github.com/user-attachments/assets/2e91f1b8-6b94-4801-9624-c6fc53cecaaf" /> 🚀 Preview: https://curation-warning-add-diff.loculus.org --------- Co-authored-by: Cornelius Roemer <cornelius.roemer@gmail.com>
1 parent e539e8a commit 0caf4ef

11 files changed

Lines changed: 175 additions & 48 deletions

File tree

backend/src/main/kotlin/org/loculus/backend/api/SubmissionTypes.kt

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.JsonDeserializer
88
import com.fasterxml.jackson.databind.JsonNode
99
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
1010
import io.swagger.v3.oas.annotations.media.Schema
11+
import org.loculus.backend.controller.UnprocessableEntityException
1112
import org.loculus.backend.model.FastaId
1213
import org.loculus.backend.model.SubmissionId
1314
import org.loculus.backend.service.files.FileId
@@ -26,7 +27,32 @@ interface AccessionVersionInterface {
2627
}
2728

2829
data class AccessionVersion(override val accession: Accession, override val version: Version) :
29-
AccessionVersionInterface
30+
AccessionVersionInterface {
31+
companion object {
32+
fun fromString(value: String): AccessionVersion {
33+
val accession = value.substringBeforeLast('.', missingDelimiterValue = "")
34+
val versionString = value.substringAfterLast('.', missingDelimiterValue = "")
35+
36+
if (accession.isEmpty() || versionString.isEmpty() || '.' in accession) {
37+
throw UnprocessableEntityException(
38+
"Invalid accession version format '$value', expected 'accession.version'",
39+
)
40+
}
41+
42+
val version = versionString.toLongOrNull()
43+
?: throw UnprocessableEntityException(
44+
"Invalid version in accession version '$value', expected a number",
45+
)
46+
if (version < 1) {
47+
throw UnprocessableEntityException(
48+
"Invalid version in accession version '$value', version must be at least 1",
49+
)
50+
}
51+
52+
return AccessionVersion(accession, version)
53+
}
54+
}
55+
}
3056

3157
data class SubmissionIdMapping(
3258
override val accession: Accession,

backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,10 @@ open class SubmissionController(
433433
@Parameter(
434434
description = "The metadata fields that should be returned. If not provided, all fields are returned.",
435435
) @RequestParam(required = false) fields: List<String>?,
436+
@Parameter(
437+
description = "Filter by accession versions in 'accession.version' format. " +
438+
"If not provided, all accession versions are considered.",
439+
) @RequestParam(required = false) accessionVersionsFilter: List<String>?,
436440
@Parameter(
437441
description = "Filter by group ids. If not provided, all groups are considered.",
438442
) @RequestParam(required = false) groupIdsFilter: List<Int>?,
@@ -448,11 +452,16 @@ open class SubmissionController(
448452
headers.add(HttpHeaders.CONTENT_ENCODING, compression.compressionName)
449453
}
450454

455+
val parsedAccessionVersions = accessionVersionsFilter?.takeIf { it.isNotEmpty() }?.map {
456+
AccessionVersion.fromString(it)
457+
}
458+
451459
val totalRecords = submissionDatabaseService.countUnprocessedMetadata(
452460
authenticatedUser,
453461
organism,
454462
groupIdsFilter?.takeIf { it.isNotEmpty() },
455463
statusesFilter?.takeIf { it.isNotEmpty() },
464+
parsedAccessionVersions,
456465
)
457466
headers.add(X_TOTAL_RECORDS, totalRecords.toString())
458467
// TODO(https://github.com/loculus-project/loculus/issues/2778)
@@ -468,6 +477,7 @@ open class SubmissionController(
468477
groupIdsFilter?.takeIf { it.isNotEmpty() },
469478
statusesFilter?.takeIf { it.isNotEmpty() },
470479
fields?.takeIf { it.isNotEmpty() },
480+
parsedAccessionVersions,
471481
)
472482
}
473483

backend/src/main/kotlin/org/loculus/backend/service/seqsetcitations/SeqSetCitationsDatabaseService.kt

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -482,20 +482,7 @@ class SeqSetCitationsDatabaseService(
482482
}
483483
val accessionsWithVersions = mutableListOf<AccessionVersion>()
484484
for (record in seqSetRecords.filter { it.accession.contains('.') }) {
485-
val parts = record.accession.split('.')
486-
if (parts.size != 2) {
487-
throw UnprocessableEntityException(
488-
"Invalid accession format '${record.accession}': expected format 'ACCESSION.VERSION'",
489-
)
490-
}
491-
val versionString = parts[1]
492-
val version = versionString.toLongOrNull()
493-
if (version == null) {
494-
throw UnprocessableEntityException(
495-
"Invalid version in accession '${record.accession}': '$versionString' is not a valid integer",
496-
)
497-
}
498-
accessionsWithVersions.add(AccessionVersion(parts[0], version))
485+
accessionsWithVersions.add(AccessionVersion.fromString(record.accession))
499486
}
500487

501488
for (chunk in accessionsWithVersions.chunked(1000)) {

backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,7 @@ class SubmissionDatabaseService(
12311231
organism: Organism,
12321232
groupIdsFilter: List<Int>?,
12331233
statusesFilter: List<Status>?,
1234+
accessionVersionsFilter: List<AccessionVersion>?,
12341235
): Op<Boolean> {
12351236
val organismCondition = SequenceEntriesView.organismIs(organism)
12361237
val groupCondition = getGroupCondition(groupIdsFilter, authenticatedUser)
@@ -1239,7 +1240,12 @@ class SubmissionDatabaseService(
12391240
} else {
12401241
Op.TRUE
12411242
}
1242-
val conditions = organismCondition and groupCondition and statusCondition
1243+
val accessionVersionCondition = if (accessionVersionsFilter != null) {
1244+
SequenceEntriesView.accessionVersionIsIn(accessionVersionsFilter)
1245+
} else {
1246+
Op.TRUE
1247+
}
1248+
val conditions = organismCondition and groupCondition and statusCondition and accessionVersionCondition
12431249

12441250
return conditions
12451251
}
@@ -1249,6 +1255,7 @@ class SubmissionDatabaseService(
12491255
organism: Organism,
12501256
groupIdsFilter: List<Int>?,
12511257
statusesFilter: List<Status>?,
1258+
accessionVersionsFilter: List<AccessionVersion>?,
12521259
): Long = SequenceEntriesView
12531260
.selectAll()
12541261
.where(
@@ -1257,6 +1264,7 @@ class SubmissionDatabaseService(
12571264
organism,
12581265
groupIdsFilter,
12591266
statusesFilter,
1267+
accessionVersionsFilter,
12601268
),
12611269
)
12621270
.count()
@@ -1267,6 +1275,7 @@ class SubmissionDatabaseService(
12671275
groupIdsFilter: List<Int>?,
12681276
statusesFilter: List<Status>?,
12691277
fields: List<String>?,
1278+
accessionVersionsFilter: List<AccessionVersion>?,
12701279
): Sequence<AccessionVersionUnprocessedMetadata> {
12711280
val unprocessedMetadata = SequenceEntriesView.unprocessedDataColumn
12721281
// It's actually <Map<String, String>?> but exposed does not support nullable types here
@@ -1287,6 +1296,7 @@ class SubmissionDatabaseService(
12871296
organism,
12881297
groupIdsFilter,
12891298
statusesFilter,
1299+
accessionVersionsFilter,
12901300
),
12911301
)
12921302
.fetchSize(streamBatchSize)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package org.loculus.backend.api
2+
3+
import org.hamcrest.MatcherAssert.assertThat
4+
import org.hamcrest.Matchers.`is`
5+
import org.junit.jupiter.api.Test
6+
import org.junit.jupiter.api.assertThrows
7+
import org.loculus.backend.controller.UnprocessableEntityException
8+
9+
class SubmissionTypesTest {
10+
@Test
11+
fun `GIVEN valid accession version string THEN parses accession and version`() {
12+
assertThat(AccessionVersion.fromString("LOC_123.42"), `is`(AccessionVersion("LOC_123", 42)))
13+
}
14+
15+
@Test
16+
fun `GIVEN accession version with invalid format THEN throws format exception`() {
17+
listOf("LOC_123", ".1", "LOC_123.", "LOC.123.1").forEach {
18+
assertThrows<UnprocessableEntityException> {
19+
AccessionVersion.fromString(it)
20+
}
21+
}
22+
}
23+
24+
@Test
25+
fun `GIVEN non-numeric version THEN throws version exception`() {
26+
val exception = assertThrows<UnprocessableEntityException> {
27+
AccessionVersion.fromString("LOC_123.invalid")
28+
}
29+
30+
assertThat(
31+
exception.message,
32+
`is`("Invalid version in accession version 'LOC_123.invalid', expected a number"),
33+
)
34+
}
35+
}

backend/src/test/kotlin/org/loculus/backend/controller/seqsetcitations/SeqSetValidationEndpointsTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class SeqSetValidationEndpointsTest(
6767
jsonPath(
6868
"\$.detail",
6969
containsString(
70-
"Invalid version in accession 'ABCD.EF'",
70+
"Invalid version in accession version 'ABCD.EF', expected a number",
7171
),
7272
),
7373
)

backend/src/test/kotlin/org/loculus/backend/controller/submission/GetUnprocessedMetadataEndpointTest.kt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,29 @@ class GetUnprocessedMetadataEndpointTest(
190190
.andExpect(status().isOk)
191191
}
192192

193+
@Test
194+
fun `WHEN I filter by accessionVersions THEN should return only those entries`() {
195+
val entries = convenienceClient.prepareDefaultSequenceEntriesToApprovedForRelease()
196+
val target = entries.first()
197+
val accessionVersion = target.displayAccessionVersion()
198+
199+
val response = submissionControllerClient.getUnprocessedMetadata(
200+
accessionVersionsFilter = listOf(accessionVersion),
201+
)
202+
response.andExpect(status().isOk)
203+
.andExpect(header().string("x-total-records", `is`("1")))
204+
val responseBody = response.expectNdjsonAndGetContent<AccessionVersionUnprocessedMetadata>()
205+
assertThat(responseBody, hasSize(1))
206+
assertThat(responseBody[0].displayAccessionVersion(), `is`(accessionVersion))
207+
}
208+
209+
@Test
210+
fun `WHEN I filter by accessionVersions with invalid format THEN returns 422`() {
211+
submissionControllerClient.getUnprocessedMetadata(
212+
accessionVersionsFilter = listOf("not-a-valid-accession-version"),
213+
).andExpect(status().isUnprocessableEntity)
214+
}
215+
193216
// Regression test for https://github.com/loculus-project/loculus/issues/4036
194217
@Test
195218
fun `GIVEN revoked sequences exist THEN endpoint does not throw exception`() {

backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmissionControllerClient.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ class SubmissionControllerClient(private val mockMvc: MockMvc, private val objec
307307
jwt: String? = jwtForDefaultUser,
308308
groupIdsFilter: List<Int>? = null,
309309
statusesFilter: List<Status>? = null,
310+
accessionVersionsFilter: List<String>? = null,
310311
fields: List<String>? = null,
311312
compression: String? = null,
312313
): ResultActions = mockMvc.perform(
@@ -320,6 +321,7 @@ class SubmissionControllerClient(private val mockMvc: MockMvc, private val objec
320321
}
321322
.param("groupIdsFilter", groupIdsFilter?.joinToString(",") { it.toString() })
322323
.param("statusesFilter", statusesFilter?.joinToString(",") { it.name })
324+
.param("accessionVersionsFilter", accessionVersionsFilter?.joinToString(","))
323325
.param("fields", fields?.joinToString(",")),
324326
)
325327

ingest/scripts/call_loculus.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,15 @@ def record_factory(*args, **kwargs):
106106

107107
if mode == "get-submitted":
108108
logger.info("Getting submitted sequences")
109-
get_submitted(config, output)
109+
if config.segmented:
110+
insdc_key = [
111+
"insdcAccessionBase" + "_" + segment for segment in config.nucleotide_sequences
112+
]
113+
else:
114+
insdc_key = ["insdcAccessionBase"]
115+
116+
fields = ["hash", *insdc_key]
117+
get_submitted(config, output, fields)
110118

111119

112120
if __name__ == "__main__":

0 commit comments

Comments
 (0)