Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions label_studio/data_import/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def _create_memory_efficient(self, project):
project_id=project.id,
result=Prediction.prepare_prediction_result(item.get('result'), project),
score=item.get('score'),
model_version=item.get('model_version', 'undefined'),
model_version=item.get('model_version') or None,
)
)
all_task_ids.add(task_id)
Expand Down Expand Up @@ -623,7 +623,7 @@ def _create_legacy(self, project):
project_id=project.id,
result=Prediction.prepare_prediction_result(item.get('result'), project),
score=item.get('score'),
model_version=item.get('model_version', 'undefined'),
model_version=item.get('model_version') or None,
)
)
except Exception as e:
Expand Down
12 changes: 9 additions & 3 deletions label_studio/projects/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,13 +881,19 @@ def get(self, request, *args, **kwargs):

def delete(self, request, *args, **kwargs):
    """Delete this project's predictions that belong to one model version.

    The request body must contain a ``model_version`` key. Its value may be
    a version string, JSON ``null``, the empty string, or the legacy
    placeholder ``'undefined'``; the last three all mean "no model version"
    and are collapsed to ``None`` before delegating to the project.

    :raises RestValidationError: if the ``model_version`` key is absent
        from the request body.
    :return: Response wrapping whatever ``project.delete_predictions``
        returns.
    """
    project = self.get_object()

    # Check key presence rather than truthiness: null and '' are valid
    # ways to ask for the "no model version" bucket and must not be
    # rejected as missing.
    if 'model_version' not in request.data:
        raise RestValidationError('model_version param is required')

    model_version = request.data.get('model_version')

    # Normalize representations of "no model version" to None. JSON null
    # already arrives as None; the empty string is the model field default,
    # and the literal string 'undefined' is legacy data from older imports.
    if model_version in (None, '', 'undefined'):
        model_version = None

    # Exactly one delete call, made with the normalized value.
    count = project.delete_predictions(model_version=model_version)
    return Response(data=count)


Expand Down
22 changes: 13 additions & 9 deletions label_studio/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,20 +797,24 @@ def should_none_model_version(self, model_version):

def delete_predictions(self, model_version=None):
"""
Deletes the predictions based on the provided model version.
If no model version is provided, it deletes all the predictions for this project.
Deletes predictions for this project filtered by model version.

:param model_version: Identifier of the model version (default is None)
:param model_version: If None, deletes predictions whose model_version
is NULL, empty, or the legacy placeholder 'undefined' (predictions
imported without a version label — see the data migration that
backfills 'undefined' to NULL). Otherwise filters by exact match.
:type model_version: str, optional
:return: Dictionary with count of deleted predictions
:rtype: dict
"""
params = {'project': self}

if model_version:
params.update({'model_version': model_version})

predictions = Prediction.objects.filter(**params)
if model_version is None:
predictions = Prediction.objects.filter(project=self).filter(
Q(model_version__isnull=True)
| Q(model_version='')
| Q(model_version='undefined')
)
else:
predictions = Prediction.objects.filter(project=self, model_version=model_version)

with transaction.atomic():
# If we are deleting specific model_version then we need
Expand Down
88 changes: 88 additions & 0 deletions label_studio/projects/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,91 @@ def test_extended(self):
assert response.json()['static'][1]['count'] == 1
assert response.json()['static'][2]['model_version'] == 'model_1'
assert response.json()['static'][2]['count'] == 2


class TestDeletePredictionsAPI(APITestCase):
    """Exercise the DELETE /api/projects/<pk>/model-versions endpoint.

    Regression coverage for issue #9717: predictions that were imported
    without a model_version (persisted as NULL, as the empty string, or as
    the legacy 'undefined' placeholder) have to be deletable.
    """

    def setUp(self):
        self.project = ProjectFactory()
        self.url = f'/api/projects/{self.project.id}/model-versions'
        self.user = self.project.created_by
        self.task = TaskFactory(project=self.project)
        self.client.force_authenticate(user=self.user)

    def _prediction_count(self, **filters):
        """Count this task's predictions matching the given ORM filters."""
        matching = self.task.predictions.filter(**filters)
        return matching.count()

    def _seed(self, *model_versions):
        """Create one prediction on the task per given model_version, in order."""
        for model_version in model_versions:
            PredictionFactory(task=self.task, model_version=model_version)

    def _delete(self, payload):
        """Send a JSON DELETE with the given body to the endpoint under test."""
        return self.client.delete(self.url, data=payload, format='json')

    def test_delete_by_specific_version(self):
        self._seed('v1', 'v1', 'v2')

        response = self._delete({'model_version': 'v1'})

        assert response.status_code == 200
        assert self._prediction_count(model_version='v1') == 0
        assert self._prediction_count(model_version='v2') == 1

    def test_delete_null_version_via_json_null(self):
        self._seed(None, 'v1')

        response = self._delete({'model_version': None})

        assert response.status_code == 200
        assert self._prediction_count(model_version__isnull=True) == 0
        assert self._prediction_count(model_version='v1') == 1

    def test_delete_null_version_via_empty_string(self):
        self._seed('', 'v1')

        response = self._delete({'model_version': ''})

        assert response.status_code == 200
        assert self._prediction_count(model_version='') == 0
        assert self._prediction_count(model_version='v1') == 1

    def test_delete_legacy_undefined_string(self):
        """Rows written before the migration carry model_version='undefined'.

        Migration 0062 backfills them to NULL, yet the API keeps accepting
        the legacy string so that it still works during a rolling
        deployment.
        """
        self._seed('undefined', 'v1')

        response = self._delete({'model_version': 'undefined'})

        assert response.status_code == 200
        assert self._prediction_count(model_version='undefined') == 0
        assert self._prediction_count(model_version='v1') == 1

    def test_delete_null_version_groups_legacy_representations(self):
        """One null-version delete sweeps NULL, '' and 'undefined' together.

        Callers should not need to know how "no version" was stored before
        the migration.
        """
        self._seed(None, '', 'undefined', 'v1')

        response = self._delete({'model_version': None})

        assert response.status_code == 200
        survivors_without_v1 = self.task.predictions.exclude(model_version='v1')
        assert survivors_without_v1.count() == 0
        assert self._prediction_count(model_version='v1') == 1

    def test_delete_requires_model_version_key(self):
        self._seed('v1')

        response = self._delete({})

        assert response.status_code == 400
        assert self._prediction_count(model_version='v1') == 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Backfill legacy `Prediction.model_version='undefined'` rows to NULL.

Older code paths (`data_import/api.py`, `tasks/serializers.py`) defaulted the
`model_version` field to the literal string `'undefined'` when an imported
prediction did not specify one. That created two distinct "no version"
representations (NULL and the string 'undefined') and caused the predictions
delete endpoint to fail. The application layer now uses NULL as the canonical
representation, and this migration normalizes existing rows.

Runs in batches of BATCH_SIZE rows so the UPDATE does not hold table-wide
locks for the entire duration on installations with a large Prediction table.
The reverse migration is intentionally a no-op: we cannot tell migrated rows
apart from genuinely-NULL rows after the fact.
"""

from django.db import migrations

BATCH_SIZE = 1000


def backfill_undefined_to_null(apps, schema_editor):
    """Set model_version to NULL on every Prediction still holding 'undefined'.

    Rows are processed in slices of at most BATCH_SIZE ids: fetch a slice of
    matching ids, update exactly those rows, and repeat until a fetch comes
    back empty.

    :param apps: historical app registry supplied by the migration runner;
        used to look up the 'tasks.Prediction' model.
    :param schema_editor: unused; present because RunPython passes it.
    """
    Prediction = apps.get_model('tasks', 'Prediction')
    legacy_rows = Prediction.objects.filter(model_version='undefined')

    def next_batch():
        # A fresh slice is evaluated on every call, so rows updated by the
        # previous pass no longer match and drop out of the result.
        return list(legacy_rows.values_list('id', flat=True)[:BATCH_SIZE])

    # iter(callable, sentinel) keeps calling next_batch() until it returns
    # an empty list, i.e. until no legacy rows remain.
    for batch_ids in iter(next_batch, []):
        Prediction.objects.filter(id__in=batch_ids).update(model_version=None)


class Migration(migrations.Migration):
    # Non-atomic on purpose: every batch update commits on its own, so one
    # long-running transaction does not block other writes while the
    # backfill is in progress.
    atomic = False

    dependencies = [('tasks', '0061_task_project_file_upload_idx_async')]

    # Forward: run the batched backfill. Reverse: deliberately nothing,
    # via RunPython.noop.
    operations = [
        migrations.RunPython(
            code=backfill_undefined_to_null,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
2 changes: 1 addition & 1 deletion label_studio/tasks/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def add_predictions(self, task_predictions):
)
prediction_score = None

last_model_version = prediction.get('model_version', 'undefined')
last_model_version = prediction.get('model_version') or None
db_predictions.append(
Prediction(
task=self.db_tasks[i],
Expand Down
54 changes: 54 additions & 0 deletions label_studio/tests/test_prediction_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1068,3 +1068,57 @@ def test_taxonomy_prediction_validation(self):
assert serializer.is_valid()
# Should not raise due to taxonomy flattening in value label validation
serializer.save(project_id=project.id)


@pytest.mark.django_db
class TestImportPredictionsModelVersionDefault:
    """Importing a prediction without a model_version must store NULL.

    This is the import-side half of the fix that also changes the DELETE
    endpoint; keeping both halves aligned stops the legacy 'undefined'
    placeholder from getting back into the database.
    """

    @pytest.fixture(autouse=True)
    def setup(self, django_db_setup, django_db_blocker):
        with django_db_blocker.unblock():
            owner = UserFactory()
            self.user = owner
            self.organization = OrganizationFactory(created_by=owner)
            owner.active_organization = self.organization
            owner.save()
            self.project = ProjectFactory(
                title='Import default test',
                organization=self.organization,
                created_by=owner,
            )
            self.task = TaskFactory(project=self.project, data={'text': 'x'})

    def _post(self, payload):
        """POST ``payload`` as JSON to the predictions import view as self.user."""
        endpoint = f'/api/projects/{self.project.id}/import/predictions'
        request = APIRequestFactory().post(endpoint, data=payload, format='json')
        force_authenticate(request, user=self.user)
        view = ImportPredictionsAPI.as_view()
        return view(request, pk=self.project.id)

    def _import_one(self, **extra_fields):
        """Import one empty-result prediction for the task and return the stored row.

        Asserts the 201 status before the database lookup, then fetches the
        single prediction attached to the task.
        """
        item = {'result': [], 'task': self.task.id, **extra_fields}
        response = self._post([item])

        assert response.status_code == 201
        return Prediction.objects.get(task=self.task)

    def test_missing_model_version_becomes_null(self):
        prediction = self._import_one()
        assert prediction.model_version is None

    def test_empty_model_version_becomes_null(self):
        prediction = self._import_one(model_version='')
        assert prediction.model_version is None

    def test_explicit_model_version_preserved(self):
        prediction = self._import_one(model_version='v1')
        assert prediction.model_version == 'v1'
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,15 @@ const VersionCard = ({ version, selected, onSelect, editable, onDelete }) => {
[version, onDelete],
);

const hasNoVersion = !version.model_version || version.model_version === "undefined";

return (
<div className={rootClass.toClassName()}>
<div>
<div className={rootClass.elem("title").toClassName()}>
{version.model_version}
{version.model_version === "undefined" && (
<Tooltip title="Model version is undefined. Likely means that model_version field was missing when predictions were imported.">
{hasNoVersion ? "No model version" : version.model_version}
{hasNoVersion && (
<Tooltip title="No model version on these predictions. Usually means the model_version field was missing when predictions were imported.">
<IconInfoOutline className={cn("help-icon").toClassName()} width="14" height="14" />
</Tooltip>
)}
Expand Down
Loading