Skip to content

Commit 8612ad6

Browse files
Maffooch and claude authored
Fix import-languages 500 errors and optimize DB performance (#14553)
* Fix import-languages endpoint 500 errors and optimize performance

  The /api/v2/import-languages/ endpoint was producing 500 errors due to database integrity issues on Language_Type and Languages models. This commit addresses both reliability and performance.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix ruff lint errors in serializer and migration

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Consolidate two migrations into single 0262_language_type_unique_language

  Combines the data deduplication (RunPython) and schema change (AlterField) into a single migration file.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix test fixtures conflicting with Language_Type unique constraint

  Remove Language_Type entries from test fixtures that duplicate languages already seeded by migration 0115_language_types. Update Languages FK references to point to the correct seeded Language_Type PKs.

  - dojo_testdata.json: Remove JSON (pk=1) and Python (pk=2) Language_Type entries, update Languages FK from pk=1 to pk=94 (seeded JSON pk)
  - dojo_testdata_locations.json: Same changes
  - defect_dojo_sample_data.json: Remove 3 conflicting Language_Type entries (DOS Batch, InstallShield, Ruby) with PKs that differ from seed data
  - defect_dojo_sample_data_locations.json: Same changes

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Renumber migration from 0262 to 0263 to avoid conflict

  Migration 0262_remove_system_settings_credentials was merged to the bugfix branch. Renumber our migration to 0263 and update the dependency chain and max_migration.txt accordingly.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cad2cc2 commit 8612ad6

10 files changed

Lines changed: 2123 additions & 2069 deletions

dojo/api_v2/serializers.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from django.contrib.auth.models import Group, Permission
1313
from django.contrib.auth.password_validation import validate_password
1414
from django.core.exceptions import PermissionDenied, ValidationError
15+
from django.db import transaction
1516
from django.db.utils import IntegrityError
1617
from django.urls import reverse
1718
from django.utils import timezone
@@ -2880,38 +2881,51 @@ def save(self):
28802881
deserialized = json.loads(data)
28812882
except Exception:
28822883
msg = "Invalid format"
2883-
raise Exception(msg)
2884+
raise serializers.ValidationError(msg)
28842885

2885-
# Filter out ignored keys
2886-
language_names = [name for name in deserialized if name not in {"header", "SUM"}]
2887-
# Prepopulate existing Language_Type objects
2888-
existing_types = {
2886+
# Filter out ignored keys and deduplicate
2887+
language_names = list(dict.fromkeys(
2888+
name for name in deserialized if name not in {"header", "SUM"}
2889+
))
2890+
# Ensure any new Language_Type records exist (ignore conflicts from
2891+
# concurrent requests or already-existing types)
2892+
Language_Type.objects.bulk_create(
2893+
[Language_Type(language=name) for name in language_names],
2894+
ignore_conflicts=True,
2895+
)
2896+
# Single query to fetch all Language_Type objects we need (indexed lookup)
2897+
language_types = {
28892898
lt.language: lt
28902899
for lt in Language_Type.objects.filter(language__in=language_names)
28912900
}
2892-
# Determine which Language_Type objects need to be created
2893-
new_language_names = [name for name in language_names if name not in existing_types]
2894-
new_types = [Language_Type(language=name) for name in new_language_names]
2895-
Language_Type.objects.bulk_create(new_types)
2896-
# Add newly created Language_Type objects to cache
2897-
for lt in Language_Type.objects.filter(language__in=new_language_names):
2898-
existing_types[lt.language] = lt
2899-
# Delete all Languages for this product
2900-
Languages.objects.filter(product=product).delete()
2901-
# Prepare Languages objects for bulk insert
2902-
languages_to_create = [
2901+
# Prepare Languages objects for upsert
2902+
languages_to_upsert = [
29032903
Languages(
29042904
product=product,
2905-
language=existing_types[name],
2905+
language=language_types[name],
29062906
files=deserialized[name].get("nFiles", 0),
29072907
blank=deserialized[name].get("blank", 0),
29082908
comment=deserialized[name].get("comment", 0),
29092909
code=deserialized[name].get("code", 0),
29102910
)
29112911
for name in language_names
29122912
]
2913-
# Bulk insert all Languages in one query
2914-
Languages.objects.bulk_create(languages_to_create)
2913+
# Upsert Languages and remove stale ones atomically
2914+
try:
2915+
with transaction.atomic():
2916+
Languages.objects.bulk_create(
2917+
languages_to_upsert,
2918+
update_conflicts=True,
2919+
unique_fields=["language", "product"],
2920+
update_fields=["files", "blank", "comment", "code"],
2921+
)
2922+
# Remove languages no longer present in the file
2923+
Languages.objects.filter(product=product).exclude(
2924+
language__in=language_types.values(),
2925+
).delete()
2926+
except IntegrityError as e:
2927+
msg = f"Failed to import languages due to a data integrity issue: {e}"
2928+
raise serializers.ValidationError(msg)
29152929

29162930
def validate(self, data):
29172931
if is_scan_file_too_large(data["file"]):
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import logging
2+
3+
from django.db import migrations, models
4+
from django.db.models import Count, Min
5+
6+
logger = logging.getLogger(__name__)
7+
8+
9+
def deduplicate_language_types(apps, schema_editor):
    """
    Merge duplicate Language_Type rows so that ``language`` can become unique.

    For every language name that occurs more than once, the Language_Type
    with the lowest ID is kept as the canonical record. Languages rows that
    reference any of the duplicates are moved to the canonical record and the
    duplicate Language_Type rows are deleted afterwards.

    Several Languages rows of one product may end up on the same canonical
    type. Per product only the lowest-ID row is kept; the others are deleted
    *before* any row is reassigned, so the reassignment can never produce two
    rows with the same (language, product) pair.

    NOTE(review): this ordering matters if Languages carries a unique
    (language, product) constraint -- a blind UPDATE followed by a clean-up
    would then fail with IntegrityError on the UPDATE itself. Without such a
    constraint the result is the same as cleaning up afterwards.

    Args:
        apps: Historical app registry passed in by the migration framework.
        schema_editor: Schema editor passed in by the migration framework
            (not used here).
    """
    Language_Type = apps.get_model("dojo", "Language_Type")
    Languages = apps.get_model("dojo", "Languages")

    # Find language names that have duplicate Language_Type records
    dupes = (
        Language_Type.objects
        .values("language")
        .annotate(cnt=Count("id"), min_id=Min("id"))
        .filter(cnt__gt=1)
    )

    total_reassigned = 0
    total_deleted_types = 0
    total_deleted_languages = 0

    for dupe in dupes:
        canonical_id = dupe["min_id"]
        duplicate_ids = list(
            Language_Type.objects
            .filter(language=dupe["language"])
            .exclude(id=canonical_id)
            .values_list("id", flat=True),
        )

        # Walk every Languages row of this language name (canonical and
        # duplicate types alike) in ID order. The first row seen for a
        # product survives; any later row for that product is redundant.
        rows = (
            Languages.objects
            .filter(language_id__in=[canonical_id, *duplicate_ids])
            .order_by("id")
            .values_list("id", "language_id", "product_id")
        )
        seen_products = set()
        move_ids = []
        drop_ids = []
        for row_id, language_id, product_id in rows:
            if product_id in seen_products:
                drop_ids.append(row_id)
                continue
            seen_products.add(product_id)
            if language_id != canonical_id:
                move_ids.append(row_id)

        # Delete the redundant rows first so the reassignment below cannot
        # collide with an existing (language, product) pair.
        if drop_ids:
            deleted_count, _ = Languages.objects.filter(id__in=drop_ids).delete()
            total_deleted_languages += deleted_count

        # Reassign the surviving rows from duplicates to the canonical record
        if move_ids:
            total_reassigned += Languages.objects.filter(
                id__in=move_ids,
            ).update(language_id=canonical_id)

        # Delete the duplicate Language_Type records (nothing references
        # them any more at this point)
        deleted_count, _ = Language_Type.objects.filter(id__in=duplicate_ids).delete()
        total_deleted_types += deleted_count

    if total_deleted_types:
        logger.info(
            "Deduplicated Language_Type: removed %d duplicate types, "
            "reassigned %d Languages FK references, "
            "removed %d duplicate Languages records",
            total_deleted_types,
            total_reassigned,
            total_deleted_languages,
        )
79+
80+
81+
def noop_reverse(apps, schema_editor):
    """Do nothing; both arguments are accepted and ignored."""
    return None
83+
84+
85+
class Migration(migrations.Migration):
    """
    Make ``Language_Type.language`` unique after merging existing duplicates.

    The data clean-up and the schema change run as separate steps rather
    than inside one migration-wide transaction. If the schema change fails,
    the clean-up stays committed; re-running the migration is safe because
    the clean-up then finds no duplicates left.
    """

    # PostgreSQL rejects ALTER TABLE with "pending trigger events" when rows
    # of that table were modified earlier in the same transaction and
    # deferred constraint checks are still queued. Disabling the
    # migration-wide transaction lets the data step commit first.
    atomic = False

    dependencies = [
        ("dojo", "0262_remove_system_settings_credentials"),
    ]

    operations = [
        # atomic=True keeps the deduplication all-or-nothing in its own
        # transaction, which is committed before AlterField starts.
        migrations.RunPython(deduplicate_language_types, noop_reverse, atomic=True),
        migrations.AlterField(
            model_name="language_type",
            name="language",
            field=models.CharField(max_length=100, unique=True),
        ),
    ]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0262_remove_system_settings_credentials
1+
0263_language_type_unique_language

0 commit comments

Comments
 (0)