-
-
Notifications
You must be signed in to change notification settings - Fork 191
Expand file tree
/
Copy path0055_discoveredpackage_datafile_paths.py
More file actions
75 lines (61 loc) · 2.22 KB
/
0055_discoveredpackage_datafile_paths.py
File metadata and controls
75 lines (61 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Generated by Django 5.0.2 on 2024-03-01 16:09
import logging
from django.db import migrations, models
from django.db.models import Q
# Module-level logger bound to the "django" logger name; the data migration
# function in this file uses it to emit debug progress messages.
logger = logging.getLogger("django")
def update_package_datasource_ids(apps, schema_editor):
    """
    Compute DiscoveredPackage `datasource_ids` from the previously
    present `datasource_id` field.

    Every package whose `datasource_id` is not an empty string gets its
    `datasource_ids` set to a one-item list holding that value.

    The pending updates are written and released every `chunk_size` packages,
    and once more after the loop for the remaining partial chunk, so that the
    number of model instances held in memory stays bounded and no package is
    left unsaved.

    `apps` is the historical app registry provided by `RunPython`;
    `schema_editor` is unused.
    """
    DiscoveredPackage = apps.get_model("scanpipe", "DiscoveredPackage")
    queryset = DiscoveredPackage.objects.filter(~Q(datasource_id=""))

    object_count = queryset.count()
    if not object_count:
        # Nothing to convert: skip the iteration query entirely.
        return

    logger.debug(f"Compute datasource_ids for {object_count:,} packages.")

    chunk_size = 2000

    def save_pending(packages):
        # Only the `datasource_ids` column is written; `batch_size` caps the
        # number of objects sent per UPDATE statement.
        DiscoveredPackage.objects.bulk_update(
            objs=packages,
            fields=["datasource_ids"],
            batch_size=1000,
        )

    # NOTE: rows are written while the queryset is still being iterated. The
    # update is idempotent and does not touch the filtered `datasource_id`
    # column, so a row that is visited again would simply be rewritten with
    # the same value.
    iterator = queryset.iterator(chunk_size=chunk_size)

    unsaved_objects = []
    for index, package in enumerate(iterator, start=1):
        package.datasource_ids = [package.datasource_id]
        unsaved_objects.append(package)

        if not (index % chunk_size):
            logger.debug(f" {index:,} / {object_count:,} computed")
            save_pending(unsaved_objects)
            # Start a fresh list so already-saved objects can be released and
            # are not written again with the next chunk.
            unsaved_objects = []

    # Flush the final partial chunk (fewer than `chunk_size` packages).
    if unsaved_objects:
        save_pending(unsaved_objects)
class Migration(migrations.Migration):
    """
    Replace the DiscoveredPackage `datasource_id` field with a
    `datasource_ids` JSON list and add the `datafile_paths` JSON list.

    The operations are order-dependent: the new `datasource_ids` column is
    created first, populated from `datasource_id` by the data migration, and
    only then is `datasource_id` removed.
    """

    dependencies = [
        ("scanpipe", "0054_rename_pipeline"),
    ]

    operations = [
        # 1. Create the new list field; defaults to an empty list.
        migrations.AddField(
            model_name="discoveredpackage",
            name="datasource_ids",
            field=models.JSONField(
                blank=True,
                default=list,
                help_text="The identifiers for the datafile handlers used to obtain this package.",
            ),
        ),
        # 2. Copy existing `datasource_id` values into `datasource_ids`.
        #    The reverse direction is a no-op: nothing is copied back.
        migrations.RunPython(
            update_package_datasource_ids,
            reverse_code=migrations.RunPython.noop,
        ),
        # 3. Drop the old single-value field now that its data was carried over.
        migrations.RemoveField(
            model_name="discoveredpackage",
            name="datasource_id",
        ),
        # 4. Add the list of datafile paths; defaults to an empty list.
        #    NOTE(review): the "pacakage" spelling in the help text is kept
        #    as-is on purpose; presumably it mirrors the model definition, and
        #    editing it here would change the recorded field state — verify
        #    before correcting.
        migrations.AddField(
            model_name="discoveredpackage",
            name="datafile_paths",
            field=models.JSONField(
                blank=True,
                default=list,
                help_text="A list of Resource paths for package datafiles which were used to assemble this pacakage.",
            ),
        ),
    ]